diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 1b1d81fcd07a2..ce1980697abbb 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -131,6 +131,15 @@ def ext: GICombineRule < (apply [{ applyEXT(*${root}, ${matchinfo}); }]) >; +def fullrev: GICombineRule < /* Rule fullrev: fires on a G_SHUFFLE_VECTOR root whose second source operand ($src2) is defined by G_IMPLICIT_DEF and whose mask passes ShuffleVectorInst::isReverseMask, with the mask length supplied as the element count. The apply step calls applyFullRev with the root and MRI. $matchinfo is declared in defs but is not referenced by the match or apply code of this rule. */ + (defs root:$root, shuffle_matchdata:$matchinfo), + (match (G_IMPLICIT_DEF $src2), + (G_SHUFFLE_VECTOR $src, $src1, $src2, $mask):$root, + [{ return ShuffleVectorInst::isReverseMask(${mask}.getShuffleMask(), + ${mask}.getShuffleMask().size()); }]), + (apply [{ applyFullRev(*${root}, MRI); }]) +>; + def insertelt_nonconst: GICombineRule < (defs root:$root, shuffle_matchdata:$matchinfo), (match (wip_match_opcode G_INSERT_VECTOR_ELT):$root, @@ -163,7 +172,7 @@ def form_duplane : GICombineRule < (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }]) >; -def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, +def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev, form_duplane, shuf_to_ins]>; // Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp index 56d70ffdece71..8130ea4b8902f 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -405,6 +405,19 @@ void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) { MI.eraseFromParent(); } +void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) { /* Replaces MI with two instructions and then erases MI: a G_REV64 of operand 1, followed by a G_EXT whose two vector inputs are both that G_REV64 result and whose last operand is the constant 8; the G_EXT defines operand 0 of MI. Operand 2 of MI is never read. Asserts that the destination type is 128 bits wide. NOTE(review): that width is only asserted here and is not part of the fullrev match predicate; confirm that earlier rules or legalization guarantee it. */ + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + assert(DstTy.getSizeInBits() == 128 && + "Expected 128bit vector in applyFullRev"); + MachineIRBuilder MIRBuilder(MI); + auto Cst = MIRBuilder.buildConstant(LLT::scalar(32), 8); /* 32-bit scalar constant used as the last G_EXT operand; the regenerated tests in this patch show it emitted as the #8 immediate of ext. Presumably a byte offset that swaps the two 64-bit halves; TODO confirm. */ + auto Rev = 
MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src}); /* the intermediate value is given the same type as the destination */ + MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst}); /* writes directly into the original destination register */ + MI.eraseFromParent(); +} + bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) { assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll index ee9fff7ceebc6..f0c9dccb21d84 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll @@ -440,11 +440,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) { ; ; CHECK-GI-LABEL: shufsext_v8i8_v8i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI14_0 -; CHECK-GI-NEXT: sshll v2.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI14_0] -; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-GI-NEXT: rev64 v0.8h, v0.8h +; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret entry: @@ -493,11 +492,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) { ; ; CHECK-GI-LABEL: shufzext_v8i8_v8i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI16_0 -; CHECK-GI-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] -; CHECK-GI-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-GI-NEXT: rev64 v0.8h, v0.8h +; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-GI-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll index abdfb996fa166..db5b93282e9c4 100644 --- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll @@ -23,19 +23,11 @@ entry: } define <4 
x i32> @v4i32(<4 x i32> %a) { -; CHECK-SD-LABEL: v4i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: rev64 v0.4s, v0.4s -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v4i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI2_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI2_0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev64 v0.4s, v0.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ret entry: %V128 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> ret <4 x i32> %V128 @@ -52,19 +44,11 @@ entry: } define <8 x i16> @v8i16(<8 x i16> %a) { -; CHECK-SD-LABEL: v8i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: rev64 v0.8h, v0.8h -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v8i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI4_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev64 v0.8h, v0.8h +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ret entry: %V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> ret <8 x i16> %V128 @@ -93,6 +77,22 @@ entry: ret <8 x i16> %V128 } +define <4 x i16> @v8i16_3(<8 x i16> %a) { +; CHECK-SD-LABEL: v8i16_3: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: rev64 v0.4h, v0.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i16_3: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: rev64 v0.8h, v0.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %V128 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> + ret <4 x i16> %V128 +} + define <4 x i16> @v4i16(<4 x 
i16> %a) { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: // %entry @@ -104,19 +104,11 @@ entry: } define <16 x i8> @v16i8(<16 x i8> %a) { -; CHECK-SD-LABEL: v16i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: rev64 v0.16b, v0.16b -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v16i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI7_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev64 v0.16b, v0.16b +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ret entry: %V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> ret <16 x i8> %V128 @@ -125,18 +117,18 @@ entry: define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-SD-LABEL: v16i8_2: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: adrp x8, .LCPI8_0 +; CHECK-SD-NEXT: adrp x8, .LCPI9_0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: v16i8_2: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI8_0 +; CHECK-GI-NEXT: adrp x8, .LCPI9_0 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret @@ -166,19 +158,11 @@ entry: } define <4 x float> @v4f32(<4 x float> %a) { -; CHECK-SD-LABEL: v4f32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: rev64 v0.4s, v0.4s -; CHECK-SD-NEXT: ext v0.16b, 
v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v4f32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI11_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v4f32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev64 v0.4s, v0.4s +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ret entry: %V128 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> ret <4 x float> %V128 @@ -195,19 +179,11 @@ entry: } define <8 x half> @v8f16(<8 x half> %a) { -; CHECK-SD-LABEL: v8f16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: rev64 v0.8h, v0.8h -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: v8f16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI13_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: v8f16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: rev64 v0.8h, v0.8h +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ret entry: %V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> ret <8 x half> %V128