Skip to content

Commit 19c2a5f

Browse files
committed
[AArch64][GlobalISel] Perfect Shuffles
This is a port of the existing perfect-shuffle generation code from SDAG, following the same structure. I wrote it a while ago and it has been sitting on my machine. It brings the codegen for certain shuffles in line with SDAG and avoids the need to generate a tbl instruction plus a constant-pool load.
1 parent cc3b6c3 commit 19c2a5f

14 files changed

+319
-179
lines changed

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1302,6 +1302,24 @@ class MachineIRBuilder {
13021302
const SrcOp &Elt,
13031303
const SrcOp &Idx);
13041304

1305+
/// Build and insert \p Res = G_INSERT_VECTOR_ELT \p Val, \p Elt, \p Idx
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Res must be a generic virtual register with vector type.
/// \pre \p Val must be a generic virtual register with vector type.
/// \pre \p Elt must be a generic virtual register with scalar type.
///
/// \return The newly created instruction.
MachineInstrBuilder buildInsertVectorElementConstant(const DstOp &Res,
                                                     const SrcOp &Val,
                                                     const SrcOp &Elt,
                                                     const int Idx) {
  // The index operand must use the target's preferred vector-index width,
  // so query it from TargetLowering instead of hard-coding s64.
  auto TLI = getMF().getSubtarget().getTargetLowering();
  unsigned VecIdxWidth = TLI->getVectorIdxTy(getDataLayout()).getSizeInBits();
  return buildInsertVectorElement(
      Res, Val, Elt, buildConstant(LLT::scalar(VecIdxWidth), Idx));
}
1322+
13051323
/// Build and insert \p Res = G_EXTRACT_VECTOR_ELT \p Val, \p Idx
13061324
///
13071325
/// \pre setBasicBlock or setMI must have been called.

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,13 @@ def shuf_to_ins: GICombineRule <
129129
(apply [{ applyINS(*${root}, MRI, B, ${matchinfo}); }])
130130
>;
131131

132+
// Lower a 4-element G_SHUFFLE_VECTOR through the perfect-shuffle table
// (ported from the SDAG lowering), avoiding a tbl + constant-pool load.
def perfect_shuffle: GICombineRule <
  (defs root:$root),
  (match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
         [{ return matchPerfectShuffle(*${root}, MRI); }]),
  (apply [{ applyPerfectShuffle(*${root}, MRI, B); }])
>;
138+
132139
def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
133140
def vashr_vlshr_imm : GICombineRule<
134141
(defs root:$root, vashr_vlshr_imm_matchdata:$matchinfo),
@@ -147,7 +154,8 @@ def form_duplane : GICombineRule <
147154
>;
148155

149156
// Group of G_SHUFFLE_VECTOR lowering combines applied post-legalization.
// perfect_shuffle is listed last so the more specific patterns (dup, rev,
// ext, zip, uzp, trn, ins) get a chance first — TODO confirm group ordering
// semantics.
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn,
                                              form_duplane, shuf_to_ins,
                                              perfect_shuffle]>;
151159

152160
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
153161
def vector_unmerge_lowering : GICombineRule <

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12090,25 +12090,6 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
1209012090
unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
1209112091
unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
1209212092

12093-
enum {
12094-
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
12095-
OP_VREV,
12096-
OP_VDUP0,
12097-
OP_VDUP1,
12098-
OP_VDUP2,
12099-
OP_VDUP3,
12100-
OP_VEXT1,
12101-
OP_VEXT2,
12102-
OP_VEXT3,
12103-
OP_VUZPL, // VUZP, left result
12104-
OP_VUZPR, // VUZP, right result
12105-
OP_VZIPL, // VZIP, left result
12106-
OP_VZIPR, // VZIP, right result
12107-
OP_VTRNL, // VTRN, left result
12108-
OP_VTRNR, // VTRN, right result
12109-
OP_MOVLANE // Move lane. RHSID is the lane to move into
12110-
};
12111-
1211212093
if (OpNum == OP_COPY) {
1211312094
if (LHSID == (1 * 9 + 2) * 9 + 3)
1211412095
return LHS;

llvm/lib/Target/AArch64/AArch64PerfectShuffle.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6588,6 +6588,25 @@ static const unsigned PerfectShuffleTable[6561 + 1] = {
65886588
835584U, // <u,u,u,u>: Cost 0 copy LHS
65896589
0};
65906590

6591+
// Operation kinds encoded in a perfect-shuffle table entry: the opcode lives
// in bits [29:26] of a PFEntry (extracted as (PFEntry >> 26) & 0x0F). The
// enumerator values/order are part of the table encoding — do not reorder.
// Shared by the SDAG and GlobalISel shuffle lowerings.
enum {
  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
  OP_VREV,
  OP_VDUP0,
  OP_VDUP1,
  OP_VDUP2,
  OP_VDUP3,
  OP_VEXT1,
  OP_VEXT2,
  OP_VEXT3,
  OP_VUZPL, // VUZP, left result
  OP_VUZPR, // VUZP, right result
  OP_VZIPL, // VZIP, left result
  OP_VZIPR, // VZIP, right result
  OP_VTRNL, // VTRN, left result
  OP_VTRNR, // VTRN, right result
  OP_MOVLANE // Move lane. RHSID is the lane to move into
};
6609+
65916610
inline unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
65926611
assert(M.size() == 4 && "Expected a 4 entry perfect shuffle");
65936612

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,189 @@ void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
504504
MI.eraseFromParent();
505505
}
506506

507+
/// Match 4 elemental G_SHUFFLE_VECTOR
508+
bool matchPerfectShuffle(MachineInstr &MI, MachineRegisterInfo &MRI) {
509+
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
510+
return MRI.getType(MI.getOperand(0).getReg()).getNumElements() == 4;
511+
}
512+
513+
static Register GeneratePerfectShuffle(unsigned ID, Register V1, Register V2,
514+
unsigned PFEntry, Register LHS,
515+
Register RHS, MachineIRBuilder &MIB,
516+
MachineRegisterInfo &MRI) {
517+
unsigned OpNum = (PFEntry >> 26) & 0x0F;
518+
unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
519+
unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
520+
521+
if (OpNum == OP_COPY) {
522+
if (LHSID == (1 * 9 + 2) * 9 + 3)
523+
return LHS;
524+
assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
525+
return RHS;
526+
}
527+
528+
if (OpNum == OP_MOVLANE) {
529+
// Decompose a PerfectShuffle ID to get the Mask for lane Elt
530+
auto getPFIDLane = [](unsigned ID, int Elt) -> int {
531+
assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
532+
Elt = 3 - Elt;
533+
while (Elt > 0) {
534+
ID /= 9;
535+
Elt--;
536+
}
537+
return (ID % 9 == 8) ? -1 : ID % 9;
538+
};
539+
540+
// For OP_MOVLANE shuffles, the RHSID represents the lane to move into. We
541+
// get the lane to move from the PFID, which is always from the
542+
// original vectors (V1 or V2).
543+
Register OpLHS = GeneratePerfectShuffle(
544+
LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, MIB, MRI);
545+
LLT VT = MRI.getType(OpLHS);
546+
assert(RHSID < 8 && "Expected a lane index for RHSID!");
547+
unsigned ExtLane = 0;
548+
Register Input;
549+
550+
// OP_MOVLANE are either D movs (if bit 0x4 is set) or S movs. D movs
551+
// convert into a higher type.
552+
if (RHSID & 0x4) {
553+
int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
554+
if (MaskElt == -1)
555+
MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
556+
assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
557+
ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
558+
Input = MaskElt < 2 ? V1 : V2;
559+
if (VT.getScalarSizeInBits() == 16 && VT != LLT::fixed_vector(2, 32)) {
560+
Input = MIB.buildInstr(TargetOpcode::G_BITCAST,
561+
{LLT::fixed_vector(2, 32)}, {Input})
562+
.getReg(0);
563+
OpLHS = MIB.buildInstr(TargetOpcode::G_BITCAST,
564+
{LLT::fixed_vector(2, 32)}, {OpLHS})
565+
.getReg(0);
566+
}
567+
if (VT.getScalarSizeInBits() == 32 && VT != LLT::fixed_vector(2, 64)) {
568+
Input = MIB.buildInstr(TargetOpcode::G_BITCAST,
569+
{LLT::fixed_vector(2, 64)}, {Input})
570+
.getReg(0);
571+
OpLHS = MIB.buildInstr(TargetOpcode::G_BITCAST,
572+
{LLT::fixed_vector(2, 64)}, {OpLHS})
573+
.getReg(0);
574+
}
575+
} else {
576+
int MaskElt = getPFIDLane(ID, RHSID);
577+
assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
578+
ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
579+
Input = MaskElt < 4 ? V1 : V2;
580+
if (VT.getScalarSizeInBits() == 16 && VT != LLT::fixed_vector(4, 16)) {
581+
Input = MIB.buildInstr(TargetOpcode::G_BITCAST,
582+
{LLT::fixed_vector(4, 16)}, {Input})
583+
.getReg(0);
584+
OpLHS = MIB.buildInstr(TargetOpcode::G_BITCAST,
585+
{LLT::fixed_vector(4, 16)}, {OpLHS})
586+
.getReg(0);
587+
}
588+
}
589+
auto Ext = MIB.buildExtractVectorElementConstant(
590+
MRI.getType(Input).getElementType(), Input, ExtLane);
591+
auto Ins = MIB.buildInsertVectorElementConstant(MRI.getType(Input), OpLHS,
592+
Ext, RHSID & 0x3);
593+
if (MRI.getType(Ins.getReg(0)) != VT)
594+
return MIB.buildInstr(TargetOpcode::G_BITCAST, {VT}, {Ins}).getReg(0);
595+
return Ins.getReg(0);
596+
}
597+
598+
Register OpLHS, OpRHS;
599+
OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
600+
RHS, MIB, MRI);
601+
OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
602+
RHS, MIB, MRI);
603+
LLT VT = MRI.getType(OpLHS);
604+
605+
switch (OpNum) {
606+
default:
607+
llvm_unreachable("Unknown shuffle opcode!");
608+
case OP_VREV: {
609+
// VREV divides the vector in half and swaps within the half.
610+
unsigned Opcode = VT.getScalarSizeInBits() == 32 ? AArch64::G_REV64
611+
: VT.getScalarSizeInBits() == 16 ? AArch64::G_REV32
612+
: AArch64::G_REV16;
613+
return MIB.buildInstr(Opcode, {VT}, {OpLHS}).getReg(0);
614+
}
615+
case OP_VDUP0:
616+
case OP_VDUP1:
617+
case OP_VDUP2:
618+
case OP_VDUP3: {
619+
unsigned Opcode;
620+
if (VT.getScalarSizeInBits() == 8)
621+
Opcode = AArch64::G_DUPLANE8;
622+
else if (VT.getScalarSizeInBits() == 16)
623+
Opcode = AArch64::G_DUPLANE16;
624+
else if (VT.getScalarSizeInBits() == 32)
625+
Opcode = AArch64::G_DUPLANE32;
626+
else if (VT.getScalarSizeInBits() == 64)
627+
Opcode = AArch64::G_DUPLANE64;
628+
else
629+
llvm_unreachable("Invalid vector element type?");
630+
631+
if (VT.getSizeInBits() == 64)
632+
OpLHS = MIB.buildConcatVectors(
633+
VT.changeElementCount(VT.getElementCount() * 2),
634+
{OpLHS, MIB.buildUndef(VT).getReg(0)})
635+
.getReg(0);
636+
Register Lane =
637+
MIB.buildConstant(LLT::scalar(64), OpNum - OP_VDUP0).getReg(0);
638+
return MIB.buildInstr(Opcode, {VT}, {OpLHS, Lane}).getReg(0);
639+
}
640+
case OP_VEXT1:
641+
case OP_VEXT2:
642+
case OP_VEXT3: {
643+
unsigned Imm = (OpNum - OP_VEXT1 + 1) * VT.getScalarSizeInBits() / 8;
644+
return MIB
645+
.buildInstr(AArch64::G_EXT, {VT},
646+
{OpLHS, OpRHS, MIB.buildConstant(LLT::scalar(64), Imm)})
647+
.getReg(0);
648+
}
649+
case OP_VUZPL:
650+
return MIB.buildInstr(AArch64::G_UZP1, {VT}, {OpLHS, OpRHS}).getReg(0);
651+
case OP_VUZPR:
652+
return MIB.buildInstr(AArch64::G_UZP2, {VT}, {OpLHS, OpRHS}).getReg(0);
653+
case OP_VZIPL:
654+
return MIB.buildInstr(AArch64::G_ZIP1, {VT}, {OpLHS, OpRHS}).getReg(0);
655+
case OP_VZIPR:
656+
return MIB.buildInstr(AArch64::G_ZIP2, {VT}, {OpLHS, OpRHS}).getReg(0);
657+
case OP_VTRNL:
658+
return MIB.buildInstr(AArch64::G_TRN1, {VT}, {OpLHS, OpRHS}).getReg(0);
659+
case OP_VTRNR:
660+
return MIB.buildInstr(AArch64::G_TRN2, {VT}, {OpLHS, OpRHS}).getReg(0);
661+
}
662+
}
663+
664+
/// Rewrite a matched 4-element G_SHUFFLE_VECTOR into the instruction
/// sequence prescribed by the perfect-shuffle table, then erase it.
void applyPerfectShuffle(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  const Register Dst = MI.getOperand(0).getReg();
  const Register LHS = MI.getOperand(1).getReg();
  const Register RHS = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
  assert(Mask.size() == 4 && "Expected 4 element mask");

  // The table index treats each mask lane as a base-9 digit; undef lanes
  // (negative mask values) are encoded as 8.
  unsigned PFTableIndex = 0;
  for (const int Elt : Mask)
    PFTableIndex = PFTableIndex * 9 + (Elt < 0 ? 8u : unsigned(Elt));

  const unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
  Register Res = GeneratePerfectShuffle(PFTableIndex, LHS, RHS, PFEntry, LHS,
                                        RHS, Builder, MRI);
  Builder.buildCopy(Dst, Res);
  MI.eraseFromParent();
}
689+
507690
/// isVShiftRImm - Check if this is a valid vector for the immediate
508691
/// operand of a vector shift right operation. The value must be in the range:
509692
/// 1 <= Value <= ElementBits for a right shift.

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-shuffle-splat.mir

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,16 @@ body: |
280280
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
281281
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
282282
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], [[COPY]](s32), [[C]](s64)
283-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[IVEC]](<4 x s32>), [[DEF]], shufflemask(undef, 0, 0, 3)
284-
; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
283+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
284+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[IVEC]](<4 x s32>), [[C1]](s64)
285+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
286+
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[EVEC]](s32), [[C2]](s64)
287+
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
288+
; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[IVEC]](<4 x s32>), [[C3]](s64)
289+
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
290+
; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[EVEC1]](s32), [[C4]](s64)
291+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY [[IVEC2]](<4 x s32>)
292+
; CHECK-NEXT: $q0 = COPY [[COPY1]](<4 x s32>)
285293
; CHECK-NEXT: RET_ReallyLR implicit $q0
286294
%0:_(s32) = COPY $s0
287295
%2:_(<4 x s32>) = G_IMPLICIT_DEF

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-uzp.mir

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,10 @@ body: |
6767
; CHECK-NEXT: {{ $}}
6868
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
6969
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
70-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 1, 4, 6)
71-
; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
70+
; CHECK-NEXT: [[ZIP1_:%[0-9]+]]:_(<4 x s32>) = G_ZIP1 [[COPY]], [[COPY1]]
71+
; CHECK-NEXT: [[UZP1_:%[0-9]+]]:_(<4 x s32>) = G_UZP1 [[ZIP1_]], [[COPY1]]
72+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY [[UZP1_]](<4 x s32>)
73+
; CHECK-NEXT: $q0 = COPY [[COPY2]](<4 x s32>)
7274
; CHECK-NEXT: RET_ReallyLR implicit $q0
7375
%0:_(<4 x s32>) = COPY $q0
7476
%1:_(<4 x s32>) = COPY $q1
@@ -92,8 +94,13 @@ body: |
9294
; CHECK-NEXT: {{ $}}
9395
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
9496
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
95-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(1, 4, 5, 7)
96-
; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
97+
; CHECK-NEXT: [[UZP2_:%[0-9]+]]:_(<4 x s32>) = G_UZP2 [[COPY]], [[COPY1]]
98+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
99+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C]](s64)
100+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
101+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[UZP2_]], [[EVEC]](s32), [[C1]](s64)
102+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY [[IVEC]](<4 x s32>)
103+
; CHECK-NEXT: $q0 = COPY [[COPY2]](<4 x s32>)
97104
; CHECK-NEXT: RET_ReallyLR implicit $q0
98105
%0:_(<4 x s32>) = COPY $q0
99106
%1:_(<4 x s32>) = COPY $q1

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-zip.mir

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,13 @@ body: |
220220
; CHECK-NEXT: {{ $}}
221221
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
222222
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
223-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(3, 4, 1, 5)
224-
; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
223+
; CHECK-NEXT: [[ZIP1_:%[0-9]+]]:_(<4 x s32>) = G_ZIP1 [[COPY]], [[COPY1]]
224+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
225+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
226+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
227+
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[ZIP1_]], [[EVEC]](s32), [[C1]](s64)
228+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY [[IVEC]](<4 x s32>)
229+
; CHECK-NEXT: $q0 = COPY [[COPY2]](<4 x s32>)
225230
; CHECK-NEXT: RET_ReallyLR implicit $q0
226231
%0:_(<4 x s32>) = COPY $q0
227232
%1:_(<4 x s32>) = COPY $q1

0 commit comments

Comments
 (0)