@@ -504,6 +504,189 @@ void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
504504 MI.eraseFromParent ();
505505}
506506
507+ // / Match 4 elemental G_SHUFFLE_VECTOR
508+ bool matchPerfectShuffle (MachineInstr &MI, MachineRegisterInfo &MRI) {
509+ assert (MI.getOpcode () == TargetOpcode::G_SHUFFLE_VECTOR);
510+ return MRI.getType (MI.getOperand (0 ).getReg ()).getNumElements () == 4 ;
511+ }
512+
513+ static Register GeneratePerfectShuffle (unsigned ID, Register V1, Register V2,
514+ unsigned PFEntry, Register LHS,
515+ Register RHS, MachineIRBuilder &MIB,
516+ MachineRegisterInfo &MRI) {
517+ unsigned OpNum = (PFEntry >> 26 ) & 0x0F ;
518+ unsigned LHSID = (PFEntry >> 13 ) & ((1 << 13 ) - 1 );
519+ unsigned RHSID = (PFEntry >> 0 ) & ((1 << 13 ) - 1 );
520+
521+ if (OpNum == OP_COPY) {
522+ if (LHSID == (1 * 9 + 2 ) * 9 + 3 )
523+ return LHS;
524+ assert (LHSID == ((4 * 9 + 5 ) * 9 + 6 ) * 9 + 7 && " Illegal OP_COPY!" );
525+ return RHS;
526+ }
527+
528+ if (OpNum == OP_MOVLANE) {
529+ // Decompose a PerfectShuffle ID to get the Mask for lane Elt
530+ auto getPFIDLane = [](unsigned ID, int Elt) -> int {
531+ assert (Elt < 4 && " Expected Perfect Lanes to be less than 4" );
532+ Elt = 3 - Elt;
533+ while (Elt > 0 ) {
534+ ID /= 9 ;
535+ Elt--;
536+ }
537+ return (ID % 9 == 8 ) ? -1 : ID % 9 ;
538+ };
539+
540+ // For OP_MOVLANE shuffles, the RHSID represents the lane to move into. We
541+ // get the lane to move from the PFID, which is always from the
542+ // original vectors (V1 or V2).
543+ Register OpLHS = GeneratePerfectShuffle (
544+ LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, MIB, MRI);
545+ LLT VT = MRI.getType (OpLHS);
546+ assert (RHSID < 8 && " Expected a lane index for RHSID!" );
547+ unsigned ExtLane = 0 ;
548+ Register Input;
549+
550+ // OP_MOVLANE are either D movs (if bit 0x4 is set) or S movs. D movs
551+ // convert into a higher type.
552+ if (RHSID & 0x4 ) {
553+ int MaskElt = getPFIDLane (ID, (RHSID & 0x01 ) << 1 ) >> 1 ;
554+ if (MaskElt == -1 )
555+ MaskElt = (getPFIDLane (ID, ((RHSID & 0x01 ) << 1 ) + 1 ) - 1 ) >> 1 ;
556+ assert (MaskElt >= 0 && " Didn't expect an undef movlane index!" );
557+ ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2 );
558+ Input = MaskElt < 2 ? V1 : V2;
559+ if (VT.getScalarSizeInBits () == 16 && VT != LLT::fixed_vector (2 , 32 )) {
560+ Input = MIB.buildInstr (TargetOpcode::G_BITCAST,
561+ {LLT::fixed_vector (2 , 32 )}, {Input})
562+ .getReg (0 );
563+ OpLHS = MIB.buildInstr (TargetOpcode::G_BITCAST,
564+ {LLT::fixed_vector (2 , 32 )}, {OpLHS})
565+ .getReg (0 );
566+ }
567+ if (VT.getScalarSizeInBits () == 32 && VT != LLT::fixed_vector (2 , 64 )) {
568+ Input = MIB.buildInstr (TargetOpcode::G_BITCAST,
569+ {LLT::fixed_vector (2 , 64 )}, {Input})
570+ .getReg (0 );
571+ OpLHS = MIB.buildInstr (TargetOpcode::G_BITCAST,
572+ {LLT::fixed_vector (2 , 64 )}, {OpLHS})
573+ .getReg (0 );
574+ }
575+ } else {
576+ int MaskElt = getPFIDLane (ID, RHSID);
577+ assert (MaskElt >= 0 && " Didn't expect an undef movlane index!" );
578+ ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4 );
579+ Input = MaskElt < 4 ? V1 : V2;
580+ if (VT.getScalarSizeInBits () == 16 && VT != LLT::fixed_vector (4 , 16 )) {
581+ Input = MIB.buildInstr (TargetOpcode::G_BITCAST,
582+ {LLT::fixed_vector (4 , 16 )}, {Input})
583+ .getReg (0 );
584+ OpLHS = MIB.buildInstr (TargetOpcode::G_BITCAST,
585+ {LLT::fixed_vector (4 , 16 )}, {OpLHS})
586+ .getReg (0 );
587+ }
588+ }
589+ auto Ext = MIB.buildExtractVectorElementConstant (
590+ MRI.getType (Input).getElementType (), Input, ExtLane);
591+ auto Ins = MIB.buildInsertVectorElementConstant (MRI.getType (Input), OpLHS,
592+ Ext, RHSID & 0x3 );
593+ if (MRI.getType (Ins.getReg (0 )) != VT)
594+ return MIB.buildInstr (TargetOpcode::G_BITCAST, {VT}, {Ins}).getReg (0 );
595+ return Ins.getReg (0 );
596+ }
597+
598+ Register OpLHS, OpRHS;
599+ OpLHS = GeneratePerfectShuffle (LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
600+ RHS, MIB, MRI);
601+ OpRHS = GeneratePerfectShuffle (RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
602+ RHS, MIB, MRI);
603+ LLT VT = MRI.getType (OpLHS);
604+
605+ switch (OpNum) {
606+ default :
607+ llvm_unreachable (" Unknown shuffle opcode!" );
608+ case OP_VREV: {
609+ // VREV divides the vector in half and swaps within the half.
610+ unsigned Opcode = VT.getScalarSizeInBits () == 32 ? AArch64::G_REV64
611+ : VT.getScalarSizeInBits () == 16 ? AArch64::G_REV32
612+ : AArch64::G_REV16;
613+ return MIB.buildInstr (Opcode, {VT}, {OpLHS}).getReg (0 );
614+ }
615+ case OP_VDUP0:
616+ case OP_VDUP1:
617+ case OP_VDUP2:
618+ case OP_VDUP3: {
619+ unsigned Opcode;
620+ if (VT.getScalarSizeInBits () == 8 )
621+ Opcode = AArch64::G_DUPLANE8;
622+ else if (VT.getScalarSizeInBits () == 16 )
623+ Opcode = AArch64::G_DUPLANE16;
624+ else if (VT.getScalarSizeInBits () == 32 )
625+ Opcode = AArch64::G_DUPLANE32;
626+ else if (VT.getScalarSizeInBits () == 64 )
627+ Opcode = AArch64::G_DUPLANE64;
628+ else
629+ llvm_unreachable (" Invalid vector element type?" );
630+
631+ if (VT.getSizeInBits () == 64 )
632+ OpLHS = MIB.buildConcatVectors (
633+ VT.changeElementCount (VT.getElementCount () * 2 ),
634+ {OpLHS, MIB.buildUndef (VT).getReg (0 )})
635+ .getReg (0 );
636+ Register Lane =
637+ MIB.buildConstant (LLT::scalar (64 ), OpNum - OP_VDUP0).getReg (0 );
638+ return MIB.buildInstr (Opcode, {VT}, {OpLHS, Lane}).getReg (0 );
639+ }
640+ case OP_VEXT1:
641+ case OP_VEXT2:
642+ case OP_VEXT3: {
643+ unsigned Imm = (OpNum - OP_VEXT1 + 1 ) * VT.getScalarSizeInBits () / 8 ;
644+ return MIB
645+ .buildInstr (AArch64::G_EXT, {VT},
646+ {OpLHS, OpRHS, MIB.buildConstant (LLT::scalar (64 ), Imm)})
647+ .getReg (0 );
648+ }
649+ case OP_VUZPL:
650+ return MIB.buildInstr (AArch64::G_UZP1, {VT}, {OpLHS, OpRHS}).getReg (0 );
651+ case OP_VUZPR:
652+ return MIB.buildInstr (AArch64::G_UZP2, {VT}, {OpLHS, OpRHS}).getReg (0 );
653+ case OP_VZIPL:
654+ return MIB.buildInstr (AArch64::G_ZIP1, {VT}, {OpLHS, OpRHS}).getReg (0 );
655+ case OP_VZIPR:
656+ return MIB.buildInstr (AArch64::G_ZIP2, {VT}, {OpLHS, OpRHS}).getReg (0 );
657+ case OP_VTRNL:
658+ return MIB.buildInstr (AArch64::G_TRN1, {VT}, {OpLHS, OpRHS}).getReg (0 );
659+ case OP_VTRNR:
660+ return MIB.buildInstr (AArch64::G_TRN2, {VT}, {OpLHS, OpRHS}).getReg (0 );
661+ }
662+ }
663+
664+ void applyPerfectShuffle (MachineInstr &MI, MachineRegisterInfo &MRI,
665+ MachineIRBuilder &Builder) {
666+ Register Dst = MI.getOperand (0 ).getReg ();
667+ Register LHS = MI.getOperand (1 ).getReg ();
668+ Register RHS = MI.getOperand (2 ).getReg ();
669+ ArrayRef<int > ShuffleMask = MI.getOperand (3 ).getShuffleMask ();
670+ assert (ShuffleMask.size () == 4 && " Expected 4 element mask" );
671+
672+ unsigned PFIndexes[4 ];
673+ for (unsigned i = 0 ; i != 4 ; ++i) {
674+ if (ShuffleMask[i] < 0 )
675+ PFIndexes[i] = 8 ;
676+ else
677+ PFIndexes[i] = ShuffleMask[i];
678+ }
679+
680+ // Compute the index in the perfect shuffle table.
681+ unsigned PFTableIndex = PFIndexes[0 ] * 9 * 9 * 9 + PFIndexes[1 ] * 9 * 9 +
682+ PFIndexes[2 ] * 9 + PFIndexes[3 ];
683+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
684+ Register Res = GeneratePerfectShuffle (PFTableIndex, LHS, RHS, PFEntry, LHS,
685+ RHS, Builder, MRI);
686+ Builder.buildCopy (Dst, Res);
687+ MI.eraseFromParent ();
688+ }
689+
507690// / isVShiftRImm - Check if this is a valid vector for the immediate
508691// / operand of a vector shift right operation. The value must be in the range:
509692// / 1 <= Value <= ElementBits for a right shift.
0 commit comments