@@ -13501,172 +13501,6 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
1350113501 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
1350213502}
1350313503
13504- /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
13505- /// the specified operations to build the shuffle. ID is the perfect-shuffle
13506- /// ID, V1 and V2 are the original shuffle inputs. PFEntry is the perfect-shuffle
13507- /// table entry (opcode: bits 26-29, LHSID: bits 13-25, RHSID: bits 0-12) and
13508- /// LHS/RHS are the immediate inputs for this stage of the shuffle.
13509- static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
13510- SDValue V2, unsigned PFEntry, SDValue LHS,
13511- SDValue RHS, SelectionDAG &DAG,
13512- const SDLoc &dl) {
13513- unsigned OpNum = (PFEntry >> 26) & 0x0F;
13514- unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
13515- unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
13516-
13517- enum {
13518- OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
13519- OP_VREV,
13520- OP_VDUP0,
13521- OP_VDUP1,
13522- OP_VDUP2,
13523- OP_VDUP3,
13524- OP_VEXT1,
13525- OP_VEXT2,
13526- OP_VEXT3,
13527- OP_VUZPL, // VUZP, left result
13528- OP_VUZPR, // VUZP, right result
13529- OP_VZIPL, // VZIP, left result
13530- OP_VZIPR, // VZIP, right result
13531- OP_VTRNL, // VTRN, left result
13532- OP_VTRNR, // VTRN, right result
13533- OP_MOVLANE // Move lane. RHSID is the lane to move into
13534- };
13535-
13536- if (OpNum == OP_COPY) {
13537- if (LHSID == (1 * 9 + 2) * 9 + 3)
13538- return LHS;
13539- assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
13540- return RHS;
13541- }
13542-
13543- if (OpNum == OP_MOVLANE) {
13544- // Decode mask element Elt (0-3) from a perfect-shuffle ID: four base-9 digits, lane 0 most significant; digit 8 is undef (-1).
13545- auto getPFIDLane = [](unsigned ID, int Elt) -> int {
13546- assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
13547- Elt = 3 - Elt;
13548- while (Elt > 0) {
13549- ID /= 9;
13550- Elt--;
13551- }
13552- return (ID % 9 == 8) ? -1 : ID % 9;
13553- };
13554-
13555- // For OP_MOVLANE shuffles, the RHSID represents the lane to move into. We
13556- // get the lane to move from the PFID, which is always from the
13557- // original vectors (V1 or V2).
13558- SDValue OpLHS = GeneratePerfectShuffle(
13559- LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
13560- EVT VT = OpLHS.getValueType();
13561- assert(RHSID < 8 && "Expected a lane index for RHSID!");
13562- unsigned ExtLane = 0;
13563- SDValue Input;
13564-
13565- // OP_MOVLANE are either D movs (if bit 0x4 is set) or S movs. D movs
13566- // bitcast both vectors to a type with double-width elements (v2f32 or v2f64).
13567- if (RHSID & 0x4) {
13568- int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
13569- if (MaskElt == -1)
13570- MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
13571- assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
13572- ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
13573- Input = MaskElt < 2 ? V1 : V2;
13574- if (VT.getScalarSizeInBits() == 16) {
13575- Input = DAG.getBitcast(MVT::v2f32, Input);
13576- OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS);
13577- } else {
13578- assert(VT.getScalarSizeInBits() == 32 &&
13579- "Expected 16 or 32 bit shuffle elemements");
13580- Input = DAG.getBitcast(MVT::v2f64, Input);
13581- OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS);
13582- }
13583- } else {
13584- int MaskElt = getPFIDLane(ID, RHSID);
13585- assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
13586- ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
13587- Input = MaskElt < 4 ? V1 : V2;
13588- // Be careful about creating illegal types. Use f16 instead of i16.
13589- if (VT == MVT::v4i16) {
13590- Input = DAG.getBitcast(MVT::v4f16, Input);
13591- OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS);
13592- }
13593- }
13594- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
13595- Input.getValueType().getVectorElementType(),
13596- Input, DAG.getVectorIdxConstant(ExtLane, dl));
13597- SDValue Ins =
13598- DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS,
13599- Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl));
13600- return DAG.getBitcast(VT, Ins);
13601- }
13602-
13603- SDValue OpLHS, OpRHS;
13604- OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
13605- RHS, DAG, dl);
13606- OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
13607- RHS, DAG, dl);
13608- EVT VT = OpLHS.getValueType();
13609-
13610- switch (OpNum) {
13611- default:
13612- llvm_unreachable("Unknown shuffle opcode!");
13613- case OP_VREV:
13614- // VREV divides the vector in half and swaps within the half.
13615- if (VT.getVectorElementType() == MVT::i32 ||
13616- VT.getVectorElementType() == MVT::f32)
13617- return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
13618- // vrev <4 x i16> -> REV32
13619- if (VT.getVectorElementType() == MVT::i16 ||
13620- VT.getVectorElementType() == MVT::f16 ||
13621- VT.getVectorElementType() == MVT::bf16)
13622- return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
13623- // vrev <4 x i8> -> REV16
13624- assert(VT.getVectorElementType() == MVT::i8);
13625- return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
13626- case OP_VDUP0:
13627- case OP_VDUP1:
13628- case OP_VDUP2:
13629- case OP_VDUP3: {
13630- EVT EltTy = VT.getVectorElementType();
13631- unsigned Opcode;
13632- if (EltTy == MVT::i8)
13633- Opcode = AArch64ISD::DUPLANE8;
13634- else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
13635- Opcode = AArch64ISD::DUPLANE16;
13636- else if (EltTy == MVT::i32 || EltTy == MVT::f32)
13637- Opcode = AArch64ISD::DUPLANE32;
13638- else if (EltTy == MVT::i64 || EltTy == MVT::f64)
13639- Opcode = AArch64ISD::DUPLANE64;
13640- else
13641- llvm_unreachable("Invalid vector element type?");
13642-
13643- if (VT.getSizeInBits() == 64)
13644- OpLHS = WidenVector(OpLHS, DAG);
13645- SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
13646- return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
13647- }
13648- case OP_VEXT1:
13649- case OP_VEXT2:
13650- case OP_VEXT3: {
13651- unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
13652- return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
13653- DAG.getConstant(Imm, dl, MVT::i32));
13654- }
13655- case OP_VUZPL:
13656- return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
13657- case OP_VUZPR:
13658- return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
13659- case OP_VZIPL:
13660- return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
13661- case OP_VZIPR:
13662- return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
13663- case OP_VTRNL:
13664- return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
13665- case OP_VTRNR:
13666- return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
13667- }
13668- }
13669-
1367013504static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
1367113505 SelectionDAG &DAG) {
1367213506 // Check to see if we can use the TBL instruction.
@@ -14090,8 +13924,95 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
1409013924 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
1409113925 PFIndexes[2] * 9 + PFIndexes[3];
1409213926 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
14093- return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG,
14094- dl);
13927+
13928+ auto BuildRev = [&DAG, &dl](SDValue OpLHS) {
13929+ EVT VT = OpLHS.getValueType();
13930+ unsigned Opcode = VT.getScalarSizeInBits() == 32 ? AArch64ISD::REV64
13931+ : VT.getScalarSizeInBits() == 16 ? AArch64ISD::REV32
13932+ : AArch64ISD::REV16;
13933+ return DAG.getNode(Opcode, dl, VT, OpLHS);
13934+ };
13935+ auto BuildDup = [&DAG, &dl](SDValue OpLHS, unsigned Lane) {
13936+ EVT VT = OpLHS.getValueType();
13937+ unsigned Opcode;
13938+ if (VT.getScalarSizeInBits() == 8)
13939+ Opcode = AArch64ISD::DUPLANE8;
13940+ else if (VT.getScalarSizeInBits() == 16)
13941+ Opcode = AArch64ISD::DUPLANE16;
13942+ else if (VT.getScalarSizeInBits() == 32)
13943+ Opcode = AArch64ISD::DUPLANE32;
13944+ else if (VT.getScalarSizeInBits() == 64)
13945+ Opcode = AArch64ISD::DUPLANE64;
13946+ else
13947+ llvm_unreachable("Invalid vector element type?");
13948+
13949+ if (VT.getSizeInBits() == 64)
13950+ OpLHS = WidenVector(OpLHS, DAG);
13951+ return DAG.getNode(Opcode, dl, VT, OpLHS,
13952+ DAG.getConstant(Lane, dl, MVT::i64));
13953+ };
13954+ auto BuildExt = [&DAG, &dl](SDValue OpLHS, SDValue OpRHS, unsigned Imm) {
13955+ EVT VT = OpLHS.getValueType();
13956+ Imm = Imm * getExtFactor(OpLHS);
13957+ return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
13958+ DAG.getConstant(Imm, dl, MVT::i32));
13959+ };
13960+ auto BuildZipLike = [&DAG, &dl](unsigned OpNum, SDValue OpLHS,
13961+ SDValue OpRHS) {
13962+ EVT VT = OpLHS.getValueType();
13963+ switch (OpNum) {
13964+ default:
13965+ llvm_unreachable("Unexpected perfect shuffle opcode\n");
13966+ case OP_VUZPL:
13967+ return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
13968+ case OP_VUZPR:
13969+ return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
13970+ case OP_VZIPL:
13971+ return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
13972+ case OP_VZIPR:
13973+ return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
13974+ case OP_VTRNL:
13975+ return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
13976+ case OP_VTRNR:
13977+ return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
13978+ }
13979+ };
13980+ auto BuildExtractInsert64 = [&DAG, &dl](SDValue ExtSrc, unsigned ExtLane,
13981+ SDValue InsSrc, unsigned InsLane) {
13982+ EVT VT = InsSrc.getValueType();
13983+ if (VT.getScalarSizeInBits() == 16) {
13984+ ExtSrc = DAG.getBitcast(MVT::v2f32, ExtSrc);
13985+ InsSrc = DAG.getBitcast(MVT::v2f32, InsSrc);
13986+ } else if (VT.getScalarSizeInBits() == 32) {
13987+ ExtSrc = DAG.getBitcast(MVT::v2f64, ExtSrc);
13988+ InsSrc = DAG.getBitcast(MVT::v2f64, InsSrc);
13989+ }
13990+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
13991+ ExtSrc.getValueType().getVectorElementType(),
13992+ ExtSrc, DAG.getVectorIdxConstant(ExtLane, dl));
13993+ SDValue Ins =
13994+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtSrc.getValueType(), InsSrc,
13995+ Ext, DAG.getVectorIdxConstant(InsLane, dl));
13996+ return DAG.getBitcast(VT, Ins);
13997+ };
13998+ auto BuildExtractInsert32 = [&DAG, &dl](SDValue ExtSrc, unsigned ExtLane,
13999+ SDValue InsSrc, unsigned InsLane) {
14000+ EVT VT = InsSrc.getValueType();
14001+ if (VT.getScalarSizeInBits() == 16) {
14002+ ExtSrc = DAG.getBitcast(MVT::v4f16, ExtSrc);
14003+ InsSrc = DAG.getBitcast(MVT::v4f16, InsSrc);
14004+ }
14005+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
14006+ ExtSrc.getValueType().getVectorElementType(),
14007+ ExtSrc, DAG.getVectorIdxConstant(ExtLane, dl));
14008+ SDValue Ins =
14009+ DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtSrc.getValueType(), InsSrc,
14010+ Ext, DAG.getVectorIdxConstant(InsLane, dl));
14011+ return DAG.getBitcast(VT, Ins);
14012+ };
14013+ return generatePerfectShuffle<SDValue, MVT>(
14014+ PFTableIndex, V1, V2, PFEntry, V1, V2, BuildExtractInsert64,
14015+ BuildExtractInsert32, BuildRev, BuildDup, BuildExt, BuildZipLike);
1409514016 }
1409614017
1409714018 // Check for a "select shuffle", generating a BSL to pick between lanes in
0 commit comments