diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 496f126b7173d..d80509cf39849 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -252,9 +252,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SETCC, VT, Legal);
       setOperationAction(ISD::VSELECT, VT, Legal);
+      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
     }
     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
-      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                          Legal);
@@ -298,9 +298,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SETCC, VT, Legal);
       setOperationAction(ISD::VSELECT, VT, Legal);
+      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
     }
     for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
-      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
                          Legal);
@@ -428,9 +428,926 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
   return SDValue();
 }
 
+/// Determine whether a range fits a regular pattern of values.
+/// This function accounts for the possibility of jumping over the End
+/// iterator.
+template <typename ValType>
+static bool
+fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
+                   unsigned CheckStride,
+                   typename SmallVectorImpl<ValType>::const_iterator End,
+                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
+  auto &I = Begin;
+
+  while (I != End) {
+    if (*I != -1 && *I != ExpectedIndex)
+      return false;
+    ExpectedIndex += ExpectedIndexStride;
+
+    // Incrementing past End is undefined behaviour so we must increment one
+    // step at a time and check for End at each step.
+    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
+      ; // Empty loop body.
+  }
+  return true;
+}
+
+/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
+///
+/// VREPLVEI performs vector broadcast based on an element specified by an
+/// integer immediate, with its mask being similar to:
+///   <x, x, x, ...>
+/// where x is any valid index.
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
+                                            MVT VT, SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  int SplatIndex = -1;
+  for (const auto &M : Mask) {
+    if (M != -1) {
+      SplatIndex = M;
+      break;
+    }
+  }
+
+  if (SplatIndex == -1)
+    return DAG.getUNDEF(VT);
+
+  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
+  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
+    APInt Imm(64, SplatIndex);
+    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
+                       DAG.getConstant(Imm, DL, MVT::i64));
+  }
+
+  return SDValue();
+}
+
+/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
+///
+/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
+/// elements according to a <4 x i2> constant (encoded as an integer immediate).
+///
+/// It is therefore possible to lower into VSHUF4I when the mask takes the
+/// form:
+///   <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
+/// When undef's appear they are treated as if they were whatever value is
+/// necessary in order to fit the above forms.
+///
+/// For example:
+///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+///                      <8 x i32> <i32 3, i32 2, i32 1, i32 0,
+///                                 i32 7, i32 6, i32 5, i32 4>
+/// is lowered to:
+///   (VSHUF4I_H $v0, $v1, 27)
+/// where the 27 comes from:
+///   3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  // When the size is less than 4, lower cost instructions may be used.
+  if (Mask.size() < 4)
+    return SDValue();
+
+  int SubMask[4] = {-1, -1, -1, -1};
+  for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned j = i; j < Mask.size(); j += 4) {
+      int Idx = Mask[j];
+
+      // Convert from vector index to 4-element subvector index.
+      // If an index refers to an element outside of the subvector then give
+      // up.
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef.
+      if (SubMask[i] == -1)
+        SubMask[i] = Idx;
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up.
+      else if (Idx != -1 && Idx != SubMask[i])
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero.
+  APInt Imm(64, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SubMask[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+
+  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
+                     DAG.getConstant(Imm, DL, MVT::i64));
+}
+
+/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
+///
+/// VPACKEV interleaves the even elements from each vector.
+///
+/// It is possible to lower into VPACKEV when the mask consists of two of the
+/// following forms interleaved:
+///   <0, 2, 4, ...>
+///   <n, n+2, n+4, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <0, 0, 2, 2, 4, 4, ...>
+///   <0, n, 2, n+2, 4, n+4, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
+///
+/// VPACKOD interleaves the odd elements from each vector.
+///
+/// It is possible to lower into VPACKOD when the mask consists of two of the
+/// following forms interleaved:
+///   <1, 3, 5, ...>
+///   <n+1, n+3, n+5, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <1, 1, 3, 3, 5, 5, ...>
+///   <1, n+1, 3, n+3, 5, n+5, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
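+///
+/// For instance, the v4i32 shuffle
+///   %r = shufflevector <4 x i32> %a, <4 x i32> %b,
+///                      <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+/// interleaves <1, 3> (from %a) with <n+1, n+3> (from %b) and can be
+/// lowered to (VPACKOD_W $v1, $v0).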
+static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VILVH (if possible).
+///
+/// VILVH interleaves consecutive elements from the left (highest-indexed) half
+/// of each vector.
+///
+/// It is possible to lower into VILVH when the mask consists of two of the
+/// following forms interleaved:
+///   <x, x+1, x+2, ...>
+///   <n+x, n+x+1, n+x+2, ...>
+/// where n is the number of elements in the vector and x is half n.
+/// For example:
+///   <x, x, x+1, x+1, x+2, x+2, ...>
+///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
+                                         MVT VT, SDValue V1, SDValue V2,
+                                         SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
+                                   1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VILVL (if possible).
+///
+/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
+/// of each vector.
+///
+/// It is possible to lower into VILVL when the mask consists of two of the
+/// following forms interleaved:
+///   <0, 1, 2, ...>
+///   <n, n+1, n+2, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <0, 0, 1, 1, 2, 2, ...>
+///   <0, n, 1, n+1, 2, n+2, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
+                                         MVT VT, SDValue V1, SDValue V2,
+                                         SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
+///
+/// VPICKEV copies the even elements of each vector into the result vector.
+///
+/// It is possible to lower into VPICKEV when the mask consists of two of the
+/// following forms concatenated:
+///   <0, 2, 4, ...>
+///   <n, n+2, n+4, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <0, 2, 4, ..., 0, 2, 4, ...>
+///   <0, 2, 4, ..., n, n+2, n+4, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
+///
+/// VPICKOD copies the odd elements of each vector into the result vector.
+///
+/// It is possible to lower into VPICKOD when the mask consists of two of the
+/// following forms concatenated:
+///   <1, 3, 5, ...>
+///   <n+1, n+3, n+5, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <1, 3, 5, ..., 1, 3, 5, ...>
+///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VSHUF.
+///
+/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
+/// adding it as an operand to the resulting VSHUF.
+static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
+                                         MVT VT, SDValue V1, SDValue V2,
+                                         SelectionDAG &DAG) {
+
+  SmallVector<SDValue, 16> Ops;
+  for (auto M : Mask)
+    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
+
+  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
+  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
+
+  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
+  //  <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
+  // VSHUF concatenates the vectors in a bitwise fashion:
+  //  <0b00, 0b01> + <0b10, 0b11> ->
+  //  0b0100       + 0b1110       -> 0b01001110
+  //                                 <0b10, 0b11, 0b00, 0b01>
+  // We must therefore swap the operands to get the correct result.
+  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
+}
+
+/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
+///
+/// This routine breaks down the specific type of 128-bit shuffle and
+/// dispatches to the lowering routines accordingly.
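+///
+/// As an illustration, a v4i32 mask such as <0, 4, 1, 5> is rejected by the
+/// VPACKEV/VPACKOD/VILVH checks but fits VILVL, so it lowers to
+/// (VILVL_W $v1, $v0); a mask with no regular structure falls through to the
+/// generic VSHUF at the end of the chain.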
+static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
+  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
+          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
+          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
+         "Vector type is unsupported for lsx!");
+  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
+         "Two operands have different types!");
+  assert(VT.getVectorNumElements() == Mask.size() &&
+         "Unexpected mask size for shuffle!");
+  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
+
+  SDValue Result;
+  // TODO: Add more comparison patterns.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
+      return Result;
+
+    // TODO: The commented-out line below may be enabled in the future to
+    // better match the pattern for instruction selection.
+    /* V2 = V1; */
+  }
+
+  // It is recommended not to change the pattern comparison order for better
+  // performance.
+  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+
+  return SDValue();
+}
+
+/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
+///
+/// It is an XVREPLVEI when the mask is:
+///   <x, x, x, ..., x+n, x+n, x+n, ...>
+/// where the number of x's equals n and n is half the length of the vector.
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
+                                             ArrayRef<int> Mask, MVT VT,
+                                             SDValue V1, SDValue V2,
+                                             SelectionDAG &DAG) {
+  int SplatIndex = -1;
+  for (const auto &M : Mask) {
+    if (M != -1) {
+      SplatIndex = M;
+      break;
+    }
+  }
+
+  if (SplatIndex == -1)
+    return DAG.getUNDEF(VT);
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+
+  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
+  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
+                              0)) {
+    APInt Imm(64, SplatIndex);
+    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
+                       DAG.getConstant(Imm, DL, MVT::i64));
+  }
+
+  return SDValue();
+}
+
+/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  // When the size is less than or equal to 4, lower cost instructions may be
+  // used.
+  if (Mask.size() <= 4)
+    return SDValue();
+  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
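+///
+/// XVPACKEV packs even elements pairwise across the whole register, so the
+/// 256-bit form has the same mask shape as the 128-bit one and the LSX
+/// routine can be reused unchanged.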
+static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  unsigned LeftSize = HalfSize / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
+                              1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize,
+                              1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
+                                   Mask.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Mask.size() + HalfSize + LeftSize, 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
+                              HalfSize - LeftSize, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                              HalfSize + LeftSize, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
+                                   Mask.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Mask.size() + HalfSize + LeftSize, 1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(),
+                                   1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Mask.size() + HalfSize, 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
+                                   1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Mask.size() + HalfSize, 1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
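+///
+/// Unlike the 128-bit form, the mask is checked in quarters because each
+/// 128-bit lane contributes half of its result independently; e.g. the v8i32
+/// mask <0, 2, 8, 10, 4, 6, 12, 14> lowers to (XVPICKEV_W $xr1, $xr0).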
+static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &RightMid = Mask.end() - Mask.size() / 4;
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize,
+                                   2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize,
+                                   2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &RightMid = Mask.end() - Mask.size() / 4;
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid,
+                                   Mask.size() + HalfSize + 1, 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End,
+                                   Mask.size() + HalfSize + 1, 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+
+  int MaskSize = Mask.size();
+  int HalfSize = Mask.size() / 2;
+  const auto &Begin = Mask.begin();
+  const auto &Mid = Mask.begin() + HalfSize;
+  const auto &End = Mask.end();
+
+  // VECTOR_SHUFFLE concatenates the vectors:
+  //  <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
+  //  shuffling ->
+  //  <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
+  //
+  // XVSHUF concatenates the vectors:
+  //  <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
+  //  shuffling ->
+  //  <a0, a1, a2, a3, b0, b1, b2, b3> <a4, a5, a6, a7, b4, b5, b6, b7>
+  SmallVector<SDValue, 8> MaskAlloc;
+  for (auto it = Begin; it < Mid; it++) {
+    if (*it < 0) // UNDEF
+      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
+    else if ((*it >= 0 && *it < HalfSize) ||
+             (*it >= MaskSize && *it < MaskSize + HalfSize)) {
+      int M = *it < HalfSize ? *it : *it - HalfSize;
+      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
+    } else
+      return SDValue();
+  }
+  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
+
+  for (auto it = Mid; it < End; it++) {
+    if (*it < 0) // UNDEF
+      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
+    else if ((*it >= HalfSize && *it < MaskSize) ||
+             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
+      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
+      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
+    } else
+      return SDValue();
+  }
+  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
+
+  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
+  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
+  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
+}
+
+/// Shuffle vectors by lane to generate more optimized instructions.
+/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
+///
+/// Therefore, except for the following four cases, other cases are regarded
+/// as cross-lane shuffles, where optimization is relatively limited.
+///
+/// - Shuffle high, low lanes of two input vectors
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
+/// - Shuffle low, high lanes of two input vectors
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
+/// - Shuffle low, low lanes of two input vectors
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
+/// - Shuffle high, high lanes of two input vectors
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
+///
+/// The first case is the closest to LoongArch instructions and the other
+/// cases need to be converted to it for processing.
+///
+/// This function may modify V1, V2 and Mask.
+static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
+                                            MutableArrayRef<int> Mask, MVT VT,
+                                            SDValue &V1, SDValue &V2,
+                                            SelectionDAG &DAG) {
+
+  enum HalfMaskType { HighLaneTy, LowLaneTy, None };
+
+  int MaskSize = Mask.size();
+  int HalfSize = Mask.size() / 2;
+
+  HalfMaskType preMask = None, postMask = None;
+
+  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
+        return M < 0 || (M >= 0 && M < HalfSize) ||
+               (M >= MaskSize && M < MaskSize + HalfSize);
+      }))
+    preMask = HighLaneTy;
+  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
+             return M < 0 || (M >= HalfSize && M < MaskSize) ||
+                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
+           }))
+    preMask = LowLaneTy;
+
+  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
+        return M < 0 || (M >= 0 && M < HalfSize) ||
+               (M >= MaskSize && M < MaskSize + HalfSize);
+      }))
+    postMask = HighLaneTy;
+  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
+             return M < 0 || (M >= HalfSize && M < MaskSize) ||
+                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
+           }))
+    postMask = LowLaneTy;
+
+  // The pre-half of the mask is high lane type, and the post-half of the
+  // mask is low lane type, which is closest to the LoongArch instructions.
+  //
+  // Note: In the LoongArch architecture, the high lane of the mask
+  // corresponds to the lower 128 bits of the vector register, and the low
+  // lane of the mask corresponds to the higher 128 bits of the vector
+  // register.
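+  //
+  // For example, swapping the two lanes of a register is done below with
+  // (XVPERMI_D $xr, 0b01001110): each 2-bit field of the immediate selects
+  // one source double-word, and the fields <2, 3, 0, 1> (low to high) move
+  // the high 128 bits into the low half and vice versa.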
+  if (preMask == HighLaneTy && postMask == LowLaneTy) {
+    return;
+  }
+  if (preMask == LowLaneTy && postMask == HighLaneTy) {
+    V1 = DAG.getBitcast(MVT::v4i64, V1);
+    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
+                     DAG.getConstant(0b01001110, DL, MVT::i64));
+    V1 = DAG.getBitcast(VT, V1);
+
+    if (!V2.isUndef()) {
+      V2 = DAG.getBitcast(MVT::v4i64, V2);
+      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
+                       DAG.getConstant(0b01001110, DL, MVT::i64));
+      V2 = DAG.getBitcast(VT, V2);
+    }
+
+    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
+      *it = *it < 0 ? *it : *it - HalfSize;
+    }
+    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
+      *it = *it < 0 ? *it : *it + HalfSize;
+    }
+  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
+    V1 = DAG.getBitcast(MVT::v4i64, V1);
+    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
+                     DAG.getConstant(0b11101110, DL, MVT::i64));
+    V1 = DAG.getBitcast(VT, V1);
+
+    if (!V2.isUndef()) {
+      V2 = DAG.getBitcast(MVT::v4i64, V2);
+      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
+                       DAG.getConstant(0b11101110, DL, MVT::i64));
+      V2 = DAG.getBitcast(VT, V2);
+    }
+
+    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
+      *it = *it < 0 ? *it : *it - HalfSize;
+    }
+  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
+    V1 = DAG.getBitcast(MVT::v4i64, V1);
+    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
+                     DAG.getConstant(0b01000100, DL, MVT::i64));
+    V1 = DAG.getBitcast(VT, V1);
+
+    if (!V2.isUndef()) {
+      V2 = DAG.getBitcast(MVT::v4i64, V2);
+      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
+                       DAG.getConstant(0b01000100, DL, MVT::i64));
+      V2 = DAG.getBitcast(VT, V2);
+    }
+
+    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
+      *it = *it < 0 ? *it : *it + HalfSize;
+    }
+  } else { // cross-lane
+    return;
+  }
+}
+
+/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
+///
+/// This routine breaks down the specific type of 256-bit shuffle and
+/// dispatches to the lowering routines accordingly.
+static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
+  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
+          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
+          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
+         "Vector type is unsupported for lasx!");
+  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
+         "Two operands have different types!");
+  assert(VT.getVectorNumElements() == Mask.size() &&
+         "Unexpected mask size for shuffle!");
+  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
+  assert(Mask.size() >= 4 && "Mask size is less than 4.");
+
+  // Canonicalize non-cross-lane shuffle vectors.
+  SmallVector<int> NewMask(Mask.begin(), Mask.end());
+  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
+
+  SDValue Result;
+  // TODO: Add more comparison patterns.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
+
+    // TODO: The commented-out line below may be enabled in the future to
+    // better match the pattern for instruction selection.
+    /* V2 = V1; */
+  }
+
+  // It is recommended not to change the pattern comparison order for better
+  // performance.
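+  // For example, the v4i64 mask <0, 4, 2, 6> fits XVPACKEV, XVILVL and
+  // XVPICKEV alike; checking XVPACKEV first keeps the selected instruction
+  // deterministic across equally suitable candidates.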
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+
+  return SDValue();
+}
+
 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                      SelectionDAG &DAG) const {
-  // TODO: custom shuffle.
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  ArrayRef<int> OrigMask = SVOp->getMask();
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  MVT VT = Op.getSimpleValueType();
+  int NumElements = VT.getVectorNumElements();
+  SDLoc DL(Op);
+
+  bool V1IsUndef = V1.isUndef();
+  bool V2IsUndef = V2.isUndef();
+  if (V1IsUndef && V2IsUndef)
+    return DAG.getUNDEF(VT);
+
+  // When we create a shuffle node we put the UNDEF node as the second
+  // operand, but in some cases the first operand may be transformed to UNDEF.
+  // In this case we should just commute the node.
+  if (V1IsUndef)
+    return DAG.getCommutedVectorShuffle(*SVOp);
+
+  // Check for non-undef masks pointing at an undef vector and make the masks
+  // undef as well. This makes it easier to match the shuffle based solely on
+  // the mask.
+  if (V2IsUndef &&
+      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
+    SmallVector<int> NewMask(OrigMask.begin(), OrigMask.end());
+    for (int &M : NewMask)
+      if (M >= NumElements)
+        M = -1;
+    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+  }
+
+  // Check for illegal shuffle mask element index values.
+  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
+  (void)MaskUpperLimit;
+  assert(llvm::all_of(OrigMask,
+                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
+         "Out of bounds shuffle index");
+
+  // For each vector width, delegate to a specialized lowering routine.
+ if (VT.is128BitVector()) + return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG); + + if (VT.is256BitVector()) + return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG); + return SDValue(); } @@ -3706,6 +4623,16 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MOVFCSR2GR) NODE_NAME_CASE(CACOP_D) NODE_NAME_CASE(CACOP_W) + NODE_NAME_CASE(VSHUF) + NODE_NAME_CASE(VPICKEV) + NODE_NAME_CASE(VPICKOD) + NODE_NAME_CASE(VPACKEV) + NODE_NAME_CASE(VPACKOD) + NODE_NAME_CASE(VILVL) + NODE_NAME_CASE(VILVH) + NODE_NAME_CASE(VSHUF4I) + NODE_NAME_CASE(VREPLVEI) + NODE_NAME_CASE(XVPERMI) NODE_NAME_CASE(VPICK_SEXT_ELT) NODE_NAME_CASE(VPICK_ZEXT_ELT) NODE_NAME_CASE(VREPLVE) diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index f4c57f80fdbe4..fc5b36c2124e0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -120,6 +120,16 @@ enum NodeType : unsigned { // Vector Shuffle VREPLVE, + VSHUF, + VPICKEV, + VPICKOD, + VPACKEV, + VPACKOD, + VILVL, + VILVH, + VSHUF4I, + VREPLVEI, + XVPERMI, // Extended vector element extraction VPICK_SEXT_ELT, diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 3de1fe2b722e5..6f1969bf8cae0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -10,6 +10,8 @@ // //===----------------------------------------------------------------------===// +def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_loongArchV1RUimm>; + def lasxsplati8 : PatFrag<(ops node:$e0), (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, @@ -1575,6 +1577,134 @@ def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk), def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk), (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>; +// XVSHUF_{B/H/W/D} +def : Pat<(loongarch_vshuf v32i8:$xa, v32i8:$xj, v32i8:$xk), + (XVSHUF_B v32i8:$xj, v32i8:$xk, v32i8:$xa)>; +def : Pat<(loongarch_vshuf v16i16:$xd, v16i16:$xj, v16i16:$xk), + (XVSHUF_H v16i16:$xd, v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vshuf v8i32:$xd, v8i32:$xj, v8i32:$xk), + (XVSHUF_W v8i32:$xd, v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vshuf v4i64:$xd, v4i64:$xj, v4i64:$xk), + (XVSHUF_D v4i64:$xd, v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vshuf v8i32:$xd, v8f32:$xj, v8f32:$xk), + (XVSHUF_W v8i32:$xd, v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vshuf v4i64:$xd, v4f64:$xj, v4f64:$xk), + (XVSHUF_D v4i64:$xd, v4f64:$xj, v4f64:$xk)>; + +// XVPICKEV_{B/H/W/D} +def : Pat<(loongarch_vpickev v32i8:$xj, v32i8:$xk), + (XVPICKEV_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpickev v16i16:$xj, v16i16:$xk), + (XVPICKEV_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpickev v8i32:$xj, v8i32:$xk), + (XVPICKEV_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpickev v4i64:$xj, v4i64:$xk), + (XVPICKEV_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpickev v8f32:$xj, v8f32:$xk), + (XVPICKEV_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpickev v4f64:$xj, v4f64:$xk), + (XVPICKEV_D v4f64:$xj, v4f64:$xk)>; + +// XVPICKOD_{B/H/W/D} +def : Pat<(loongarch_vpickod v32i8:$xj, v32i8:$xk), + (XVPICKOD_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpickod v16i16:$xj, v16i16:$xk), + (XVPICKOD_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpickod v8i32:$xj, v8i32:$xk), + (XVPICKOD_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpickod v4i64:$xj, 
v4i64:$xk), + (XVPICKOD_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpickod v8f32:$xj, v8f32:$xk), + (XVPICKOD_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpickod v4f64:$xj, v4f64:$xk), + (XVPICKOD_D v4f64:$xj, v4f64:$xk)>; + +// XVPACKEV_{B/H/W/D} +def : Pat<(loongarch_vpackev v32i8:$xj, v32i8:$xk), + (XVPACKEV_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpackev v16i16:$xj, v16i16:$xk), + (XVPACKEV_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpackev v8i32:$xj, v8i32:$xk), + (XVPACKEV_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpackev v4i64:$xj, v4i64:$xk), + (XVPACKEV_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpackev v8f32:$xj, v8f32:$xk), + (XVPACKEV_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpackev v4f64:$xj, v4f64:$xk), + (XVPACKEV_D v4f64:$xj, v4f64:$xk)>; + +// XVPACKOD_{B/H/W/D} +def : Pat<(loongarch_vpackod v32i8:$xj, v32i8:$xk), + (XVPACKOD_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpackod v16i16:$xj, v16i16:$xk), + (XVPACKOD_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpackod v8i32:$xj, v8i32:$xk), + (XVPACKOD_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpackod v4i64:$xj, v4i64:$xk), + (XVPACKOD_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpackod v8f32:$xj, v8f32:$xk), + (XVPACKOD_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpackod v4f64:$xj, v4f64:$xk), + (XVPACKOD_D v4f64:$xj, v4f64:$xk)>; + +// XVILVL_{B/H/W/D} +def : Pat<(loongarch_vilvl v32i8:$xj, v32i8:$xk), + (XVILVL_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vilvl v16i16:$xj, v16i16:$xk), + (XVILVL_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vilvl v8i32:$xj, v8i32:$xk), + (XVILVL_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vilvl v4i64:$xj, v4i64:$xk), + (XVILVL_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vilvl v8f32:$xj, v8f32:$xk), + (XVILVL_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vilvl v4f64:$xj, v4f64:$xk), + (XVILVL_D v4f64:$xj, v4f64:$xk)>; + +// XVILVH_{B/H/W/D} +def : Pat<(loongarch_vilvh v32i8:$xj, v32i8:$xk), + (XVILVH_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vilvh v16i16:$xj, v16i16:$xk), + (XVILVH_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vilvh v8i32:$xj, v8i32:$xk), + (XVILVH_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vilvh v4i64:$xj, v4i64:$xk), + (XVILVH_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vilvh v8f32:$xj, v8f32:$xk), + (XVILVH_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vilvh v4f64:$xj, v4f64:$xk), + (XVILVH_D v4f64:$xj, v4f64:$xk)>; + +// XVSHUF4I_{B/H/W} +def : Pat<(loongarch_vshuf4i v32i8:$xj, immZExt8:$ui8), + (XVSHUF4I_B v32i8:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v16i16:$xj, immZExt8:$ui8), + (XVSHUF4I_H v16i16:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8), + (XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8), + (XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>; + +// XVREPL128VEI_{B/H/W/D} +def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4), + (XVREPL128VEI_B v32i8:$xj, immZExt4:$ui4)>; +def : Pat<(loongarch_vreplvei v16i16:$xj, immZExt3:$ui3), + (XVREPL128VEI_H v16i16:$xj, immZExt3:$ui3)>; +def : Pat<(loongarch_vreplvei v8i32:$xj, immZExt2:$ui2), + (XVREPL128VEI_W v8i32:$xj, immZExt2:$ui2)>; +def : Pat<(loongarch_vreplvei v4i64:$xj, immZExt1:$ui1), + (XVREPL128VEI_D v4i64:$xj, immZExt1:$ui1)>; +def : Pat<(loongarch_vreplvei v8f32:$xj, immZExt2:$ui2), + (XVREPL128VEI_W v8f32:$xj, immZExt2:$ui2)>; +def : Pat<(loongarch_vreplvei v4f64:$xj, immZExt1:$ui1), + (XVREPL128VEI_D v4f64:$xj, 
immZExt1:$ui1),
+          (XVREPL128VEI_D v4f64:$xj, immZExt1:$ui1)>;
+
+// XVPERMI_D
+def : Pat<(loongarch_xvpermi v4i64:$xj, immZExt8:$ui8),
+          (XVPERMI_D v4i64:$xj, immZExt8:$ui8)>;
+def : Pat<(loongarch_xvpermi v4f64:$xj, immZExt8:$ui8),
+          (XVPERMI_D v4f64:$xj, immZExt8:$ui8)>;
+
 // XVREPLVE0_{W/D}
 def : Pat<(lasxsplatf32 FPR32:$fj),
           (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 39ee861cd0565..0580683c3ce30 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -15,6 +15,15 @@ def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>,
                                                 SDTCisSameAs<0, 1>,
                                                 SDTCisInt<2>]>;
 def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
+def SDT_LoongArchVShuf : SDTypeProfile<1, 3, [SDTCisVec<0>,
+                                              SDTCisInt<1>, SDTCisVec<1>,
+                                              SDTCisSameAs<0, 2>,
+                                              SDTCisSameAs<2, 3>]>;
+def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
+                                            SDTCisSameAs<0, 1>,
+                                            SDTCisSameAs<1, 2>]>;
+def SDT_loongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
+                                               SDTCisSameAs<0,1>,
+                                               SDTCisVT<2, i64>]>;
 
 // Target nodes.
 def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
 def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
@@ -31,6 +40,23 @@ def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
 def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
                                       SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
 
+def loongarch_vshuf: SDNode<"LoongArchISD::VSHUF", SDT_LoongArchVShuf>;
+def loongarch_vpickev: SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>;
+def loongarch_vpickod: SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>;
+def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
+def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
+def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
+def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
+
+def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_loongArchV1RUimm>;
+def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_loongArchV1RUimm>;
+
+def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>;
+def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>;
+def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>;
+def immZExt4 : ImmLeaf<GRLenVT, [{return isUInt<4>(Imm);}]>;
+def immZExt8 : ImmLeaf<GRLenVT, [{return isUInt<8>(Imm);}]>;
+
 class VecCond<SDPatternOperator OpNode, ValueType TyNode,
               RegisterClass RC = LSX128>
     : Pseudo<(outs GPR:$rd), (ins RC:$vj),
@@ -1682,6 +1708,128 @@ def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk),
 def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk),
           (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>;
 
+// VSHUF_{B/H/W/D}
+def : Pat<(loongarch_vshuf v16i8:$va, v16i8:$vj, v16i8:$vk),
+          (VSHUF_B v16i8:$vj, v16i8:$vk, v16i8:$va)>;
+def : Pat<(loongarch_vshuf v8i16:$vd, v8i16:$vj, v8i16:$vk),
+          (VSHUF_H v8i16:$vd, v8i16:$vj, v8i16:$vk)>;
+def : Pat<(loongarch_vshuf v4i32:$vd, v4i32:$vj, v4i32:$vk),
+          (VSHUF_W v4i32:$vd, v4i32:$vj, v4i32:$vk)>;
+def : Pat<(loongarch_vshuf v2i64:$vd, v2i64:$vj, v2i64:$vk),
+          (VSHUF_D v2i64:$vd, v2i64:$vj, v2i64:$vk)>;
+def : Pat<(loongarch_vshuf v4i32:$vd, v4f32:$vj, v4f32:$vk),
+          (VSHUF_W v4i32:$vd, v4f32:$vj, v4f32:$vk)>;
+def : Pat<(loongarch_vshuf v2i64:$vd, v2f64:$vj, v2f64:$vk),
+          (VSHUF_D v2i64:$vd, v2f64:$vj, v2f64:$vk)>;
+
+// VPICKEV_{B/H/W/D}
+def : Pat<(loongarch_vpickev v16i8:$vj, v16i8:$vk),
+          (VPICKEV_B v16i8:$vj, v16i8:$vk)>;
+def : Pat<(loongarch_vpickev v8i16:$vj, v8i16:$vk),
+          (VPICKEV_H v8i16:$vj, v8i16:$vk)>;
+def : Pat<(loongarch_vpickev v4i32:$vj, v4i32:$vk),
+          (VPICKEV_W 
v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpickev v2i64:$vj, v2i64:$vk), + (VPICKEV_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpickev v4f32:$vj, v4f32:$vk), + (VPICKEV_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpickev v2f64:$vj, v2f64:$vk), + (VPICKEV_D v2f64:$vj, v2f64:$vk)>; + +// VPICKOD_{B/H/W/D} +def : Pat<(loongarch_vpickod v16i8:$vj, v16i8:$vk), + (VPICKOD_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpickod v8i16:$vj, v8i16:$vk), + (VPICKOD_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpickod v4i32:$vj, v4i32:$vk), + (VPICKOD_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpickod v2i64:$vj, v2i64:$vk), + (VPICKOD_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpickod v4f32:$vj, v4f32:$vk), + (VPICKOD_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpickod v2f64:$vj, v2f64:$vk), + (VPICKOD_D v2f64:$vj, v2f64:$vk)>; + +// VPACKEV_{B/H/W/D} +def : Pat<(loongarch_vpackev v16i8:$vj, v16i8:$vk), + (VPACKEV_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpackev v8i16:$vj, v8i16:$vk), + (VPACKEV_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpackev v4i32:$vj, v4i32:$vk), + (VPACKEV_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpackev v2i64:$vj, v2i64:$vk), + (VPACKEV_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpackev v4f32:$vj, v4f32:$vk), + (VPACKEV_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpackev v2f64:$vj, v2f64:$vk), + (VPACKEV_D v2f64:$vj, v2f64:$vk)>; + +// VPACKOD_{B/H/W/D} +def : Pat<(loongarch_vpackod v16i8:$vj, v16i8:$vk), + (VPACKOD_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpackod v8i16:$vj, v8i16:$vk), + (VPACKOD_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpackod v4i32:$vj, v4i32:$vk), + (VPACKOD_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpackod v2i64:$vj, v2i64:$vk), + (VPACKOD_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpackod v4f32:$vj, v4f32:$vk), + (VPACKOD_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpackod v2f64:$vj, v2f64:$vk), + (VPACKOD_D v2f64:$vj, v2f64:$vk)>; + +// VILVL_{B/H/W/D} +def : Pat<(loongarch_vilvl v16i8:$vj, v16i8:$vk), + (VILVL_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vilvl v8i16:$vj, v8i16:$vk), + (VILVL_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vilvl v4i32:$vj, v4i32:$vk), + (VILVL_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vilvl v2i64:$vj, v2i64:$vk), + (VILVL_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vilvl v4f32:$vj, v4f32:$vk), + (VILVL_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vilvl v2f64:$vj, v2f64:$vk), + (VILVL_D v2f64:$vj, v2f64:$vk)>; + +// VILVH_{B/H/W/D} +def : Pat<(loongarch_vilvh v16i8:$vj, v16i8:$vk), + (VILVH_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vilvh v8i16:$vj, v8i16:$vk), + (VILVH_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vilvh v4i32:$vj, v4i32:$vk), + (VILVH_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vilvh v2i64:$vj, v2i64:$vk), + (VILVH_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vilvh v4f32:$vj, v4f32:$vk), + (VILVH_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vilvh v2f64:$vj, v2f64:$vk), + (VILVH_D v2f64:$vj, v2f64:$vk)>; + +// VSHUF4I_{B/H/W} +def : Pat<(loongarch_vshuf4i v16i8:$vj, immZExt8:$ui8), + (VSHUF4I_B v16i8:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8i16:$vj, immZExt8:$ui8), + (VSHUF4I_H v8i16:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8), + (VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8), + (VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>; + +// VREPLVEI_{B/H/W/D} +def : Pat<(loongarch_vreplvei v16i8:$vj, 
immZExt4:$ui4),
+          (VREPLVEI_B v16i8:$vj, immZExt4:$ui4)>;
+def : Pat<(loongarch_vreplvei v8i16:$vj, immZExt3:$ui3),
+          (VREPLVEI_H v8i16:$vj, immZExt3:$ui3)>;
+def : Pat<(loongarch_vreplvei v4i32:$vj, immZExt2:$ui2),
+          (VREPLVEI_W v4i32:$vj, immZExt2:$ui2)>;
+def : Pat<(loongarch_vreplvei v2i64:$vj, immZExt1:$ui1),
+          (VREPLVEI_D v2i64:$vj, immZExt1:$ui1)>;
+def : Pat<(loongarch_vreplvei v4f32:$vj, immZExt2:$ui2),
+          (VREPLVEI_W v4f32:$vj, immZExt2:$ui2)>;
+def : Pat<(loongarch_vreplvei v2f64:$vj, immZExt1:$ui1),
+          (VREPLVEI_D v2f64:$vj, immZExt1:$ui1)>;
+
 // VREPLVEI_{W/D}
 def : Pat<(lsxsplatf32 FPR32:$fj),
           (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll
new file mode 100644
index 0000000000000..22ab19b9fa446
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+
+;; xvilvl.b
+define <32 x i8> @shufflevector_xvilvl_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: shufflevector_xvilvl_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvilvl.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %c
+}
+
+;; xvilvl.h
+define <16 x i16> @shufflevector_xvilvl_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: shufflevector_xvilvl_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvilvl.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %c
+}
+
+;; xvilvl.w
+define <8 x i32> @shufflevector_xvilvl_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shufflevector_xvilvl_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvilvl.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x i32> %c
+}
+
+;; xvilvh.b
+define <32 x i8> @shufflevector_xvilvh_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: shufflevector_xvilvh_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvilvh.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %c
+}
+
+;; xvilvh.h
+define <16 x i16> @shufflevector_xvilvh_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: shufflevector_xvilvh_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvilvh.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %c
+}
+
+;; xvilvh.w
+define <8 x i32> @shufflevector_xvilvh_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shufflevector_xvilvh_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvilvh.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %c
+}
+
+;; xvilvh.w
+define <8 x float> @shufflevector_xvilvh_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: shufflevector_xvilvh_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvilvh.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x float> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll
new file mode 100644
index 0000000000000..2ff9af4069b9b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll
@@ -0,0 +1,124 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+
+;; xvpackev.b
+define <32 x i8> @shufflevector_pack_ev_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackev.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 2, i32 34, i32 4, i32 36, i32 6, i32 38, i32 8, i32 40, i32 10, i32 42, i32 12, i32 44, i32 14, i32 46, i32 16, i32 48, i32 18, i32 50, i32 20, i32 52, i32 22, i32 54, i32 24, i32 56, i32 26, i32 58, i32 28, i32 60, i32 30, i32 62>
+  ret <32 x i8> %c
+}
+
+;; xvpackev.h
+define <16 x i16> @shufflevector_pack_ev_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackev.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+  ret <16 x i16> %c
+}
+
+;; xvpackev.w
+define <8 x i32> @shufflevector_pack_ev_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackev.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i32> %c
+}
+
+;; xvpickev.d/xvpackev.d/xvilvl.d
+define <4 x i64> @shufflevector_pack_ev_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackev.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x i64> %c
+}
+
+;; xvpackev.w
+define <8 x float> @shufflevector_pack_ev_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackev.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x float> %c
+}
+
+;; xvpickev.d/xvpackev.d/xvilvl.d
+define <4 x double> @shufflevector_pack_ev_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackev.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x double> %c
+}
+
+;; xvpackod.b
+define <32 x i8> @shufflevector_pack_od_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackod.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 33, i32 3, i32 35, i32 5, i32 37, i32 7, i32 39, i32 9, i32 41, i32 11, i32 43, i32 13, i32 45, i32 15, i32 47, i32 17, i32 49, i32 19, i32 51, i32 21, i32 53, i32 23, i32 55, i32 25, i32 57, i32 27, i32 59, i32 29, i32 61, i32 31, i32 63>
+  ret <32 x i8> %c
+}
+
+;; xvpackod.h
+define <16 x i16> @shufflevector_pack_od_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackod.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+  ret <16 x i16> %c
+}
+
+;; xvpackod.w
+define <8 x i32> @shufflevector_pack_od_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackod.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i32> %c
+}
+
+;; xvpickod.d/xvpackod.d/xvilvh.d
+define <4 x i64> @shufflodector_pack_od_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: shufflodector_pack_od_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackod.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x i64> %c
+}
+
+;; xvpackod.w
+define <8 x float> @shufflodector_pack_od_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: shufflodector_pack_od_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackod.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x float> %c
+}
+
+;; xvpickod.d/xvpackod.d/xvilvh.d
+define <4 x double> @shufflodector_pack_od_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: shufflodector_pack_od_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpackod.d $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll
new file mode 100644
index 0000000000000..294d292d17640
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+
+;; xvpickev.b
+define <32 x i8> @shufflevector_pick_ev_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpickev.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
+  ret <32 x i8> %c
+}
+
+;; xvpickev.h
+define <16 x i16> @shufflevector_pick_ev_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpickev.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i16> %c
+}
+
+;; xvpickev.w
+define <8 x i32> @shufflevector_pick_ev_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpickev.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  ret <8 x i32> %c
+}
+
+;; xvpickev.w
+define <8 x float> @shufflevector_pick_ev_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpickev.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+  ret <8 x float> %c
+}
+
+;; xvpickod.b
+define <32 x i8> @shufflevector_pick_od_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpickod.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 33, i32 35, i32 37, i32 39, i32 41, i32 43, i32 45, i32 47, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 49, i32 51, i32 53, i32 55, i32 57, i32 59, i32 61, i32 63>
+  ret <32 x i8> %c
+}
+
+;; xvpickod.h
+define <16 x i16> @shufflevector_pick_od_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpickod.h $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+  ret <16 x i16> %c
+}
+
+;; xvpickod.w
+define <8 x i32> @shufflevector_pick_od_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpickod.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  ret <8 x i32> %c
+}
+
+;; xvpickod.w
+define <8 x float> @shufflodector_pick_od_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: shufflodector_pick_od_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvpickod.w $xr0, $xr1, $xr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+  ret <8 x float> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll
new file mode 100644
index 
0000000000000..dce1e4b777e29 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvrepl128vei.b +define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvrepl128vei.h +define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvrepl128vei.w +define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78 +; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvrepl128vei.d +define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflevector_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvrepl128vei.w +define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvrepl128vei.d +define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflevector_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.d $xr0, $xr1, 1 +; CHECK-NEXT: ret + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll new file mode 100644 index 0000000000000..4cc819018f0a8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvshuf.b +define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvshuf.h +define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0) +; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 78 +; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0 +; CHECK-NEXT: xvori.b $xr0, $xr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x 
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
new file mode 100644
index 0000000000000..4cc819018f0a8
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+
+;; xvshuf.b
+define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: shufflevector_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT:    xvld $xr2, $a0, 0
+; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    ret
+  %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
+  ret <32 x i8> %c
+}
+
+;; xvshuf.h
+define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: shufflevector_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT:    xvld $xr2, $a0, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 78
+; CHECK-NEXT:    xvpermi.d $xr1, $xr1, 78
+; CHECK-NEXT:    xvshuf.h $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
+  ret <16 x i16> %c
+}
+
+;; xvshuf.w
+define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shufflevector_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    xvld $xr2, $a0, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 68
+; CHECK-NEXT:    xvpermi.d $xr1, $xr1, 68
+; CHECK-NEXT:    xvshuf.w $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
+  ret <8 x i32> %c
+}
+
+;; xvshuf.d
+define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: shufflevector_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT:    xvld $xr2, $a0, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 238
+; CHECK-NEXT:    xvpermi.d $xr1, $xr1, 238
+; CHECK-NEXT:    xvshuf.d $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32>
+  ret <4 x i64> %c
+}
+
+;; xvshuf.w
+define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: shufflevector_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    xvld $xr2, $a0, 0
+; CHECK-NEXT:    xvshuf.w $xr2, $xr1, $xr0
+; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
+  ret <8 x float> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
new file mode 100644
index 0000000000000..dc4532a7292ab
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s
+
+;; xvshuf4i.b
+define <32 x i8> @shufflevector_xvshuf4i_v32i8(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 27
+; CHECK-NEXT:    ret
+  %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>
+  ret <32 x i8> %c
+}
+
+;; xvshuf4i.h
+define <16 x i16> @shufflevector_xvshuf4i_v16i16(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvshuf4i.h $xr0, $xr0, 27
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
+  ret <16 x i16> %c
+}
+
+;; xvshuf4i.w
+define <8 x i32> @shufflevector_xvshuf4i_v8i32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvshuf4i.w $xr0, $xr0, 27
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
+  ret <8 x i32> %c
+}
+
+;; xvshuf4i.w
+define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvshuf4i.w $xr0, $xr0, 27
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32>
+  ret <8 x float> %c
+}
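The same lane-splitting shows up in the xvshuf tests: xvshuf.{h,w,d} shuffles within each 128-bit lane, so masks that mix elements across lanes are handled by first rearranging the inputs with xvpermi.d (68 and 238 duplicate the low or high half into both lanes, 78 swaps the halves, per the decode above). The trailing xvori.b is simply a register move: xvshuf writes its result into the register that held the mask vector, so the result has to be copied back to the return register, i.e.

  xvori.b $xr0, $xr2, 0    # OR with immediate 0 acts as a plain move of $xr2 into $xr0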
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll
new file mode 100644
index 0000000000000..31398c6081c0a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+;; vilvl.b
+define <16 x i8> @shufflevector_vilvl_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_vilvl_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vilvl.h
+define <8 x i16> @shufflevector_vilvl_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_vilvl_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vilvl.w
+define <4 x i32> @shufflevector_vilvl_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_vilvl_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vilvl.w
+define <4 x float> @shufflevector_vilvl_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_vilvl_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
+
+;; vilvh.b
+define <16 x i8> @shufflevector_vilvh_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_vilvh_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vilvh.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vilvh.h
+define <8 x i16> @shufflevector_vilvh_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_vilvh_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vilvh.w
+define <4 x i32> @shufflevector_vilvh_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_vilvh_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vilvh.w
+define <4 x float> @shufflevector_vilvh_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_vilvh_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
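vilvl and vilvh interleave the low or high halves of the two sources: with the operand order used above, vilvl.w $vr0, $vr1, $vr0 produces [a0, b0, a1, b1] and vilvh.w produces [a2, b2, a3, b3]. A mask fitting the vilvl.w pattern for <4 x i32>, as a sketch of the <0, n, 1, n+1> form with n = 4:

  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>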
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
new file mode 100644
index 0000000000000..171e68306cd11
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+;; vpackev.b
+define <16 x i8> @shufflevector_pack_ev_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vpackev.h
+define <8 x i16> @shufflevector_pack_ev_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vpackev.w
+define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vpickev.d/vpackev.d/vilvl.d
+define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
+  ret <2 x i64> %c
+}
+
+;; vpackev.w
+define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
+
+;; vpickev.d/vpackev.d/vilvl.d
+define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_pack_ev_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32>
+  ret <2 x double> %c
+}
+
+;; vpackod.b
+define <16 x i8> @shufflevector_pack_od_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vpackod.h
+define <8 x i16> @shufflevector_pack_od_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vpackod.w
+define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vpickod.d/vpackod.d/vilvh.d
+define <2 x i64> @shufflevector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
+  ret <2 x i64> %c
+}
+
+;; vpackod.w
+define <4 x float> @shufflevector_pack_od_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
+
+;; vpickod.d/vpackod.d/vilvh.d
+define <2 x double> @shufflevector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32>
+  ret <2 x double> %c
+}
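vpackev/vpackod interleave the even- or odd-indexed elements of the two sources, so the even case for <4 x i32> computes [a0, b0, a2, b2], i.e. the <0, n, 2, n+2> mask form with n = 4 (a sketch):

  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>

For <2 x i64> there is only one even and one odd element per source, so vpackev.d, vpickev.d and vilvl.d (and likewise the od/vilvh.d variants) compute the same result; that is what the multi-instruction comments on the v2i64/v2f64 tests record.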
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll
new file mode 100644
index 0000000000000..ca636d942b583
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+;; vpickev.b
+define <16 x i8> @shufflevector_pick_ev_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpickev.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vpickev.h
+define <8 x i16> @shufflevector_pick_ev_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpickev.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vpickev.w
+define <4 x i32> @shufflevector_pick_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpickev.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vpickev.w
+define <4 x float> @shufflevector_pick_ev_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpickev.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
+
+;; vpickod.b
+define <16 x i8> @shufflevector_pick_od_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpickod.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vpickod.h
+define <8 x i16> @shufflevector_pick_od_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpickod.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vpickod.w
+define <4 x i32> @shufflevector_pick_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpickod.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vpickod.w
+define <4 x float> @shufflevector_pick_od_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpickod.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
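vpickev/vpickod concatenate rather than interleave: the even (or odd) elements of the first source followed by those of the second, so vpickev.w $vr0, $vr1, $vr0 yields [a0, a2, b0, b2]. That corresponds to the plain even-index mask for <4 x i32> (a sketch):

  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>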
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll
new file mode 100644
index 0000000000000..10510786f3216
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+;; vreplvei.b
+define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreplvei.b $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vreplvei.h
+define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreplvei.h $vr0, $vr1, 2
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vreplvei.w
+define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vreplvei.d
+define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
+  ret <2 x i64> %c
+}
+
+;; vreplvei.w
+define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
+
+;; vreplvei.d
+define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 1
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32>
+  ret <2 x double> %c
+}
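vreplvei broadcasts the element selected by its immediate. Note the v8i16 case reads from $vr1: when every mask index points into the second shuffle operand, the shuffle is presumably commuted during the usual DAG canonicalization before this lowering runs, so the splat source becomes %b. A mask that would match that output (a sketch; index 10 is element 2 of %b):

  %c = shufflevector <8 x i16> %a, <8 x i16> %b,
       <8 x i32> <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>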
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
new file mode 100644
index 0000000000000..55800b31446b3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI0_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT:    vld $vr2, $a0, 0
+; CHECK-NEXT:    vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vshuf.h
+define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI1_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI1_0)
+; CHECK-NEXT:    vld $vr2, $a0, 0
+; CHECK-NEXT:    vshuf.h $vr2, $vr1, $vr0
+; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vshuf.w
+define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI2_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT:    vld $vr2, $a0, 0
+; CHECK-NEXT:    vshuf.w $vr2, $vr1, $vr0
+; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vshuf.d
+define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI3_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI3_0)
+; CHECK-NEXT:    vld $vr2, $a0, 0
+; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
+; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32>
+  ret <2 x i64> %c
+}
+
+;; vshuf.w
+define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    vld $vr2, $a0, 0
+; CHECK-NEXT:    vshuf.w $vr2, $vr1, $vr0
+; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
+
+;; vshuf.d
+define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
+; CHECK-NEXT:    addi.d $a0, $a0, %pc_lo12(.LCPI5_0)
+; CHECK-NEXT:    vld $vr2, $a0, 0
+; CHECK-NEXT:    vshuf.d $vr2, $vr1, $vr0
+; CHECK-NEXT:    vori.b $vr0, $vr2, 0
+; CHECK-NEXT:    ret
+  %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32>
+  ret <2 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
new file mode 100644
index 0000000000000..660b9581c3d1f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+;; vshuf4i.b
+define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 27
+; CHECK-NEXT:    ret
+  %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32>
+  ret <16 x i8> %c
+}
+
+;; vshuf4i.h
+define <8 x i16> @shufflevector_vshuf4i_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vshuf4i.h $vr0, $vr0, 27
+; CHECK-NEXT:    ret
+  %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32>
+  ret <8 x i16> %c
+}
+
+;; vshuf4i.w
+define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 27
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32>
+  ret <4 x i32> %c
+}
+
+;; vshuf4i.w
+define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 27
+; CHECK-NEXT:    ret
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32>
+  ret <4 x float> %c
+}
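If any of these lowerings change, the CHECK lines are meant to be regenerated rather than edited by hand, per the NOTE headers in each file. Assuming a build tree with llc at build/bin/llc (paths here are illustrative):

  llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
      llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-*.ll \
      llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-*.ll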