@@ -5887,36 +5887,47 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
58875887 EVT VT = N->getValueType(0);
58885888 unsigned ValSize = VT.getSizeInBits();
58895889 unsigned IID = N->getConstantOperandVal(0);
5890+ bool IsPermLane16 = IID == Intrinsic::amdgcn_permlane16 ||
5891+ IID == Intrinsic::amdgcn_permlanex16;
58905892 SDLoc SL(N);
58915893 MVT IntVT = MVT::getIntegerVT(ValSize);
58925894
58935895 auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1,
58945896 SDValue Src2, MVT ValT) -> SDValue {
58955897 SmallVector<SDValue, 8> Operands;
5896- Operands.push_back(DAG.getTargetConstant(IID, SL, MVT::i32));
58975898 switch (IID) {
5898- case Intrinsic::amdgcn_readfirstlane:
5899- Operands.push_back(Src0);
5900- break;
5899+ case Intrinsic::amdgcn_permlane16:
5900+ case Intrinsic::amdgcn_permlanex16:
5901+ Operands.push_back(N->getOperand(6));
5902+ Operands.push_back(N->getOperand(5));
5903+ Operands.push_back(N->getOperand(4));
5904+ [[fallthrough]];
5905+ case Intrinsic::amdgcn_writelane:
5906+ Operands.push_back(Src2);
5907+ [[fallthrough]];
59015908 case Intrinsic::amdgcn_readlane:
5902- Operands.push_back(Src0);
59035909 Operands.push_back(Src1);
5904- break;
5905- case Intrinsic::amdgcn_writelane:
5910+ [[fallthrough]];
5911+ case Intrinsic::amdgcn_readfirstlane:
5912+ case Intrinsic::amdgcn_permlane64:
59065913 Operands.push_back(Src0);
5907- Operands.push_back(Src1);
5908- Operands.push_back(Src2);
59095914 break;
5915+ default:
5916+ llvm_unreachable("unhandled lane op");
59105917 }
59115918
5919+ Operands.push_back(DAG.getTargetConstant(IID, SL, MVT::i32));
5920+ std::reverse(Operands.begin(), Operands.end());
5921+
59125922 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, ValT, Operands);
59135923 };
59145924
59155925 SDValue Src0 = N->getOperand(1);
59165926 SDValue Src1, Src2;
5917- if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane) {
5927+ if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane ||
5928+ IsPermLane16) {
59185929 Src1 = N->getOperand(2);
5919- if (IID == Intrinsic::amdgcn_writelane)
5930+ if (IID == Intrinsic::amdgcn_writelane || IsPermLane16 )
59205931 Src2 = N->getOperand(3);
59215932 }
59225933
@@ -5929,10 +5940,17 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
59295940 bool IsFloat = VT.isFloatingPoint();
59305941 Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0,
59315942 SL, MVT::i32);
5932- if (Src2.getNode()) {
5943+
5944+ if (IsPermLane16) {
5945+ Src1 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src1) : Src1,
5946+ SL, MVT::i32);
5947+ }
5948+
5949+ if (IID == Intrinsic::amdgcn_writelane) {
59335950 Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2,
59345951 SL, MVT::i32);
59355952 }
5953+
59365954 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
59375955 SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT);
59385956 return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc;
@@ -5984,17 +6002,23 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
59846002 case MVT::bf16: {
59856003 MVT SubVecVT = MVT::getVectorVT(EltTy, 2);
59866004 SmallVector<SDValue, 4> Pieces;
6005+ SDValue Src0SubVec, Src1SubVec, Src2SubVec;
59876006 for (unsigned i = 0, EltIdx = 0; i < ValSize / 32; i++) {
5988- SDValue Src0SubVec =
5989- DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src0,
5990- DAG.getConstant(EltIdx, SL, MVT::i32));
6007+ Src0SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src0,
6008+ DAG.getConstant(EltIdx, SL, MVT::i32));
59916009
5992- SDValue Src2SubVec;
5993- if (Src2)
6010+ if (IsPermLane16)
6011+ Src1SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src1,
6012+ DAG.getConstant(EltIdx, SL, MVT::i32));
6013+
6014+ if (IID == Intrinsic::amdgcn_writelane)
59946015 Src2SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src2,
59956016 DAG.getConstant(EltIdx, SL, MVT::i32));
59966017
5997- Pieces.push_back(createLaneOp(Src0SubVec, Src1, Src2SubVec, SubVecVT));
6018+ Pieces.push_back(
6019+ IsPermLane16
6020+ ? createLaneOp(Src0SubVec, Src1SubVec, Src2, SubVecVT)
6021+ : createLaneOp(Src0SubVec, Src1, Src2SubVec, SubVecVT));
59986022 EltIdx += 2;
59996023 }
60006024 return DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, Pieces);
@@ -6008,7 +6032,10 @@ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
60086032 MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32);
60096033 Src0 = DAG.getBitcast(VecVT, Src0);
60106034
6011- if (Src2)
6035+ if (IsPermLane16)
6036+ Src1 = DAG.getBitcast(VecVT, Src1);
6037+
6038+ if (IID == Intrinsic::amdgcn_writelane)
60126039 Src2 = DAG.getBitcast(VecVT, Src2);
60136040
60146041 SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT);
@@ -8464,6 +8491,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
84648491 case Intrinsic::amdgcn_readlane:
84658492 case Intrinsic::amdgcn_readfirstlane:
84668493 case Intrinsic::amdgcn_writelane:
8494+ case Intrinsic::amdgcn_permlane16:
8495+ case Intrinsic::amdgcn_permlanex16:
8496+ case Intrinsic::amdgcn_permlane64:
84678497 return lowerLaneOp(*this, Op.getNode(), DAG);
84688498 default:
84698499 if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
0 commit comments