@@ -5882,6 +5882,140 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
58825882 DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
58835883}
58845884
5885+ static SDValue lowerLaneOp(const SITargetLowering &TLI, SDNode *N,
5886+ SelectionDAG &DAG) {
5887+ EVT VT = N->getValueType(0);
5888+ unsigned ValSize = VT.getSizeInBits();
5889+ unsigned IID = N->getConstantOperandVal(0);
5890+ SDLoc SL(N);
5891+ MVT IntVT = MVT::getIntegerVT(ValSize);
5892+
5893+ auto createLaneOp = [&DAG, &SL, N, IID](SDValue Src0, SDValue Src1,
5894+ SDValue Src2, MVT ValT) -> SDValue {
5895+ SmallVector<SDValue, 8> Operands;
5896+ Operands.push_back(DAG.getTargetConstant(IID, SL, MVT::i32));
5897+ switch (IID) {
5898+ case Intrinsic::amdgcn_readfirstlane:
5899+ Operands.push_back(Src0);
5900+ break;
5901+ case Intrinsic::amdgcn_readlane:
5902+ Operands.push_back(Src0);
5903+ Operands.push_back(Src1);
5904+ break;
5905+ case Intrinsic::amdgcn_writelane:
5906+ Operands.push_back(Src0);
5907+ Operands.push_back(Src1);
5908+ Operands.push_back(Src2);
5909+ break;
5910+ }
5911+
5912+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, ValT, Operands);
5913+ };
5914+
5915+ SDValue Src0 = N->getOperand(1);
5916+ SDValue Src1, Src2;
5917+ if (IID == Intrinsic::amdgcn_readlane || IID == Intrinsic::amdgcn_writelane) {
5918+ Src1 = N->getOperand(2);
5919+ if (IID == Intrinsic::amdgcn_writelane)
5920+ Src2 = N->getOperand(3);
5921+ }
5922+
5923+ if (ValSize == 32) {
5924+ // Already legal
5925+ return SDValue();
5926+ }
5927+
5928+ if (ValSize < 32) {
5929+ bool IsFloat = VT.isFloatingPoint();
5930+ Src0 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src0) : Src0,
5931+ SL, MVT::i32);
5932+ if (Src2.getNode()) {
5933+ Src2 = DAG.getAnyExtOrTrunc(IsFloat ? DAG.getBitcast(IntVT, Src2) : Src2,
5934+ SL, MVT::i32);
5935+ }
5936+ SDValue LaneOp = createLaneOp(Src0, Src1, Src2, MVT::i32);
5937+ SDValue Trunc = DAG.getAnyExtOrTrunc(LaneOp, SL, IntVT);
5938+ return IsFloat ? DAG.getBitcast(VT, Trunc) : Trunc;
5939+ }
5940+
5941+ if (ValSize % 32 != 0)
5942+ return SDValue();
5943+
5944+ auto unrollLaneOp = [&DAG, &SL](SDNode *N) -> SDValue {
5945+ EVT VT = N->getValueType(0);
5946+ unsigned NE = VT.getVectorNumElements();
5947+ EVT EltVT = VT.getVectorElementType();
5948+ SmallVector<SDValue, 8> Scalars;
5949+ unsigned NumOperands = N->getNumOperands();
5950+ SmallVector<SDValue, 4> Operands(NumOperands);
5951+
5952+ for (unsigned i = 0; i != NE; ++i) {
5953+ for (unsigned j = 0, e = NumOperands; j != e; ++j) {
5954+ SDValue Operand = N->getOperand(j);
5955+ EVT OperandVT = Operand.getValueType();
5956+ if (OperandVT.isVector()) {
5957+ // A vector operand; extract a single element.
5958+ EVT OperandEltVT = OperandVT.getVectorElementType();
5959+ Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, OperandEltVT,
5960+ Operand, DAG.getVectorIdxConstant(i, SL));
5961+ } else {
5962+ // A scalar operand; just use it as is.
5963+ Operands[j] = Operand;
5964+ }
5965+ }
5966+
5967+ Scalars.push_back(DAG.getNode(N->getOpcode(), SL, EltVT, Operands));
5968+ }
5969+
5970+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NE);
5971+ return DAG.getBuildVector(VecVT, SL, Scalars);
5972+ };
5973+
5974+ if (VT.isVector()) {
5975+ switch (MVT::SimpleValueType EltTy =
5976+ VT.getVectorElementType().getSimpleVT().SimpleTy) {
5977+ case MVT::i32:
5978+ case MVT::f32: {
5979+ SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VT.getSimpleVT());
5980+ return unrollLaneOp(LaneOp.getNode());
5981+ }
5982+ case MVT::i16:
5983+ case MVT::f16:
5984+ case MVT::bf16: {
5985+ MVT SubVecVT = MVT::getVectorVT(EltTy, 2);
5986+ SmallVector<SDValue, 4> Pieces;
5987+ for (unsigned i = 0, EltIdx = 0; i < ValSize / 32; i++) {
5988+ SDValue Src0SubVec =
5989+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src0,
5990+ DAG.getConstant(EltIdx, SL, MVT::i32));
5991+
5992+ SDValue Src2SubVec;
5993+ if (Src2)
5994+ Src2SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, SubVecVT, Src2,
5995+ DAG.getConstant(EltIdx, SL, MVT::i32));
5996+
5997+ Pieces.push_back(createLaneOp(Src0SubVec, Src1, Src2SubVec, SubVecVT));
5998+ EltIdx += 2;
5999+ }
6000+ return DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, Pieces);
6001+ }
6002+ default:
6003+ // Handle all other cases by bitcasting to i32 vectors
6004+ break;
6005+ }
6006+ }
6007+
6008+ MVT VecVT = MVT::getVectorVT(MVT::i32, ValSize / 32);
6009+ Src0 = DAG.getBitcast(VecVT, Src0);
6010+
6011+ if (Src2)
6012+ Src2 = DAG.getBitcast(VecVT, Src2);
6013+
6014+ SDValue LaneOp = createLaneOp(Src0, Src1, Src2, VecVT);
6015+ SDValue UnrolledLaneOp = unrollLaneOp(LaneOp.getNode());
6016+ return DAG.getBitcast(VT, UnrolledLaneOp);
6017+ }
6018+
58856019void SITargetLowering::ReplaceNodeResults(SDNode *N,
58866020 SmallVectorImpl<SDValue> &Results,
58876021 SelectionDAG &DAG) const {
@@ -8327,6 +8461,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
83278461 Op.getOperand(3), Op.getOperand(4), Op.getOperand(5),
83288462 IndexKeyi32, Op.getOperand(7)});
83298463 }
8464+ case Intrinsic::amdgcn_readlane:
8465+ case Intrinsic::amdgcn_readfirstlane:
8466+ case Intrinsic::amdgcn_writelane:
8467+ return lowerLaneOp(*this, Op.getNode(), DAG);
83308468 default:
83318469 if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
83328470 AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
0 commit comments