@@ -194,6 +194,10 @@ class AArch64InstructionSelector : public InstructionSelector {
                                   MachineInstr &I);
   bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
                                      MachineInstr &I);
+  void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
+                                  unsigned Opc);
+  bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
+                                      unsigned Opc);
   bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                       MachineRegisterInfo &MRI);
   bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -5697,7 +5701,56 @@ bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
         !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
       return false;
   }
+  return true;
+}
+
+void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
+                                                            unsigned NumVecs,
+                                                            unsigned Opc) {
+  MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
+  LLT Ty = MRI.getType(I.getOperand(1).getReg());
+  Register Ptr = I.getOperand(1 + NumVecs).getReg();
+
+  SmallVector<Register, 2> Regs(NumVecs);
+  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
+                 Regs.begin(), [](auto MO) { return MO.getReg(); });
+
+  Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
+                                             : createDTuple(Regs, MIB);
+  auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
+  Store.cloneMemRefs(I);
+  constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+}
+
+bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
+    MachineInstr &I, unsigned NumVecs, unsigned Opc) {
+  MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
+  LLT Ty = MRI.getType(I.getOperand(1).getReg());
+  bool Narrow = Ty.getSizeInBits() == 64;
+
+  SmallVector<Register, 2> Regs(NumVecs);
+  std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
+                 Regs.begin(), [](auto MO) { return MO.getReg(); });
+
+  if (Narrow)
+    transform(Regs, Regs.begin(), [this](Register Reg) {
+      return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
+          ->getOperand(0)
+          .getReg();
+    });
 
+  Register Tuple = createQTuple(Regs, MIB);
+
+  auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
+  if (!LaneNo)
+    return false;
+  Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
+  auto Store = MIB.buildInstr(Opc, {}, {})
+                   .addReg(Tuple)
+                   .addImm(LaneNo->getZExtValue())
+                   .addReg(Ptr);
+  Store.cloneMemRefs(I);
+  constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
   return true;
 }
 
@@ -6005,11 +6058,80 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
     selectVectorLoadIntrinsic(Opc, 4, I);
     break;
   }
+  case Intrinsic::aarch64_neon_st1x2: {
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8))
+      Opc = AArch64::ST1Twov8b;
+    else if (Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST1Twov16b;
+    else if (Ty == LLT::fixed_vector(4, S16))
+      Opc = AArch64::ST1Twov4h;
+    else if (Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST1Twov8h;
+    else if (Ty == LLT::fixed_vector(2, S32))
+      Opc = AArch64::ST1Twov2s;
+    else if (Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST1Twov4s;
+    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+      Opc = AArch64::ST1Twov2d;
+    else if (Ty == S64 || Ty == P0)
+      Opc = AArch64::ST1Twov1d;
+    else
+      llvm_unreachable("Unexpected type for st1x2!");
+    selectVectorStoreIntrinsic(I, 2, Opc);
+    break;
+  }
+  case Intrinsic::aarch64_neon_st1x3: {
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8))
+      Opc = AArch64::ST1Threev8b;
+    else if (Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST1Threev16b;
+    else if (Ty == LLT::fixed_vector(4, S16))
+      Opc = AArch64::ST1Threev4h;
+    else if (Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST1Threev8h;
+    else if (Ty == LLT::fixed_vector(2, S32))
+      Opc = AArch64::ST1Threev2s;
+    else if (Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST1Threev4s;
+    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+      Opc = AArch64::ST1Threev2d;
+    else if (Ty == S64 || Ty == P0)
+      Opc = AArch64::ST1Threev1d;
+    else
+      llvm_unreachable("Unexpected type for st1x3!");
+    selectVectorStoreIntrinsic(I, 3, Opc);
+    break;
+  }
+  case Intrinsic::aarch64_neon_st1x4: {
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8))
+      Opc = AArch64::ST1Fourv8b;
+    else if (Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST1Fourv16b;
+    else if (Ty == LLT::fixed_vector(4, S16))
+      Opc = AArch64::ST1Fourv4h;
+    else if (Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST1Fourv8h;
+    else if (Ty == LLT::fixed_vector(2, S32))
+      Opc = AArch64::ST1Fourv2s;
+    else if (Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST1Fourv4s;
+    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+      Opc = AArch64::ST1Fourv2d;
+    else if (Ty == S64 || Ty == P0)
+      Opc = AArch64::ST1Fourv1d;
+    else
+      llvm_unreachable("Unexpected type for st1x4!");
+    selectVectorStoreIntrinsic(I, 4, Opc);
+    break;
+  }
   case Intrinsic::aarch64_neon_st2: {
-    Register Src1 = I.getOperand(1).getReg();
-    Register Src2 = I.getOperand(2).getReg();
-    Register Ptr = I.getOperand(3).getReg();
-    LLT Ty = MRI.getType(Src1);
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
     unsigned Opc;
     if (Ty == LLT::fixed_vector(8, S8))
       Opc = AArch64::ST2Twov8b;
@@ -6029,12 +6151,109 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
       Opc = AArch64::ST1Twov1d;
     else
       llvm_unreachable("Unexpected type for st2!");
-    SmallVector<Register, 2> Regs = {Src1, Src2};
-    Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
-                                               : createDTuple(Regs, MIB);
-    auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
-    Store.cloneMemRefs(I);
-    constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
+    selectVectorStoreIntrinsic(I, 2, Opc);
+    break;
+  }
+  case Intrinsic::aarch64_neon_st3: {
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8))
+      Opc = AArch64::ST3Threev8b;
+    else if (Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST3Threev16b;
+    else if (Ty == LLT::fixed_vector(4, S16))
+      Opc = AArch64::ST3Threev4h;
+    else if (Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST3Threev8h;
+    else if (Ty == LLT::fixed_vector(2, S32))
+      Opc = AArch64::ST3Threev2s;
+    else if (Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST3Threev4s;
+    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+      Opc = AArch64::ST3Threev2d;
+    else if (Ty == S64 || Ty == P0)
+      Opc = AArch64::ST1Threev1d;
+    else
+      llvm_unreachable("Unexpected type for st3!");
+    selectVectorStoreIntrinsic(I, 3, Opc);
+    break;
+  }
+  case Intrinsic::aarch64_neon_st4: {
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8))
+      Opc = AArch64::ST4Fourv8b;
+    else if (Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST4Fourv16b;
+    else if (Ty == LLT::fixed_vector(4, S16))
+      Opc = AArch64::ST4Fourv4h;
+    else if (Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST4Fourv8h;
+    else if (Ty == LLT::fixed_vector(2, S32))
+      Opc = AArch64::ST4Fourv2s;
+    else if (Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST4Fourv4s;
+    else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+      Opc = AArch64::ST4Fourv2d;
+    else if (Ty == S64 || Ty == P0)
+      Opc = AArch64::ST1Fourv1d;
+    else
+      llvm_unreachable("Unexpected type for st4!");
+    selectVectorStoreIntrinsic(I, 4, Opc);
+    break;
+  }
+  case Intrinsic::aarch64_neon_st2lane: {
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST2i8;
+    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST2i16;
+    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST2i32;
+    else if (Ty == LLT::fixed_vector(2, S64) ||
+             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+      Opc = AArch64::ST2i64;
+    else
+      llvm_unreachable("Unexpected type for st2lane!");
+    if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
+      return false;
+    break;
+  }
+  case Intrinsic::aarch64_neon_st3lane: {
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST3i8;
+    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST3i16;
+    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST3i32;
+    else if (Ty == LLT::fixed_vector(2, S64) ||
+             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+      Opc = AArch64::ST3i64;
+    else
+      llvm_unreachable("Unexpected type for st3lane!");
+    if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
+      return false;
+    break;
+  }
+  case Intrinsic::aarch64_neon_st4lane: {
+    LLT Ty = MRI.getType(I.getOperand(1).getReg());
+    unsigned Opc;
+    if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
+      Opc = AArch64::ST4i8;
+    else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
+      Opc = AArch64::ST4i16;
+    else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
+      Opc = AArch64::ST4i32;
+    else if (Ty == LLT::fixed_vector(2, S64) ||
+             Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
+      Opc = AArch64::ST4i64;
+    else
+      llvm_unreachable("Unexpected type for st4lane!");
+    if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
+      return false;
     break;
   }
   case Intrinsic::aarch64_mops_memset_tag: {
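
Not part of the patch, but for orientation: a minimal C sketch of source that reaches one of the new cases, assuming the standard ACLE vst2q_u8 intrinsic and clang's -fglobal-isel path. Clang lowers vst2q_u8 to the llvm.aarch64.neon.st2 intrinsic, which the aarch64_neon_st2 case above hands to selectVectorStoreIntrinsic (ST2Twov16b for the <16 x i8> case).

#include <arm_neon.h>

/* Illustrative sketch only: vst2q_u8 becomes llvm.aarch64.neon.st2 on
   <16 x i8> operands, so with GlobalISel enabled (-fglobal-isel) it is
   selected to ST2Twov16b per the switch above. The lane variants
   (vst2q_lane_* and friends) go through selectVectorStoreLaneIntrinsic
   instead, which requires a constant lane index. */
void store_interleaved(uint8_t *p, uint8x16_t a, uint8x16_t b) {
  uint8x16x2_t v = {{a, b}};
  vst2q_u8(p, v);
}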