@@ -702,57 +702,66 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   // intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

-  // Turn FP extload into load/fpextend
-  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
-  // Turn FP truncstore into trunc + store.
-  // FIXME: vector types should also be expanded
-  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
-  setTruncStoreAction(MVT::f64, MVT::f16, Expand);
-  setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
-  setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
-  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-  setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
-  setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand);
+  // FP extload/truncstore is not legal in PTX. We need to expand all these.
+  for (auto FloatVTs :
+       {MVT::fp_valuetypes(), MVT::fp_fixedlen_vector_valuetypes()}) {
+    for (MVT ValVT : FloatVTs) {
+      for (MVT MemVT : FloatVTs) {
+        setLoadExtAction(ISD::EXTLOAD, ValVT, MemVT, Expand);
+        setTruncStoreAction(ValVT, MemVT, Expand);
+      }
+    }
+  }

-  // PTX does not support load / store predicate registers
-  setOperationAction(ISD::LOAD, MVT::i1, Custom);
-  setOperationAction(ISD::STORE, MVT::i1, Custom);
+  // To improve CodeGen we'll legalize any-extend loads to zext loads. This is
+  // how they'll be lowered in ISel anyway, and by doing this a little earlier
+  // we allow for more DAG combine opportunities.
+  for (auto IntVTs :
+       {MVT::integer_valuetypes(), MVT::integer_fixedlen_vector_valuetypes()})
+    for (MVT ValVT : IntVTs)
+      for (MVT MemVT : IntVTs)
+        if (isTypeLegal(ValVT))
+          setLoadExtAction(ISD::EXTLOAD, ValVT, MemVT, Custom);

+  // PTX does not support load / store predicate registers
+  setOperationAction({ISD::LOAD, ISD::STORE}, MVT::i1, Custom);
   for (MVT VT : MVT::integer_valuetypes()) {
-    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
-    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
-    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MVT::i1,
+                     Promote);
     setTruncStoreAction(VT, MVT::i1, Expand);
   }

+  // Disable generation of extload/truncstore for v2i16/v2i8. The generic
+  // expansion for these nodes when they are unaligned is incorrect if the
+  // type is a vector.
+  //
+  // TODO: Fix the generic expansion for these nodes found in
+  // TargetLowering::expandUnalignedLoad/Store.
+  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
+                   MVT::v2i8, Expand);
+  setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
+
+  // Register custom handling for illegal-type loads/stores. We'll try to
+  // custom lower almost all illegal types, and the lowering logic will
+  // discard cases we can't handle.
+  setOperationAction({ISD::LOAD, ISD::STORE}, {MVT::i128, MVT::f128}, Custom);
+  for (MVT VT : MVT::fixedlen_vector_valuetypes())
+    if (!isTypeLegal(VT) && VT.getStoreSizeInBits() <= 256)
+      setOperationAction({ISD::STORE, ISD::LOAD}, VT, Custom);
+
+  // Custom legalization for LDU intrinsics.
+  // TODO: The logic to lower these is not very robust and we should rewrite
+  // it. Perhaps LDU should not be represented as an intrinsic at all.
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+  for (MVT VT : MVT::fixedlen_vector_valuetypes())
+    if (IsPTXVectorType(VT))
+      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
+
   setCondCodeAction({ISD::SETNE, ISD::SETEQ, ISD::SETUGE, ISD::SETULE,
                      ISD::SETUGT, ISD::SETULT, ISD::SETGT, ISD::SETLT,
                      ISD::SETGE, ISD::SETLE},
                     MVT::i1, Expand);

-  // expand extload of vector of integers.
-  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i16,
-                   MVT::v2i8, Expand);
-  setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
-
   // This is legal in NVPTX
   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
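
Aside, not part of the change: marking a (value type, memory type) pair `Expand` tells the generic DAG legalizer to split the memory operation itself. Below is a minimal sketch of roughly what it builds for the FP pairs above; the helper names and the `Chain`/`Ptr`/`PtrInfo` parameters are illustrative placeholders, not code from this patch.

```cpp
// Hedged sketch: approximately the nodes the legalizer emits once FP
// extload/truncstore are marked Expand. Illustrative only.
static SDValue expandFPExtLoad(SelectionDAG &DAG, const SDLoc &DL, EVT ValVT,
                               EVT MemVT, SDValue Chain, SDValue Ptr,
                               MachinePointerInfo PtrInfo) {
  // Plain load of the narrow memory type (e.g. f16)...
  SDValue Load = DAG.getLoad(MemVT, DL, Chain, Ptr, PtrInfo);
  // ...widened to the result type (e.g. f32) with FP_EXTEND.
  return DAG.getNode(ISD::FP_EXTEND, DL, ValVT, Load);
}

static SDValue expandFPTruncStore(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue Value, EVT MemVT, SDValue Chain,
                                  SDValue Ptr, MachinePointerInfo PtrInfo) {
  // Round the wide value down to the memory type; the trailing operand is
  // FP_ROUND's "no information lost" flag, 0 for a normal round...
  SDValue Narrow = DAG.getNode(ISD::FP_ROUND, DL, MemVT, Value,
                               DAG.getIntPtrConstant(0, DL));
  // ...then store it with a plain (non-truncating) store.
  return DAG.getStore(Chain, DL, Narrow, Ptr, PtrInfo);
}
```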
@@ -767,24 +776,12 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   // DEBUGTRAP can be lowered to PTX brkpt
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);

-  // Register custom handling for vector loads/stores
-  for (MVT VT : MVT::fixedlen_vector_valuetypes())
-    if (IsPTXVectorType(VT))
-      setOperationAction({ISD::LOAD, ISD::STORE, ISD::INTRINSIC_W_CHAIN}, VT,
-                         Custom);
-
-  setOperationAction({ISD::LOAD, ISD::STORE, ISD::INTRINSIC_W_CHAIN},
-                     {MVT::i128, MVT::f128}, Custom);
-
   // Support varargs.
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction(ISD::VAARG, MVT::Other, Custom);
   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
   setOperationAction(ISD::VAEND, MVT::Other, Expand);

-  // Custom handling for i8 intrinsics
-  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
-
   setOperationAction({ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX},
                      {MVT::i16, MVT::i32, MVT::i64}, Legal);
@@ -3092,39 +3089,14 @@ static void replaceLoadVector(SDNode *N, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &Results,
                               const NVPTXSubtarget &STI);

-SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
-  if (Op.getValueType() == MVT::i1)
-    return LowerLOADi1(Op, DAG);
-
-  EVT VT = Op.getValueType();
-
-  if (NVPTX::isPackedVectorTy(VT)) {
-    // v2f32/v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to
-    // handle unaligned loads and have to handle it here.
-    LoadSDNode *Load = cast<LoadSDNode>(Op);
-    EVT MemVT = Load->getMemoryVT();
-    if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
-                                        MemVT, *Load->getMemOperand())) {
-      SDValue Ops[2];
-      std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
-      return DAG.getMergeValues(Ops, SDLoc(Op));
-    }
-  }
-
-  return SDValue();
-}
-
 // v = ld i1* addr
 // =>
 // v1 = ld i8* addr (-> i16)
 // v = trunc i16 to i1
-SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
-  SDNode *Node = Op.getNode();
-  LoadSDNode *LD = cast<LoadSDNode>(Node);
-  SDLoc dl(Node);
+static SDValue lowerLOADi1(LoadSDNode *LD, SelectionDAG &DAG) {
+  SDLoc dl(LD);
   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
-  assert(Node->getValueType(0) == MVT::i1 &&
-         "Custom lowering for i1 load only");
+  assert(LD->getValueType(0) == MVT::i1 && "Custom lowering for i1 load only");
   SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(),
                                  LD->getBasePtr(), LD->getPointerInfo(),
                                  MVT::i8, LD->getAlign(),
@@ -3133,8 +3105,27 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
   // The legalizer (the caller) is expecting two values from the legalized
   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
   // in LegalizeDAG.cpp which also uses MergeValues.
-  SDValue Ops[] = { result, LD->getChain() };
-  return DAG.getMergeValues(Ops, dl);
+  return DAG.getMergeValues({result, LD->getChain()}, dl);
+}
+
+SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+  if (Op.getValueType() == MVT::i1)
+    return lowerLOADi1(LD, DAG);
+
+  // To improve CodeGen we'll legalize any-extend loads to zext loads. This is
+  // how they'll be lowered in ISel anyway, and by doing this a little earlier
+  // we allow for more DAG combine opportunities.
+  if (LD->getExtensionType() == ISD::EXTLOAD) {
+    assert(LD->getValueType(0).isInteger() && LD->getMemoryVT().isInteger() &&
+           "Unexpected fpext-load");
+    return DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Op), Op.getValueType(),
+                          LD->getChain(), LD->getBasePtr(), LD->getMemoryVT(),
+                          LD->getMemOperand());
+  }
+
+  llvm_unreachable("Unexpected custom lowering for load");
 }

 SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
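
For orientation: the `Custom` actions registered in the constructor are what route these nodes here, via the target's `LowerOperation` hook. Below is an abridged, assumed sketch of that dispatch; the real switch lives elsewhere in this file and covers many more opcodes.

```cpp
// Abridged sketch of the usual TargetLowering dispatch; illustrative only.
SDValue NVPTXTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::LOAD:
    return LowerLOAD(Op, DAG); // includes the EXTLOAD -> ZEXTLOAD rewrite
  case ISD::STORE:
    return LowerSTORE(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}
```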
@@ -3144,17 +3135,6 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   if (VT == MVT::i1)
     return LowerSTOREi1(Op, DAG);

-  // v2f32/v2f16/v2bf16/v2i16/v4i8 are legal, so we can't rely on legalizer to
-  // handle unaligned stores and have to handle it here.
-  if (NVPTX::isPackedVectorTy(VT) &&
-      !allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
-                                      VT, *Store->getMemOperand()))
-    return expandUnalignedStore(Store, DAG);
-
-  // v2f16/v2bf16/v2i16 don't need special handling.
-  if (NVPTX::isPackedVectorTy(VT) && VT.is32BitVector())
-    return SDValue();
-
   // Lower store of any other vector type, including v2f32, which we want to
   // break apart since it is not a widely-supported type.
   return LowerSTOREVector(Op, DAG);
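
For context on the removed guard: `NVPTX::isPackedVectorTy` covers the small vector types the backend keeps legal because each fits in a single 32- or 64-bit register. A sketch of the set it appears to match, inferred from the removed comment; the actual helper elsewhere in the backend may differ.

```cpp
// Assumed shape of the helper, based on the removed comment
// "v2f32/v2f16/v2bf16/v2i16/v4i8 are legal"; illustrative only.
static bool isPackedVectorTy(EVT VT) {
  return VT == MVT::v4i8 || VT == MVT::v2i16 || VT == MVT::v2f16 ||
         VT == MVT::v2bf16 || VT == MVT::v2f32;
}
```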
@@ -4010,14 +3990,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
   case Intrinsic::nvvm_ldu_global_i:
   case Intrinsic::nvvm_ldu_global_f:
   case Intrinsic::nvvm_ldu_global_p: {
-    auto &DL = I.getDataLayout();
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
-      Info.memVT = getValueType(DL, I.getType());
-    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
-      Info.memVT = getPointerTy(DL);
-    else
-      Info.memVT = getValueType(DL, I.getType());
+    Info.memVT = getValueType(I.getDataLayout(), I.getType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
     Info.flags = MachineMemOperand::MOLoad;
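
Why one call now suffices: `getValueType` is assumed here to map pointer types through `getPointerTy` for the pointer's address space, so the dedicated `nvvm_ldu_global_p` branch was redundant, and the `_i` and `_f` branches were already textually identical. An illustrative sketch of that equivalence, with placeholder names:

```cpp
// Illustrative only; relies on the (assumed) TargetLoweringBase behavior
// that pointer-typed values resolve via getPointerTy.
const DataLayout &DL = I.getDataLayout();
Type *RetTy = I.getType(); // e.g. i32, float, or ptr
EVT MemVT = getValueType(DL, RetTy);
if (auto *PTy = dyn_cast<PointerType>(RetTy))
  assert(MemVT == getPointerTy(DL, PTy->getAddressSpace()) &&
         "pointer results map to the pointer MVT");
```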