@@ -77,6 +77,13 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
7777 case Intrinsic::amdgcn_workgroup_id_z:
7878 case Intrinsic::r600_read_tgid_z:
7979 return WORKGROUP_ID_Z;
80+ case Intrinsic::amdgcn_cluster_id_x:
81+ NonKernelOnly = true ;
82+ return CLUSTER_ID_X;
83+ case Intrinsic::amdgcn_cluster_id_y:
84+ return CLUSTER_ID_Y;
85+ case Intrinsic::amdgcn_cluster_id_z:
86+ return CLUSTER_ID_Z;
8087 case Intrinsic::amdgcn_lds_kernel_id:
8188 return LDS_KERNEL_ID;
8289 case Intrinsic::amdgcn_dispatch_ptr:
@@ -1296,6 +1303,157 @@ struct AAAMDGPUNoAGPR
12961303
12971304const char AAAMDGPUNoAGPR::ID = 0 ;
12981305
1306+ // / An abstract attribute to propagate the function attribute
1307+ // / "amdgpu-cluster-dims" from kernel entry functions to device functions.
1308+ struct AAAMDGPUClusterDims
1309+ : public StateWrapper<BooleanState, AbstractAttribute> {
1310+ using Base = StateWrapper<BooleanState, AbstractAttribute>;
1311+ AAAMDGPUClusterDims (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1312+
1313+ // / Create an abstract attribute view for the position \p IRP.
1314+ static AAAMDGPUClusterDims &createForPosition (const IRPosition &IRP,
1315+ Attributor &A);
1316+
1317+ // / See AbstractAttribute::getName().
1318+ StringRef getName () const override { return " AAAMDGPUClusterDims" ; }
1319+
1320+ // / See AbstractAttribute::getIdAddr().
1321+ const char *getIdAddr () const override { return &ID; }
1322+
1323+ // / This function should return true if the type of the \p AA is
1324+ // / AAAMDGPUClusterDims.
1325+ static bool classof (const AbstractAttribute *AA) {
1326+ return (AA->getIdAddr () == &ID);
1327+ }
1328+
1329+ virtual const AMDGPU::ClusterDimsAttr &getClusterDims () const = 0;
1330+
1331+ // / Unique ID (due to the unique address)
1332+ static const char ID;
1333+ };
1334+
1335+ const char AAAMDGPUClusterDims::ID = 0 ;
1336+
1337+ struct AAAMDGPUClusterDimsFunction : public AAAMDGPUClusterDims {
1338+ AAAMDGPUClusterDimsFunction (const IRPosition &IRP, Attributor &A)
1339+ : AAAMDGPUClusterDims(IRP, A) {}
1340+
1341+ void initialize (Attributor &A) override {
1342+ Function *F = getAssociatedFunction ();
1343+ assert (F && " empty associated function" );
1344+
1345+ Attr = AMDGPU::ClusterDimsAttr::get (*F);
1346+
1347+ // No matter what a kernel function has, it is final.
1348+ if (AMDGPU::isEntryFunctionCC (F->getCallingConv ())) {
1349+ if (Attr.isUnknown ())
1350+ indicatePessimisticFixpoint ();
1351+ else
1352+ indicateOptimisticFixpoint ();
1353+ }
1354+ }
1355+
1356+ const std::string getAsStr (Attributor *A) const override {
1357+ if (!getAssumed () || Attr.isUnknown ())
1358+ return " unknown" ;
1359+ if (Attr.isNoCluster ())
1360+ return " no" ;
1361+ if (Attr.isVariableedDims ())
1362+ return " variable" ;
1363+ return Attr.to_string ();
1364+ }
1365+
1366+ void trackStatistics () const override {}
1367+
1368+ ChangeStatus updateImpl (Attributor &A) override {
1369+ auto OldState = Attr;
1370+
1371+ auto CheckCallSite = [&](AbstractCallSite CS) {
1372+ const auto *CallerAA = A.getAAFor <AAAMDGPUClusterDims>(
1373+ *this , IRPosition::function (*CS.getInstruction ()->getFunction ()),
1374+ DepClassTy::REQUIRED);
1375+ if (!CallerAA || !CallerAA->isValidState ())
1376+ return false ;
1377+
1378+ return merge (CallerAA->getClusterDims ());
1379+ };
1380+
1381+ bool UsedAssumedInformation = false ;
1382+ if (!A.checkForAllCallSites (CheckCallSite, *this ,
1383+ /* RequireAllCallSites=*/ true ,
1384+ UsedAssumedInformation))
1385+ return indicatePessimisticFixpoint ();
1386+
1387+ return OldState == Attr ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
1388+ }
1389+
1390+ ChangeStatus manifest (Attributor &A) override {
1391+ if (Attr.isUnknown ())
1392+ return ChangeStatus::UNCHANGED;
1393+ return A.manifestAttrs (
1394+ getIRPosition (),
1395+ {Attribute::get (getAssociatedFunction ()->getContext (), AttrName,
1396+ Attr.to_string ())},
1397+ /* ForceReplace=*/ true );
1398+ }
1399+
1400+ const AMDGPU::ClusterDimsAttr &getClusterDims () const override {
1401+ return Attr;
1402+ }
1403+
1404+ private:
1405+ bool merge (const AMDGPU::ClusterDimsAttr &Other) {
1406+ // Case 1: Both of them are unknown yet, we do nothing and continue wait for
1407+ // propagation.
1408+ if (Attr.isUnknown () && Other.isUnknown ())
1409+ return true ;
1410+
1411+ // Case 2: The other is determined, but we are unknown yet, we simply take
1412+ // the other's value.
1413+ if (Attr.isUnknown ()) {
1414+ Attr = Other;
1415+ return true ;
1416+ }
1417+
1418+ // Case 3: We are determined but the other is unknown yet, we simply keep
1419+ // everything unchanged.
1420+ if (Other.isUnknown ())
1421+ return true ;
1422+
1423+ // After this point, both are determined.
1424+
1425+ // Case 4: If they are same, we do nothing.
1426+ if (Attr == Other)
1427+ return true ;
1428+
1429+ // Now they are not same.
1430+
1431+ // Case 5: If either of us uses cluster (but not both; otherwise case 4
1432+ // would hold), then it is unknown whether cluster will be used, and the
1433+ // state is final, unlike case 1.
1434+ if (Attr.isNoCluster () || Other.isNoCluster ()) {
1435+ Attr.setUnknown ();
1436+ return false ;
1437+ }
1438+
1439+ // Case 6: Both of us use cluster, but the dims are different, so the result
1440+ // is, cluster is used, but we just don't have a fixed dims.
1441+ Attr.setVariableDims ();
1442+ return true ;
1443+ }
1444+
1445+ AMDGPU::ClusterDimsAttr Attr;
1446+
1447+ static constexpr const char AttrName[] = " amdgpu-cluster-dims" ;
1448+ };
1449+
1450+ AAAMDGPUClusterDims &
1451+ AAAMDGPUClusterDims::createForPosition (const IRPosition &IRP, Attributor &A) {
1452+ if (IRP.getPositionKind () == IRPosition::IRP_FUNCTION)
1453+ return *new (A.Allocator ) AAAMDGPUClusterDimsFunction (IRP, A);
1454+ llvm_unreachable (" AAAMDGPUClusterDims is only valid for function position" );
1455+ }
1456+
12991457static bool runImpl (Module &M, AnalysisGetter &AG, TargetMachine &TM,
13001458 AMDGPUAttributorOptions Options,
13011459 ThinOrFullLTOPhase LTOPhase) {
@@ -1314,7 +1472,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13141472 &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
13151473 &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
13161474 &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1317- &AAIndirectCallInfo::ID});
1475+ &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID });
13181476
13191477 AttributorConfig AC (CGUpdater);
13201478 AC.IsClosedWorldModule = Options.IsClosedWorld ;
@@ -1352,6 +1510,10 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13521510 A.getOrCreateAAFor <AAAMDWavesPerEU>(IRPosition::function (*F));
13531511 }
13541512
1513+ const GCNSubtarget &ST = TM.getSubtarget <GCNSubtarget>(*F);
1514+ if (!F->isDeclaration () && ST.hasClusters ())
1515+ A.getOrCreateAAFor <AAAMDGPUClusterDims>(IRPosition::function (*F));
1516+
13551517 for (auto &I : instructions (F)) {
13561518 Value *Ptr = nullptr ;
13571519 if (auto *LI = dyn_cast<LoadInst>(&I))
0 commit comments