@@ -1288,16 +1288,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12881288 return std::min (MaxVirtReg + MaxPhysReg, 256u );
12891289}
12901290
1291- // TODO: Migrate to range merge of amdgpu-agpr-alloc.
1292- struct AAAMDGPUNoAGPR : public StateWrapper <BooleanState , AbstractAttribute> {
1293- using Base = StateWrapper<BooleanState , AbstractAttribute>;
1294- AAAMDGPUNoAGPR (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1291+ struct AAAMDGPUMinAGPRAlloc
1292+ : public StateWrapper<DecIntegerState<> , AbstractAttribute> {
1293+ using Base = StateWrapper<DecIntegerState<> , AbstractAttribute>;
1294+ AAAMDGPUMinAGPRAlloc (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
12951295
// Factory hook for this abstract attribute. For a function position it
// placement-constructs the attribute in the Attributor's allocator
// (A.Allocator); every other position kind falls through to llvm_unreachable.
// The removed/added pairs below differ only in the struct name.
1296- static AAAMDGPUNoAGPR &createForPosition (const IRPosition &IRP,
1297- Attributor &A) {
1296+ static AAAMDGPUMinAGPRAlloc &createForPosition (const IRPosition &IRP,
1297+ Attributor &A) {
12981298 if (IRP.getPositionKind () == IRPosition::IRP_FUNCTION)
1299- return *new (A.Allocator ) AAAMDGPUNoAGPR (IRP, A);
1300- llvm_unreachable (" AAAMDGPUNoAGPR is only valid for function position" );
1299+ return *new (A.Allocator ) AAAMDGPUMinAGPRAlloc (IRP, A);
1300+ llvm_unreachable (
1301+ " AAAMDGPUMinAGPRAlloc is only valid for function position" );
13011302 }
13021303
13031304 void initialize (Attributor &A) override {
@@ -1310,25 +1311,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13101311 }
13111312
// Printable form of the attribute state.
// Removed line: picked one of two fixed labels depending on getAssumed().
// Added lines: start from the amdgpu-agpr-alloc= prefix and append the value
// of getAssumed() by streaming it through a raw_string_ostream bound to Str.
13121313 const std::string getAsStr (Attributor *A) const override {
1313- return getAssumed () ? " amdgpu-no-agpr" : " amdgpu-maybe-agpr" ;
1314+ std::string Str = " amdgpu-agpr-alloc=" ;
1315+ raw_string_ostream OS (Str);
1316+ OS << getAssumed ();
1317+ return OS.str ();
13141318 }
13151319
// Empty override: this attribute records no statistics.
13161320 void trackStatistics () const override {}
13171321
13181322 ChangeStatus updateImpl (Attributor &A) override {
1319- // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1323+ DecIntegerState<> Maximum;
13201324
1321- auto CheckForNoAGPRs = [&](Instruction &I) {
1325+ // Check for cases which require allocation of AGPRs. The only cases where
1326+ // AGPRs are required are if there are direct references to AGPRs, so inline
1327+ // assembly and special intrinsics.
1328+ auto CheckForMinAGPRAllocs = [&](Instruction &I) {
13221329 const auto &CB = cast<CallBase>(I);
13231330 const Value *CalleeOp = CB.getCalledOperand ();
1324- const Function *Callee = dyn_cast<Function>(CalleeOp);
1325- if (!Callee) {
1326- if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1327- return inlineAsmGetNumRequiredAGPRs (IA, CB) == 0 ;
1328- return false ;
1331+
1332+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1333+ // Technically, the inline asm could be invoking a call to an unknown
1334+ // external function that requires AGPRs, but ignore that.
1335+ unsigned NumRegs = inlineAsmGetNumRequiredAGPRs (IA, CB);
1336+ Maximum.takeAssumedMaximum (NumRegs);
1337+ return true ;
13291338 }
13301339
1331- switch (Callee-> getIntrinsicID ()) {
1340+ switch (CB. getIntrinsicID ()) {
13321341 case Intrinsic::not_intrinsic:
13331342 break ;
13341343 case Intrinsic::write_register:
@@ -1338,7 +1347,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13381347 cast<MetadataAsValue>(CB.getArgOperand (0 ))->getMetadata ());
13391348 auto [Kind, RegIdx, NumRegs] =
13401349 AMDGPU::parseAsmPhysRegName (RegName->getString ());
1341- return Kind != ' a' ;
1350+ if (Kind == ' a' )
1351+ Maximum.takeAssumedMaximum (std::min (RegIdx + NumRegs, 256u ));
1352+
1353+ return true ;
13421354 }
13431355 default :
13441356 // Some intrinsics may use AGPRs, but if we have a choice, we are not
@@ -1347,40 +1359,58 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13471359 }
13481360
13491361 // TODO: Handle callsite attributes
1350- const auto *CalleeInfo = A.getAAFor <AAAMDGPUNoAGPR>(
1351- *this , IRPosition::function (*Callee), DepClassTy::REQUIRED);
1352- return CalleeInfo && CalleeInfo->isValidState () &&
1353- CalleeInfo->getAssumed ();
1362+ auto *CBEdges = A.getAAFor <AACallEdges>(
1363+ *this , IRPosition::callsite_function (CB), DepClassTy::REQUIRED);
1364+ if (!CBEdges || CBEdges->hasUnknownCallee ()) {
1365+ Maximum.indicatePessimisticFixpoint ();
1366+ return false ;
1367+ }
1368+
1369+ for (const Function *PossibleCallee : CBEdges->getOptimisticEdges ()) {
1370+ const auto *CalleeInfo = A.getAAFor <AAAMDGPUMinAGPRAlloc>(
1371+ *this , IRPosition::function (*PossibleCallee), DepClassTy::REQUIRED);
1372+ if (!CalleeInfo || !CalleeInfo->isValidState ()) {
1373+ Maximum.indicatePessimisticFixpoint ();
1374+ return false ;
1375+ }
1376+
1377+ Maximum.takeAssumedMaximum (CalleeInfo->getAssumed ());
1378+ }
1379+
1380+ return true ;
13541381 };
13551382
13561383 bool UsedAssumedInformation = false ;
1357- if (!A.checkForAllCallLikeInstructions (CheckForNoAGPRs , *this ,
1384+ if (!A.checkForAllCallLikeInstructions (CheckForMinAGPRAllocs , *this ,
13581385 UsedAssumedInformation))
13591386 return indicatePessimisticFixpoint ();
1360- return ChangeStatus::UNCHANGED;
1387+
1388+ return clampStateAndIndicateChange (getState (), Maximum);
13611389 }
13621390
// Writes the deduced result back to the IR as the string function attribute
// amdgpu-agpr-alloc at this attribute's IR position.
// Removed lines: returned UNCHANGED when getAssumed() was false, otherwise
// manifested the attribute with the fixed value 0.
// Added lines: no early-out; getAssumed() is streamed into a SmallString<4>
// through a raw_svector_ostream and that text becomes the attribute value.
13631391 ChangeStatus manifest (Attributor &A) override {
1364- if (!getAssumed ())
1365- return ChangeStatus::UNCHANGED;
13661392 LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
1367- return A.manifestAttrs (getIRPosition (),
1368- {Attribute::get (Ctx, " amdgpu-agpr-alloc" , " 0" )});
1393+ SmallString<4 > Buffer;
1394+ raw_svector_ostream OS (Buffer);
1395+ OS << getAssumed ();
1396+
1397+ return A.manifestAttrs (
1398+ getIRPosition (), {Attribute::get (Ctx, " amdgpu-agpr-alloc" , OS.str ())});
13691399 }
13701400
// Name reported for this abstract attribute; the returned string is renamed
// together with the struct (AAAMDGPUNoAGPR -> AAAMDGPUMinAGPRAlloc).
1371- StringRef getName () const override { return " AAAMDGPUNoAGPR " ; }
1401+ StringRef getName () const override { return " AAAMDGPUMinAGPRAlloc " ; }
// Returns the address of the static ID member; classof() compares against
// this same address to recognise the attribute type.
13721402 const char *getIdAddr () const override { return &ID; }
13731403
13741404 // / This function should return true if the type of the \p AA is
1375- // / AAAMDGPUNoAGPRs
1405+ // / AAAMDGPUMinAGPRAllocs
// NOTE(review): the struct is named AAAMDGPUMinAGPRAlloc; the trailing "s" in
// the doc comment above was carried over by the rename and is not part of
// the type name.
// The check is an identity comparison: AA's getIdAddr() against &ID.
13761406 static bool classof (const AbstractAttribute *AA) {
13771407 return (AA->getIdAddr () == &ID);
13781408 }
13791409
13801410 static const char ID;
13811411};
13821412
// Out-of-class definition of the static ID member. The code shown in this
// diff only takes its address (getIdAddr, classof, the Allowed set in
// runImpl); the stored value 0 is never read here.
1383- const char AAAMDGPUNoAGPR ::ID = 0 ;
1413+ const char AAAMDGPUMinAGPRAlloc ::ID = 0 ;
13841414
13851415// / An abstract attribute to propagate the function attribute
13861416// / "amdgpu-cluster-dims" from kernel entry functions to device functions.
@@ -1548,10 +1578,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15481578 DenseSet<const char *> Allowed (
15491579 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
15501580 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1551- &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1552- &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1553- &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1554- &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
1581+ &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1582+ &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1583+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1584+ &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1585+ &AAAMDGPUClusterDims::ID});
15551586
15561587 AttributorConfig AC (CGUpdater);
15571588 AC.IsClosedWorldModule = Options.IsClosedWorld ;
@@ -1593,7 +1624,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15931624 A.getOrCreateAAFor <AAAMDGPUClusterDims>(IRPosition::function (*F));
15941625
15951626 if (ST.hasGFX90AInsts ())
1596- A.getOrCreateAAFor <AAAMDGPUNoAGPR >(IRPosition::function (*F));
1627+ A.getOrCreateAAFor <AAAMDGPUMinAGPRAlloc >(IRPosition::function (*F));
15971628
15981629 for (auto &I : instructions (F)) {
15991630 Value *Ptr = nullptr ;
0 commit comments