@@ -1288,16 +1288,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12881288 return std::min (MaxVirtReg + MaxPhysReg, 256u );
12891289}
12901290
1291- // TODO: Migrate to range merge of amdgpu-agpr-alloc.
1292- struct AAAMDGPUNoAGPR : public StateWrapper <BooleanState , AbstractAttribute> {
1293- using Base = StateWrapper<BooleanState , AbstractAttribute>;
1294- AAAMDGPUNoAGPR (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1291+ struct AAAMDGPUMinAGPRAlloc
1292+ : public StateWrapper<DecIntegerState<> , AbstractAttribute> {
1293+ using Base = StateWrapper<DecIntegerState<> , AbstractAttribute>;
1294+ AAAMDGPUMinAGPRAlloc (const IRPosition &IRP, Attributor &A) : Base(IRP) {}
12951295
1296- static AAAMDGPUNoAGPR &createForPosition (const IRPosition &IRP,
1297- Attributor &A) {
1296+ static AAAMDGPUMinAGPRAlloc &createForPosition (const IRPosition &IRP,
1297+ Attributor &A) {
12981298 if (IRP.getPositionKind () == IRPosition::IRP_FUNCTION)
1299- return *new (A.Allocator ) AAAMDGPUNoAGPR (IRP, A);
1300- llvm_unreachable (" AAAMDGPUNoAGPR is only valid for function position" );
1299+ return *new (A.Allocator ) AAAMDGPUMinAGPRAlloc (IRP, A);
1300+ llvm_unreachable (
1301+ " AAAMDGPUMinAGPRAlloc is only valid for function position" );
13011302 }
13021303
13031304 void initialize (Attributor &A) override {
@@ -1310,25 +1311,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13101311 }
13111312
13121313 const std::string getAsStr (Attributor *A) const override {
1313- return getAssumed () ? " amdgpu-no-agpr" : " amdgpu-maybe-agpr" ;
1314+ std::string Str = " amdgpu-agpr-alloc=" ;
1315+ raw_string_ostream OS (Str);
1316+ OS << getAssumed ();
1317+ return OS.str ();
13141318 }
13151319
13161320 void trackStatistics () const override {}
13171321
13181322 ChangeStatus updateImpl (Attributor &A) override {
1319- // TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1323+ DecIntegerState<> Maximum;
13201324
1321- auto CheckForNoAGPRs = [&](Instruction &I) {
1325+ // Check for cases which require allocation of AGPRs. The only cases where
1326+ // AGPRs are required are if there are direct references to AGPRs, so inline
1327+ // assembly and special intrinsics.
1328+ auto CheckForMinAGPRAllocs = [&](Instruction &I) {
13221329 const auto &CB = cast<CallBase>(I);
13231330 const Value *CalleeOp = CB.getCalledOperand ();
1324- const Function *Callee = dyn_cast<Function>(CalleeOp);
1325- if (!Callee) {
1326- if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1327- return inlineAsmGetNumRequiredAGPRs (IA, CB) == 0 ;
1328- return false ;
1331+
1332+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1333+ // Technically, the inline asm could be invoking a call to an unknown
1334+ // external function that requires AGPRs, but ignore that.
1335+ unsigned NumRegs = inlineAsmGetNumRequiredAGPRs (IA, CB);
1336+ Maximum.takeAssumedMaximum (NumRegs);
1337+ return true ;
13291338 }
13301339
1331- switch (Callee-> getIntrinsicID ()) {
1340+ switch (CB. getIntrinsicID ()) {
13321341 case Intrinsic::not_intrinsic:
13331342 break ;
13341343 case Intrinsic::write_register:
@@ -1340,7 +1349,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13401349 ->getOperand (0 ));
13411350 auto [Kind, RegIdx, NumRegs] =
13421351 AMDGPU::parseAsmPhysRegName (RegName->getString ());
1343- return Kind != ' a' ;
1352+ if (Kind == ' a' )
1353+ Maximum.takeAssumedMaximum (std::min (RegIdx + NumRegs, 256u ));
1354+
1355+ return true ;
13441356 }
13451357 default :
13461358 // Some intrinsics may use AGPRs, but if we have a choice, we are not
@@ -1349,40 +1361,58 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13491361 }
13501362
13511363 // TODO: Handle callsite attributes
1352- const auto *CalleeInfo = A.getAAFor <AAAMDGPUNoAGPR>(
1353- *this , IRPosition::function (*Callee), DepClassTy::REQUIRED);
1354- return CalleeInfo && CalleeInfo->isValidState () &&
1355- CalleeInfo->getAssumed ();
1364+ auto *CBEdges = A.getAAFor <AACallEdges>(
1365+ *this , IRPosition::callsite_function (CB), DepClassTy::REQUIRED);
1366+ if (!CBEdges || CBEdges->hasUnknownCallee ()) {
1367+ Maximum.indicatePessimisticFixpoint ();
1368+ return false ;
1369+ }
1370+
1371+ for (const Function *PossibleCallee : CBEdges->getOptimisticEdges ()) {
1372+ const auto *CalleeInfo = A.getAAFor <AAAMDGPUMinAGPRAlloc>(
1373+ *this , IRPosition::function (*PossibleCallee), DepClassTy::REQUIRED);
1374+ if (!CalleeInfo || !CalleeInfo->isValidState ()) {
1375+ Maximum.indicatePessimisticFixpoint ();
1376+ return false ;
1377+ }
1378+
1379+ Maximum.takeAssumedMaximum (CalleeInfo->getAssumed ());
1380+ }
1381+
1382+ return true ;
13561383 };
13571384
13581385 bool UsedAssumedInformation = false ;
1359- if (!A.checkForAllCallLikeInstructions (CheckForNoAGPRs , *this ,
1386+ if (!A.checkForAllCallLikeInstructions (CheckForMinAGPRAllocs , *this ,
13601387 UsedAssumedInformation))
13611388 return indicatePessimisticFixpoint ();
1362- return ChangeStatus::UNCHANGED;
1389+
1390+ return clampStateAndIndicateChange (getState (), Maximum);
13631391 }
13641392
13651393 ChangeStatus manifest (Attributor &A) override {
1366- if (!getAssumed ())
1367- return ChangeStatus::UNCHANGED;
13681394 LLVMContext &Ctx = getAssociatedFunction ()->getContext ();
1369- return A.manifestAttrs (getIRPosition (),
1370- {Attribute::get (Ctx, " amdgpu-agpr-alloc" , " 0" )});
1395+ SmallString<4 > Buffer;
1396+ raw_svector_ostream OS (Buffer);
1397+ OS << getAssumed ();
1398+
1399+ return A.manifestAttrs (
1400+ getIRPosition (), {Attribute::get (Ctx, " amdgpu-agpr-alloc" , OS.str ())});
13711401 }
13721402
1373- StringRef getName () const override { return " AAAMDGPUNoAGPR " ; }
1403+ StringRef getName () const override { return " AAAMDGPUMinAGPRAlloc " ; }
13741404 const char *getIdAddr () const override { return &ID; }
13751405
13761406 // / This function should return true if the type of the \p AA is
1377- // / AAAMDGPUNoAGPRs
1407+ // / AAAMDGPUMinAGPRAlloc
13781408 static bool classof (const AbstractAttribute *AA) {
13791409 return (AA->getIdAddr () == &ID);
13801410 }
13811411
13821412 static const char ID;
13831413};
13841414
1385- const char AAAMDGPUNoAGPR ::ID = 0 ;
1415+ const char AAAMDGPUMinAGPRAlloc ::ID = 0 ;
13861416
13871417// / An abstract attribute to propagate the function attribute
13881418// / "amdgpu-cluster-dims" from kernel entry functions to device functions.
@@ -1550,10 +1580,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15501580 DenseSet<const char *> Allowed (
15511581 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
15521582 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1553- &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1554- &AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1555- &AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1556- &AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
1583+ &AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1584+ &AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1585+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1586+ &AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1587+ &AAAMDGPUClusterDims::ID});
15571588
15581589 AttributorConfig AC (CGUpdater);
15591590 AC.IsClosedWorldModule = Options.IsClosedWorld ;
@@ -1595,7 +1626,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15951626 A.getOrCreateAAFor <AAAMDGPUClusterDims>(IRPosition::function (*F));
15961627
15971628 if (ST.hasGFX90AInsts ())
1598- A.getOrCreateAAFor <AAAMDGPUNoAGPR >(IRPosition::function (*F));
1629+ A.getOrCreateAAFor <AAAMDGPUMinAGPRAlloc >(IRPosition::function (*F));
15991630
16001631 for (auto &I : instructions (F)) {
16011632 Value *Ptr = nullptr ;
0 commit comments