Skip to content

Commit 1cc1194

Browse files
committed
AMDGPU: Render non-0 values for amdgpu-agpr-alloc
This now tries to compute a lower bound on the number of registers for individual inline asm uses. It also starts using AACallEdges to handle indirect calls.
1 parent 424d6ac commit 1cc1194

File tree

2 files changed

+385
-93
lines changed

2 files changed

+385
-93
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 67 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1288,16 +1288,17 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
12881288
return std::min(MaxVirtReg + MaxPhysReg, 256u);
12891289
}
12901290

1291-
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
1292-
struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
1293-
using Base = StateWrapper<BooleanState, AbstractAttribute>;
1294-
AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
1291+
struct AAAMDGPUMinAGPRAlloc
1292+
: public StateWrapper<DecIntegerState<>, AbstractAttribute> {
1293+
using Base = StateWrapper<DecIntegerState<>, AbstractAttribute>;
1294+
AAAMDGPUMinAGPRAlloc(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
12951295

1296-
static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,
1297-
Attributor &A) {
1296+
static AAAMDGPUMinAGPRAlloc &createForPosition(const IRPosition &IRP,
1297+
Attributor &A) {
12981298
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
1299-
return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);
1300-
llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");
1299+
return *new (A.Allocator) AAAMDGPUMinAGPRAlloc(IRP, A);
1300+
llvm_unreachable(
1301+
"AAAMDGPUMinAGPRAlloc is only valid for function position");
13011302
}
13021303

13031304
void initialize(Attributor &A) override {
@@ -1310,25 +1311,33 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13101311
}
13111312

13121313
const std::string getAsStr(Attributor *A) const override {
1313-
return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";
1314+
std::string Str = "amdgpu-agpr-alloc=";
1315+
raw_string_ostream OS(Str);
1316+
OS << getAssumed();
1317+
return OS.str();
13141318
}
13151319

13161320
void trackStatistics() const override {}
13171321

13181322
ChangeStatus updateImpl(Attributor &A) override {
1319-
// TODO: Use AACallEdges, but then we need a way to inspect asm edges.
1323+
DecIntegerState<> Maximum;
13201324

1321-
auto CheckForNoAGPRs = [&](Instruction &I) {
1325+
// Check for cases which require allocation of AGPRs. The only cases where
1326+
// AGPRs are required are if there are direct references to AGPRs, so inline
1327+
// assembly and special intrinsics.
1328+
auto CheckForMinAGPRAllocs = [&](Instruction &I) {
13221329
const auto &CB = cast<CallBase>(I);
13231330
const Value *CalleeOp = CB.getCalledOperand();
1324-
const Function *Callee = dyn_cast<Function>(CalleeOp);
1325-
if (!Callee) {
1326-
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
1327-
return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
1328-
return false;
1331+
1332+
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp)) {
1333+
// Technically, the inline asm could be invoking a call to an unknown
1334+
// external function that requires AGPRs, but ignore that.
1335+
unsigned NumRegs = inlineAsmGetNumRequiredAGPRs(IA, CB);
1336+
Maximum.takeAssumedMaximum(NumRegs);
1337+
return true;
13291338
}
13301339

1331-
switch (Callee->getIntrinsicID()) {
1340+
switch (CB.getIntrinsicID()) {
13321341
case Intrinsic::not_intrinsic:
13331342
break;
13341343
case Intrinsic::write_register:
@@ -1338,7 +1347,10 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13381347
cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
13391348
auto [Kind, RegIdx, NumRegs] =
13401349
AMDGPU::parseAsmPhysRegName(RegName->getString());
1341-
return Kind != 'a';
1350+
if (Kind == 'a')
1351+
Maximum.takeAssumedMaximum(std::min(RegIdx + NumRegs, 256u));
1352+
1353+
return true;
13421354
}
13431355
default:
13441356
// Some intrinsics may use AGPRs, but if we have a choice, we are not
@@ -1347,40 +1359,58 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
13471359
}
13481360

13491361
// TODO: Handle callsite attributes
1350-
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
1351-
*this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
1352-
return CalleeInfo && CalleeInfo->isValidState() &&
1353-
CalleeInfo->getAssumed();
1362+
auto *CBEdges = A.getAAFor<AACallEdges>(
1363+
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
1364+
if (!CBEdges || CBEdges->hasUnknownCallee()) {
1365+
Maximum.indicatePessimisticFixpoint();
1366+
return false;
1367+
}
1368+
1369+
for (const Function *PossibleCallee : CBEdges->getOptimisticEdges()) {
1370+
const auto *CalleeInfo = A.getAAFor<AAAMDGPUMinAGPRAlloc>(
1371+
*this, IRPosition::function(*PossibleCallee), DepClassTy::REQUIRED);
1372+
if (!CalleeInfo || !CalleeInfo->isValidState()) {
1373+
Maximum.indicatePessimisticFixpoint();
1374+
return false;
1375+
}
1376+
1377+
Maximum.takeAssumedMaximum(CalleeInfo->getAssumed());
1378+
}
1379+
1380+
return true;
13541381
};
13551382

13561383
bool UsedAssumedInformation = false;
1357-
if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,
1384+
if (!A.checkForAllCallLikeInstructions(CheckForMinAGPRAllocs, *this,
13581385
UsedAssumedInformation))
13591386
return indicatePessimisticFixpoint();
1360-
return ChangeStatus::UNCHANGED;
1387+
1388+
return clampStateAndIndicateChange(getState(), Maximum);
13611389
}
13621390

13631391
ChangeStatus manifest(Attributor &A) override {
1364-
if (!getAssumed())
1365-
return ChangeStatus::UNCHANGED;
13661392
LLVMContext &Ctx = getAssociatedFunction()->getContext();
1367-
return A.manifestAttrs(getIRPosition(),
1368-
{Attribute::get(Ctx, "amdgpu-agpr-alloc", "0")});
1393+
SmallString<4> Buffer;
1394+
raw_svector_ostream OS(Buffer);
1395+
OS << getAssumed();
1396+
1397+
return A.manifestAttrs(
1398+
getIRPosition(), {Attribute::get(Ctx, "amdgpu-agpr-alloc", OS.str())});
13691399
}
13701400

1371-
StringRef getName() const override { return "AAAMDGPUNoAGPR"; }
1401+
StringRef getName() const override { return "AAAMDGPUMinAGPRAlloc"; }
13721402
const char *getIdAddr() const override { return &ID; }
13731403

13741404
/// This function should return true if the type of the \p AA is
1375-
/// AAAMDGPUNoAGPRs
1405+
/// AAAMDGPUMinAGPRAlloc
13761406
static bool classof(const AbstractAttribute *AA) {
13771407
return (AA->getIdAddr() == &ID);
13781408
}
13791409

13801410
static const char ID;
13811411
};
13821412

1383-
const char AAAMDGPUNoAGPR::ID = 0;
1413+
const char AAAMDGPUMinAGPRAlloc::ID = 0;
13841414

13851415
/// An abstract attribute to propagate the function attribute
13861416
/// "amdgpu-cluster-dims" from kernel entry functions to device functions.
@@ -1548,10 +1578,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15481578
DenseSet<const char *> Allowed(
15491579
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
15501580
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
1551-
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
1552-
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
1553-
&AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
1554-
&AAIndirectCallInfo::ID, &AAAMDGPUClusterDims::ID});
1581+
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID,
1582+
&AAAMDGPUMinAGPRAlloc::ID, &AACallEdges::ID, &AAPointerInfo::ID,
1583+
&AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
1584+
&AANoAliasAddrSpace::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
1585+
&AAAMDGPUClusterDims::ID});
15551586

15561587
AttributorConfig AC(CGUpdater);
15571588
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1593,7 +1624,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
15931624
A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
15941625

15951626
if (ST.hasGFX90AInsts())
1596-
A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(*F));
1627+
A.getOrCreateAAFor<AAAMDGPUMinAGPRAlloc>(IRPosition::function(*F));
15971628

15981629
for (auto &I : instructions(F)) {
15991630
Value *Ptr = nullptr;

0 commit comments

Comments
 (0)