1414#include " GCNSubtarget.h"
1515#include " Utils/AMDGPUBaseInfo.h"
1616#include " llvm/Analysis/CycleAnalysis.h"
17+ #include " llvm/Analysis/TargetTransformInfo.h"
1718#include " llvm/CodeGen/TargetPassConfig.h"
19+ #include " llvm/IR/CallingConv.h"
1820#include " llvm/IR/IntrinsicsAMDGPU.h"
1921#include " llvm/IR/IntrinsicsR600.h"
22+ #include " llvm/Support/Casting.h"
2023#include " llvm/Target/TargetMachine.h"
2124#include " llvm/Transforms/IPO/Attributor.h"
25+ #include < optional>
2226
2327#define DEBUG_TYPE " amdgpu-attributor"
2428
@@ -933,7 +937,8 @@ static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
933937 }
934938}
935939
936- static bool runImpl (Module &M, AnalysisGetter &AG, TargetMachine &TM) {
940+ static bool runImpl (Module &M, AnalysisGetter &AG, TargetMachine &TM,
941+ bool HasWholeProgramVisibility) {
937942 SetVector<Function *> Functions;
938943 for (Function &F : M) {
939944 if (!F.isIntrinsic ())
@@ -947,12 +952,31 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
947952 {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
948953 &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
949954 &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
950- &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
955+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID,
956+ &AAIndirectCallInfo::ID});
957+
958+ // / Helper to decide if we should specialize the indirect \p CB for \p Callee,
959+ // / which is one of the \p NumCallees potential callees.
960+ auto IndirectCalleeSpecializationCallback =
961+ [&](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
962+ Function &Callee, unsigned NumCallees) {
963+ if (AMDGPU::isEntryFunctionCC (Callee.getCallingConv ()))
964+ return false ; // Never specialize for a callee with an entry-function calling convention.
965+ // A call site with exactly one potential callee is always specialized.
966+ if (NumCallees == 1 )
967+ return true ;
968+ // Otherwise, specialize only when the called operand is always uniform.
969+ const auto &TTI = TM.getTargetTransformInfo (*CB.getCaller ());
970+ return TTI.isAlwaysUniform (CB.getCalledOperand ());
971+ };
951972
952973 AttributorConfig AC (CGUpdater);
953974 AC.Allowed = &Allowed;
954975 AC.IsModulePass = true ;
955976 AC.DefaultInitializeLiveInternals = false ;
977+ AC.IsClosedWorldModule = HasWholeProgramVisibility;
978+ AC.IndirectCalleeSpecializationCallback =
979+ IndirectCalleeSpecializationCallback;
956980 AC.IPOAmendableCB = [](const Function &F) {
957981 return F.getCallingConv () == CallingConv::AMDGPU_KERNEL;
958982 };
@@ -978,8 +1002,12 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
9781002}
9791003
9801004class AMDGPUAttributorLegacy : public ModulePass {
1005+ // / Whether we can assume whole program visibility during codegen.
1006+ bool HasWholeProgramVisibility = false ;
1007+
9811008public:
982- AMDGPUAttributorLegacy () : ModulePass(ID) {}
1009+ AMDGPUAttributorLegacy (bool HasWholeProgramVisibility = false )
1010+ : ModulePass(ID), HasWholeProgramVisibility(HasWholeProgramVisibility) {}
9831011
9841012 // / doInitialization - Virtual method overridden by subclasses to do
9851013 // / any necessary initialization before any pass is run.
@@ -994,7 +1022,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
9941022
9951023 bool runOnModule (Module &M) override {
9961024 AnalysisGetter AG (this );
997- return runImpl (M, AG, *TM);
1025+ return runImpl (M, AG, *TM, HasWholeProgramVisibility );
9981026 }
9991027
10001028 void getAnalysisUsage (AnalysisUsage &AU) const override {
@@ -1015,14 +1043,15 @@ PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,
10151043 AnalysisGetter AG (FAM);
10161044
10171045 // TODO: Probably preserves CFG
1018- return runImpl (M, AG, TM) ? PreservedAnalyses::none ()
1019- : PreservedAnalyses::all ();
1046+ return runImpl (M, AG, TM, HasWholeProgramVisibility)
1047+ ? PreservedAnalyses::none ()
1048+ : PreservedAnalyses::all ();
10201049}
10211050
10221051char AMDGPUAttributorLegacy::ID = 0 ;
10231052
1024- Pass *llvm::createAMDGPUAttributorLegacyPass () {
1025- return new AMDGPUAttributorLegacy ();
1053+ Pass *llvm::createAMDGPUAttributorLegacyPass (bool HasWholeProgramVisibility ) {
1054+ return new AMDGPUAttributorLegacy (HasWholeProgramVisibility );
10261055}
10271056INITIALIZE_PASS_BEGIN (AMDGPUAttributorLegacy, DEBUG_TYPE, " AMDGPU Attributor" ,
10281057 false , false )
0 commit comments