|
14 | 14 | #include "GCNSubtarget.h" |
15 | 15 | #include "Utils/AMDGPUBaseInfo.h" |
16 | 16 | #include "llvm/Analysis/CycleAnalysis.h" |
| 17 | +#include "llvm/Analysis/TargetTransformInfo.h" |
17 | 18 | #include "llvm/CodeGen/TargetPassConfig.h" |
18 | 19 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
19 | 20 | #include "llvm/IR/IntrinsicsR600.h" |
@@ -1041,11 +1042,28 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM, |
1041 | 1042 | &AAPointerInfo::ID, &AAPotentialConstantValues::ID, |
1042 | 1043 | &AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID}); |
1043 | 1044 |
|
| 1045 | + /// Decides whether the indirect call \p CB should be specialized for \p Callee; callees with an entry-function calling convention are always rejected. |
| 1046 | + /// \p IsSingleton indicates whether \p Callee is the only assumed callee of \p CB. |
| 1047 | + auto IndirectCalleeSpecializationCallback = |
| 1048 | + [&](Attributor &A, const AbstractAttribute &AA, CallBase &CB, |
| 1049 | + Function &Callee, bool IsSingleton) { |
| 1050 | + if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv())) |
| 1051 | + return false; |
| 1052 | + // A sole assumed callee is always specialized. |
| 1053 | + if (IsSingleton) |
| 1054 | + return true; |
| 1055 | + // Otherwise specialize only when TTI reports the called operand of CB as always uniform. |
| 1056 | + const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller()); |
| 1057 | + return TTI.isAlwaysUniform(CB.getCalledOperand()); |
| 1058 | + }; |
| 1059 | + |
1044 | 1060 | AttributorConfig AC(CGUpdater); |
1045 | 1061 | AC.IsClosedWorldModule = HasWholeProgramVisibility; |
1046 | 1062 | AC.Allowed = &Allowed; |
1047 | 1063 | AC.IsModulePass = true; |
1048 | 1064 | AC.DefaultInitializeLiveInternals = false; |
| 1065 | + AC.IndirectCalleeSpecializationCallback = |
| 1066 | + IndirectCalleeSpecializationCallback; |
1049 | 1067 | AC.IPOAmendableCB = [](const Function &F) { |
1050 | 1068 | return F.getCallingConv() == CallingConv::AMDGPU_KERNEL; |
1051 | 1069 | }; |
|
0 commit comments