|
14 | 14 | #include "GCNSubtarget.h" |
15 | 15 | #include "Utils/AMDGPUBaseInfo.h" |
16 | 16 | #include "llvm/Analysis/CycleAnalysis.h" |
| 17 | +#include "llvm/Analysis/TargetTransformInfo.h" |
17 | 18 | #include "llvm/CodeGen/TargetPassConfig.h" |
18 | 19 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
19 | 20 | #include "llvm/IR/IntrinsicsR600.h" |
@@ -1038,12 +1039,24 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) { |
1038 | 1039 | &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, |
1039 | 1040 | &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID, |
1040 | 1041 | &AAPointerInfo::ID, &AAPotentialConstantValues::ID, |
1041 | | - &AAUnderlyingObjects::ID}); |
| 1042 | + &AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID, &AAInstanceInfo::ID}); |
1042 | 1043 |
|
1043 | 1044 | AttributorConfig AC(CGUpdater); |
1044 | 1045 | AC.Allowed = &Allowed; |
1045 | 1046 | AC.IsModulePass = true; |
1046 | 1047 | AC.DefaultInitializeLiveInternals = false; |
| 1048 | + AC.IndirectCalleeSpecializationCallback = |
| 1049 | + [&TM](Attributor &A, const AbstractAttribute &AA, CallBase &CB, |
| 1050 | + Function &Callee, unsigned NumAssummedCallee) { |
| 1051 | + if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv())) |
| 1052 | + return false; |
| 1053 | + // A call site with exactly one assumed callee can always be specialized. |
| 1054 | + if (NumAssummedCallee == 1) |
| 1055 | + return true; |
| 1056 | + // With several assumed callees, specialize only if the caller's TTI reports the called operand as always uniform. |
| 1057 | + const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller()); |
| 1058 | + return TTI.isAlwaysUniform(CB.getCalledOperand()); |
| 1059 | + }; |
1047 | 1060 | AC.IPOAmendableCB = [](const Function &F) { |
1048 | 1061 | return F.getCallingConv() == CallingConv::AMDGPU_KERNEL; |
1049 | 1062 | }; |
|
0 commit comments