2727#include " llvm/Analysis/TargetTransformInfo.h"
2828#include " llvm/Analysis/ValueTracking.h"
2929#include " llvm/Config/llvm-config.h"
30+ #include " llvm/IR/AssemblyAnnotationWriter.h"
3031#include " llvm/IR/CallingConv.h"
3132#include " llvm/IR/DataLayout.h"
3233#include " llvm/IR/Dominators.h"
3839#include " llvm/IR/PatternMatch.h"
3940#include " llvm/Support/CommandLine.h"
4041#include " llvm/Support/Debug.h"
42+ #include " llvm/Support/FormattedStream.h"
4143#include " llvm/Support/raw_ostream.h"
4244
4345using namespace llvm ;
@@ -51,6 +53,10 @@ static cl::opt<int>
5153 cl::ZeroOrMore,
5254 cl::desc(" Default amount of inlining to perform" ));
5355
// Hidden boolean command-line flag, off by default. When it is set,
// InlineCostCallAnalyzer records the cost and threshold around every visited
// instruction, and InlineCostCallAnalyzer::dump() prints the function
// annotated with those values.
56+ static cl::opt<bool > PrintDebugInstructionDeltas (" print-instruction-deltas" ,
57+ cl::Hidden, cl::init(false ),
58+ cl::desc(" Prints deltas of cost and threshold per instruction" ));
59+
// Hidden integer command-line option with an initial value of 225; it is
// declared cl::ZeroOrMore, so it may be given any number of times.
5460static cl::opt<int > InlineThreshold (
5561 " inline-threshold" , cl::Hidden, cl::init(225 ), cl::ZeroOrMore,
5662 cl::desc(" Control the amount of inlining to perform (default = 225)" ));
@@ -99,6 +105,26 @@ static cl::opt<bool> OptComputeFullInlineCost(
99105
100106namespace {
101107class InlineCostCallAnalyzer ;
108+
/// Snapshot of the inline-cost analysis state around a single instruction:
/// the accumulated cost and the threshold as observed just before and just
/// after the instruction was analyzed.
///
/// Every field defaults to zero so that a record which was created but never
/// (or only partially) filled in reads as "no change" rather than holding
/// indeterminate values.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;
};
117+
118+ class CostAnnotationWriter : public AssemblyAnnotationWriter {
119+ public:
120+ // This DenseMap stores the delta change in cost and threshold after
121+ // accounting for the given instruction.
122+ DenseMap <const Instruction *, InstructionCostDetail> CostThresholdMap;
123+
124+ virtual void emitInstructionAnnot (const Instruction *I,
125+ formatted_raw_ostream &OS);
126+ };
127+
102128class CallAnalyzer : public InstVisitor <CallAnalyzer, bool > {
103129 typedef InstVisitor<CallAnalyzer, bool > Base;
104130 friend class InstVisitor <CallAnalyzer, bool >;
@@ -135,6 +161,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
135161 // / Called after a basic block was analyzed.
// The base implementation is intentionally empty; derived analyzers may
// override it.
136162 virtual void onBlockAnalyzed (const BasicBlock *BB) {}
137163
164+ // / Called by analyzeBlock immediately before an instruction is visited.
// The base implementation is intentionally empty; InlineCostCallAnalyzer
// overrides it to record the cost and threshold seen before the visit.
165+ virtual void onInstructionAnalysisStart (const Instruction *I) {}
166+
167+ // / Called by analyzeBlock right after an instruction has been visited.
// The base implementation is intentionally empty; InlineCostCallAnalyzer
// overrides it to record the cost and threshold seen after the visit.
168+ virtual void onInstructionAnalysisFinish (const Instruction *I) {}
169+
138170 // / Called at the end of the analysis of the callsite. Return the outcome of
139171 // / the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
140172 // / the reason it can't.
@@ -538,6 +570,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
538570 }
539571 }
540572
573+ void onInstructionAnalysisStart (const Instruction *I) override {
574+ // This function is called to store the initial cost of inlining before
575+ // the given instruction was assessed.
576+ if (!PrintDebugInstructionDeltas)
577+ return ;
578+ Writer.CostThresholdMap [I].CostBefore = Cost;
579+ Writer.CostThresholdMap [I].ThresholdBefore = Threshold;
580+ }
581+
582+ void onInstructionAnalysisFinish (const Instruction *I) override {
583+ // This function is called to find new values of cost and threshold after
584+ // the instruction has been assessed.
585+ if (!PrintDebugInstructionDeltas)
586+ return ;
587+ Writer.CostThresholdMap [I].CostAfter = Cost;
588+ Writer.CostThresholdMap [I].ThresholdAfter = Threshold;
589+ }
590+
541591 InlineResult finalizeAnalysis () override {
542592 // Loops generally act a lot like calls in that they act like barriers to
543593 // movement, require a certain amount of setup, etc. So when optimising for
@@ -637,6 +687,10 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
637687 Params.ComputeFullInlineCost || ORE),
638688 Params(Params), Threshold(Params.DefaultThreshold),
639689 BoostIndirectCalls(BoostIndirect) {}
690+
691+ // / Annotation Writer for cost annotation
692+ CostAnnotationWriter Writer;
693+
640694 void dump ();
641695
642696 virtual ~InlineCostCallAnalyzer () {}
@@ -655,6 +709,25 @@ void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
655709 EnabledSROAAllocas.erase (SROAArg);
656710 disableLoadElimination ();
657711}
712+
713+ void CostAnnotationWriter::emitInstructionAnnot (
714+ const Instruction *I, formatted_raw_ostream &OS) {
715+ // The cost of inlining of the given instruction is printed always.
716+ // The threshold delta is printed only when it is non-zero. It happens
717+ // when we decided to give a bonus at a particular instruction.
718+ OS << " ; cost before = " << CostThresholdMap[I].CostBefore <<
719+ " , cost after = " << CostThresholdMap[I].CostAfter <<
720+ " , threshold before = " << CostThresholdMap[I].ThresholdBefore <<
721+ " , threshold after = " << CostThresholdMap[I].ThresholdAfter <<
722+ " , " ;
723+ OS << " cost delta = " << CostThresholdMap[I].CostAfter -
724+ CostThresholdMap[I].CostBefore ;
725+ if (CostThresholdMap[I].ThresholdAfter != CostThresholdMap[I].ThresholdBefore )
726+ OS << " , threshold delta = " << CostThresholdMap[I].ThresholdAfter -
727+ CostThresholdMap[I].ThresholdBefore ;
728+ OS << " \n " ;
729+ }
730+
658731// / If 'V' maps to a SROA candidate, disable SROA for it.
659732void CallAnalyzer::disableSROA (Value *V) {
660733 if (auto *SROAArg = getSROAArgForValueOrNull (V)) {
@@ -1763,11 +1836,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
17631836 // all of the per-instruction logic. The visit tree returns true if we
17641837 // consumed the instruction in any way, and false if the instruction's base
17651838 // cost should count against inlining.
1839+ onInstructionAnalysisStart (&*I);
1840+
17661841 if (Base::visit (&*I))
17671842 ++NumInstructionsSimplified;
17681843 else
17691844 onMissedSimplification ();
17701845
1846+ onInstructionAnalysisFinish (&*I);
17711847 using namespace ore ;
17721848 // If the visit this instruction detected an uninlinable pattern, abort.
17731849 InlineResult IR = InlineResult::success ();
@@ -2049,6 +2125,8 @@ InlineResult CallAnalyzer::analyze() {
20492125// / Dump stats about this call's analysis.
20502126LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump () {
20512127#define DEBUG_PRINT_STAT (x ) dbgs() << " " #x " : " << x << " \n "
2128+ if (PrintDebugInstructionDeltas)
2129+ F.print (dbgs (), &Writer);
20522130 DEBUG_PRINT_STAT (NumConstantArgs);
20532131 DEBUG_PRINT_STAT (NumConstantOffsetPtrArgs);
20542132 DEBUG_PRINT_STAT (NumAllocaArgs);
0 commit comments