Skip to content

Commit c965fd9

Browse files
Kirill Naumov authored and arpilipe committed
Cost Annotation Writer for InlineCost
Add extra diagnostics for the inline cost analysis under -print-instruction-deltas cl option. When enabled along with -debug-only=inline-cost it prints the IR of inline candidate annotated with cost and threshold change per every instruction. Reviewed By: apilipenko, davidxl, mtrofin Differential Revision: https://reviews.llvm.org/D71501
1 parent 82a21c1 commit c965fd9

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

llvm/lib/Analysis/InlineCost.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "llvm/Analysis/TargetTransformInfo.h"
2828
#include "llvm/Analysis/ValueTracking.h"
2929
#include "llvm/Config/llvm-config.h"
30+
#include "llvm/IR/AssemblyAnnotationWriter.h"
3031
#include "llvm/IR/CallingConv.h"
3132
#include "llvm/IR/DataLayout.h"
3233
#include "llvm/IR/Dominators.h"
@@ -38,6 +39,7 @@
3839
#include "llvm/IR/PatternMatch.h"
3940
#include "llvm/Support/CommandLine.h"
4041
#include "llvm/Support/Debug.h"
42+
#include "llvm/Support/FormattedStream.h"
4143
#include "llvm/Support/raw_ostream.h"
4244

4345
using namespace llvm;
@@ -51,6 +53,10 @@ static cl::opt<int>
5153
cl::ZeroOrMore,
5254
cl::desc("Default amount of inlining to perform"));
5355

56+
// Hidden debugging switch, off by default. When set, the inline cost analyzer
// records the cost and threshold around every analyzed instruction, and
// InlineCostCallAnalyzer::dump() prints the function's IR annotated with
// those values.
static cl::opt<bool> PrintDebugInstructionDeltas("print-instruction-deltas",
57+
cl::Hidden, cl::init(false),
58+
cl::desc("Prints deltas of cost and threshold per instruction"));
59+
5460
static cl::opt<int> InlineThreshold(
5561
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
5662
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -99,6 +105,26 @@ static cl::opt<bool> OptComputeFullInlineCost(
99105

100106
namespace {
101107
class InlineCostCallAnalyzer;
108+
109+
// Snapshot of the inline cost analysis state around one instruction: the
// accumulated cost and the threshold immediately before and immediately after
// the instruction was assessed. Every member defaults to zero so that a
// default-initialized record never carries indeterminate values.
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;
};
117+
118+
class CostAnnotationWriter : public AssemblyAnnotationWriter {
119+
public:
120+
// This DenseMap stores the delta change in cost and threshold after
121+
// accounting for the given instruction.
122+
DenseMap <const Instruction *, InstructionCostDetail> CostThresholdMap;
123+
124+
virtual void emitInstructionAnnot(const Instruction *I,
125+
formatted_raw_ostream &OS);
126+
};
127+
102128
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
103129
typedef InstVisitor<CallAnalyzer, bool> Base;
104130
friend class InstVisitor<CallAnalyzer, bool>;
@@ -135,6 +161,12 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
135161
/// Called after a basic block was analyzed.
136162
virtual void onBlockAnalyzed(const BasicBlock *BB) {}
137163

164+
/// Hook invoked by analyzeBlock() immediately before an instruction is
/// visited. The default implementation does nothing.
165+
virtual void onInstructionAnalysisStart(const Instruction *I) {}
166+
167+
/// Hook invoked by analyzeBlock() right after an instruction has been
/// visited. The default implementation does nothing.
168+
virtual void onInstructionAnalysisFinish(const Instruction *I) {}
169+
138170
/// Called at the end of the analysis of the callsite. Return the outcome of
139171
/// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
140172
/// the reason it can't.
@@ -538,6 +570,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
538570
}
539571
}
540572

573+
void onInstructionAnalysisStart(const Instruction *I) override {
574+
// This function is called to store the initial cost of inlining before
575+
// the given instruction was assessed.
576+
if (!PrintDebugInstructionDeltas)
577+
return ;
578+
Writer.CostThresholdMap[I].CostBefore = Cost;
579+
Writer.CostThresholdMap[I].ThresholdBefore = Threshold;
580+
}
581+
582+
void onInstructionAnalysisFinish(const Instruction *I) override {
583+
// This function is called to find new values of cost and threshold after
584+
// the instruction has been assessed.
585+
if (!PrintDebugInstructionDeltas)
586+
return ;
587+
Writer.CostThresholdMap[I].CostAfter = Cost;
588+
Writer.CostThresholdMap[I].ThresholdAfter = Threshold;
589+
}
590+
541591
InlineResult finalizeAnalysis() override {
542592
// Loops generally act a lot like calls in that they act like barriers to
543593
// movement, require a certain amount of setup, etc. So when optimising for
@@ -637,6 +687,10 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
637687
Params.ComputeFullInlineCost || ORE),
638688
Params(Params), Threshold(Params.DefaultThreshold),
639689
BoostIndirectCalls(BoostIndirect) {}
690+
691+
/// Annotation Writer for cost annotation
692+
CostAnnotationWriter Writer;
693+
640694
void dump();
641695

642696
virtual ~InlineCostCallAnalyzer() {}
@@ -655,6 +709,25 @@ void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
655709
EnabledSROAAllocas.erase(SROAArg);
656710
disableLoadElimination();
657711
}
712+
713+
void CostAnnotationWriter::emitInstructionAnnot(
714+
const Instruction *I, formatted_raw_ostream &OS) {
715+
// The cost of inlining of the given instruction is printed always.
716+
// The threshold delta is printed only when it is non-zero. It happens
717+
// when we decided to give a bonus at a particular instruction.
718+
OS << "; cost before = " << CostThresholdMap[I].CostBefore <<
719+
", cost after = " << CostThresholdMap[I].CostAfter <<
720+
", threshold before = " << CostThresholdMap[I].ThresholdBefore <<
721+
", threshold after = " << CostThresholdMap[I].ThresholdAfter <<
722+
", ";
723+
OS << "cost delta = " << CostThresholdMap[I].CostAfter -
724+
CostThresholdMap[I].CostBefore;
725+
if (CostThresholdMap[I].ThresholdAfter != CostThresholdMap[I].ThresholdBefore)
726+
OS << ", threshold delta = " << CostThresholdMap[I].ThresholdAfter -
727+
CostThresholdMap[I].ThresholdBefore;
728+
OS << "\n";
729+
}
730+
658731
/// If 'V' maps to a SROA candidate, disable SROA for it.
659732
void CallAnalyzer::disableSROA(Value *V) {
660733
if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
@@ -1763,11 +1836,14 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
17631836
// all of the per-instruction logic. The visit tree returns true if we
17641837
// consumed the instruction in any way, and false if the instruction's base
17651838
// cost should count against inlining.
1839+
onInstructionAnalysisStart(&*I);
1840+
17661841
if (Base::visit(&*I))
17671842
++NumInstructionsSimplified;
17681843
else
17691844
onMissedSimplification();
17701845

1846+
onInstructionAnalysisFinish(&*I);
17711847
using namespace ore;
17721848
// If the visit this instruction detected an uninlinable pattern, abort.
17731849
InlineResult IR = InlineResult::success();
@@ -2049,6 +2125,8 @@ InlineResult CallAnalyzer::analyze() {
20492125
/// Dump stats about this call's analysis.
20502126
LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() {
20512127
#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
2128+
if (PrintDebugInstructionDeltas)
2129+
F.print(dbgs(), &Writer);
20522130
DEBUG_PRINT_STAT(NumConstantArgs);
20532131
DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
20542132
DEBUG_PRINT_STAT(NumAllocaArgs);
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: opt < %s -inline -debug-only=inline-cost -disable-output -print-instruction-deltas 2>&1 | FileCheck %s
2+
3+
; CHECK: Analyzing call of callee1... (caller:foo)
4+
; CHECK: define i32 @callee1(i32 %x) {
5+
; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5
6+
; CHECK: %x1 = add i32 %x, 1
7+
; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5
8+
; CHECK: %x2 = add i32 %x1, 1
9+
; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 5
10+
; CHECK: %x3 = add i32 %x2, 1
11+
; CHECK: ; cost before = {{.*}}, cost after = {{.*}}, threshold before = {{.*}}, threshold after = {{.*}}, cost delta = 0
12+
; CHECK: ret i32 %x3
13+
; CHECK: }
14+
; CHECK: NumConstantArgs: 0
15+
; CHECK: NumConstantOffsetPtrArgs: 0
16+
; CHECK: NumAllocaArgs: 0
17+
; CHECK: NumConstantPtrCmps: 0
18+
; CHECK: NumConstantPtrDiffs: 0
19+
; CHECK: NumInstructionsSimplified: 1
20+
; CHECK: NumInstructions: 4
21+
; CHECK: SROACostSavings: 0
22+
; CHECK: SROACostSavingsLost: 0
23+
; CHECK: LoadEliminationCost: 0
24+
; CHECK: ContainsNoDuplicateCall: 0
25+
; CHECK: Cost: {{.*}}
26+
; CHECK: Threshold: {{.*}}
27+
28+
; Caller: contains the single call to @callee1 whose inline cost analysis the
; check lines above match ("Analyzing call of callee1... (caller:foo)").
define i32 @foo(i32 %y) {
29+
%x = call i32 @callee1(i32 %y)
30+
ret i32 %x
31+
}
32+
33+
; Inline candidate: three chained adds followed by a return. The check lines
; above expect a cost delta of 5 for each add and 0 for the ret.
define i32 @callee1(i32 %x) {
34+
%x1 = add i32 %x, 1
35+
%x2 = add i32 %x1, 1
36+
%x3 = add i32 %x2, 1
37+
ret i32 %x3
38+
}

0 commit comments

Comments
 (0)