Skip to content

Commit f58df39

Browse files
committed
Do not want to use BFI to get profile count for sample pgo
Summary: For SamplePGO, we already record the callsite count in the call instruction itself. So we do not want to use BFI to get profile count as it is less accurate. Reviewers: tejohnson, davidxl, eraman Reviewed By: eraman Subscribers: sanjoy, llvm-commits, mehdi_amini Differential Revision: https://reviews.llvm.org/D36025 llvm-svn: 309964
1 parent 99d0ab3 commit f58df39

File tree

4 files changed

+86
-36
lines changed

4 files changed

+86
-36
lines changed

llvm/lib/Analysis/ProfileSummaryInfo.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ static cl::opt<int> ProfileSummaryCutoffCold(
3939
cl::desc("A count is cold if it is below the minimum count"
4040
" to reach this percentile of total counts."));
4141

42+
static cl::opt<bool> AccurateSampleProfile(
43+
"accurate-sample-profile", cl::Hidden, cl::init(false),
44+
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
45+
"callsite as cold. Otherwise, treat un-sampled callsites as if "
46+
"we have no profile."));
47+
4248
// Find the minimum count to reach a desired percentile of counts.
4349
static uint64_t getMinCountForPercentile(SummaryEntryVector &DS,
4450
uint64_t Percentile) {
@@ -78,10 +84,12 @@ ProfileSummaryInfo::getProfileCount(const Instruction *Inst,
7884
if (hasSampleProfile()) {
7985
// In sample PGO mode, check if there is a profile metadata on the
8086
// instruction. If it is present, determine hotness solely based on that,
81-
// since the sampled entry count may not be accurate.
87+
// since the sampled entry count may not be accurate. If there is no
88+
// annotated on the instruction, return None.
8289
uint64_t TotalCount;
8390
if (Inst->extractProfTotalWeight(TotalCount))
8491
return TotalCount;
92+
return None;
8593
}
8694
if (BFI)
8795
return BFI->getBlockProfileCount(Inst->getParent());
@@ -199,7 +207,15 @@ bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
199207
bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
200208
BlockFrequencyInfo *BFI) {
201209
auto C = getProfileCount(CS.getInstruction(), BFI);
202-
return C && isColdCount(*C);
210+
if (C)
211+
return isColdCount(*C);
212+
213+
// In SamplePGO, if the caller has been sampled, and there is no profile
214+
// annotatedon the callsite, we consider the callsite as cold.
215+
// If there is no profile for the caller, and we know the profile is
216+
// accurate, we consider the callsite as cold.
217+
return (hasSampleProfile() &&
218+
(CS.getCaller()->getEntryCount() || AccurateSampleProfile));
203219
}
204220

205221
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll'
2+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
3+
target triple = "x86_64-unknown-linux-gnu"
4+
5+
define void @hot1() #1 {
6+
ret void
7+
}
8+
define void @hot2() #1 {
9+
ret void
10+
}
11+
define void @hot3() #1 {
12+
ret void
13+
}
14+
define void @cold1() #1 {
15+
ret void
16+
}
17+
define void @cold2() #1 {
18+
ret void
19+
}
20+
define void @cold3() #1 {
21+
ret void
22+
}
23+
define void @none1() #1 {
24+
ret void
25+
}
26+
define void @none2() #1 {
27+
ret void
28+
}
29+
define void @none3() #1 {
30+
ret void
31+
}
Lines changed: 30 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; Test to check the callgraph in summary when there is PGO
22
; RUN: opt -module-summary %s -o %t.o
33
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
4-
; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-profile-summary.ll -o %t2.o
4+
; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll -o %t2.o
55
; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
66
; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
77

@@ -16,24 +16,26 @@
1616
; "hot3"
1717
; CHECK-NEXT: <FUNCTION op0=20 op1=4
1818
; "hot4"
19-
; CHECK-NEXT: <FUNCTION op0=24 op1=4
19+
; CHECK-NEXT: <FUNCTION op0=24 op1=5
2020
; "cold"
21-
; CHECK-NEXT: <FUNCTION op0=28 op1=4
21+
; CHECK-NEXT: <FUNCTION op0=29 op1=5
2222
; "none1"
23-
; CHECK-NEXT: <FUNCTION op0=32 op1=5
23+
; CHECK-NEXT: <FUNCTION op0=34 op1=5
2424
; "none2"
25-
; CHECK-NEXT: <FUNCTION op0=37 op1=5
25+
; CHECK-NEXT: <FUNCTION op0=39 op1=5
2626
; "none3"
27-
; CHECK-NEXT: <FUNCTION op0=42 op1=5
27+
; CHECK-NEXT: <FUNCTION op0=44 op1=5
28+
; CHECK-NEXT: <FUNCTION op0=49 op1=5
29+
2830
; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK
2931
; CHECK-NEXT: <VERSION
30-
; CHECK-NEXT: <VALUE_GUID op0=25 op1=123/>
31-
; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
32-
; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=4/>
32+
; CHECK-NEXT: <VALUE_GUID op0=26 op1=123/>
33+
; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123
34+
; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=7 op5=0 op6=1 op7=3 op8=4 op9=1 op10=8 op11=0 op12=2 op13=3 op14=5 op15=1 op16=9 op17=0 op18=3 op19=3 op20=6 op21=1 op22=26 op23=4/>
3335
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
3436

3537
; CHECK: <STRTAB_BLOCK
36-
; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3{{.*}}'
38+
; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3cold1cold2cold3none1none2none3{{.*}}'
3739

3840
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
3941
; COMBINED-NEXT: <VERSION
@@ -45,13 +47,17 @@
4547
; COMBINED-NEXT: <VALUE_GUID
4648
; COMBINED-NEXT: <VALUE_GUID
4749
; COMBINED-NEXT: <VALUE_GUID
50+
; COMBINED-NEXT: <VALUE_GUID
51+
; COMBINED-NEXT: <VALUE_GUID
52+
; COMBINED-NEXT: <COMBINED abbrevid=
4853
; COMBINED-NEXT: <COMBINED abbrevid=
4954
; COMBINED-NEXT: <COMBINED abbrevid=
5055
; COMBINED-NEXT: <COMBINED abbrevid=
5156
; COMBINED-NEXT: <COMBINED abbrevid=
5257
; COMBINED-NEXT: <COMBINED abbrevid=
5358
; COMBINED-NEXT: <COMBINED abbrevid=
54-
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[HOT1:.*]] op6=3 op7=[[COLD:.*]] op8=1 op9=[[HOT2:.*]] op10=3 op11=[[NONE1:.*]] op12=2 op13=[[HOT3:.*]] op14=3 op15=[[NONE2:.*]] op16=2 op17=[[NONE3:.*]] op18=2/>
59+
; COMBINED-NEXT: <COMBINED abbrevid=
60+
; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[NONE1:.*]] op6=0 op7=[[HOT1:.*]] op8=3 op9=[[COLD1:.*]] op10=1 op11=[[NONE2:.*]] op12=0 op13=[[HOT2:.*]] op14=3 op15=[[COLD2:.*]] op16=1 op17=[[NONE3:.*]] op18=0 op19=[[HOT3:.*]] op20=3 op21=[[COLD3:.*]] op22=1/>
5561
; COMBINED_NEXT: <COMBINED abbrevid=
5662
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
5763

@@ -63,24 +69,19 @@ target triple = "x86_64-unknown-linux-gnu"
6369
; This function have high profile count, so entry block is hot.
6470
define void @hot_function(i1 %a, i1 %a2) !prof !20 {
6571
entry:
66-
call void @hot1()
67-
br i1 %a, label %Cold, label %Hot, !prof !41
68-
Cold: ; 1/1000 goes here
69-
call void @cold()
70-
call void @hot2()
71-
call void @hot4(), !prof !15
72-
call void @none1()
73-
br label %exit
74-
Hot: ; 999/1000 goes here
75-
call void @hot2()
76-
call void @hot3()
77-
br i1 %a2, label %None1, label %None2, !prof !42
78-
None1: ; half goes here
7972
call void @none1()
73+
call void @hot1(), !prof !15
74+
call void @cold1(), !prof !16
75+
br i1 %a, label %Cold, label %Hot, !prof !41
76+
Cold: ; 1/1000 goes here
8077
call void @none2()
78+
call void @hot2(), !prof !15
79+
call void @cold2(), !prof !16
8180
br label %exit
82-
None2: ; half goes here
81+
Hot: ; 999/1000 goes here
8382
call void @none3()
83+
call void @hot3(), !prof !15
84+
call void @cold3(), !prof !16
8485
br label %exit
8586
exit:
8687
ret void
@@ -89,17 +90,14 @@ exit:
8990
declare void @hot1() #1
9091
declare void @hot2() #1
9192
declare void @hot3() #1
92-
declare void @hot4() #1
93-
declare void @cold() #1
93+
declare void @cold1() #1
94+
declare void @cold2() #1
95+
declare void @cold3() #1
9496
declare void @none1() #1
9597
declare void @none2() #1
9698
declare void @none3() #1
9799

98-
99100
!41 = !{!"branch_weights", i32 1, i32 1000}
100-
!42 = !{!"branch_weights", i32 1, i32 1}
101-
102-
103101

104102
!llvm.module.flags = !{!1}
105103
!20 = !{!"function_entry_count", i64 110, i64 123}
@@ -119,3 +117,4 @@ declare void @none3() #1
119117
!13 = !{i32 999000, i64 100, i32 1}
120118
!14 = !{i32 999999, i64 1, i32 2}
121119
!15 = !{!"branch_weights", i32 100}
120+
!16 = !{!"branch_weights", i32 1}

llvm/unittests/Analysis/ProfileSummaryInfoTest.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,14 +196,18 @@ TEST_F(ProfileSummaryInfoTest, SampleProf) {
196196

197197
CallSite CS1(BB1->getFirstNonPHI());
198198
auto *CI2 = BB2->getFirstNonPHI();
199+
// Manually attach branch weights metadata to the call instruction.
200+
SmallVector<uint32_t, 1> Weights;
201+
Weights.push_back(1000);
202+
MDBuilder MDB(M->getContext());
203+
CI2->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
199204
CallSite CS2(CI2);
200205

201-
EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI));
202-
EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
206+
EXPECT_FALSE(PSI.isHotCallSite(CS1, &BFI));
207+
EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI));
203208

204209
// Test that CS2 is considered hot when it gets an MD_prof metadata with
205210
// weights that exceed the hot count threshold.
206-
MDBuilder MDB(M->getContext());
207211
CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400}));
208212
EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI));
209213
}

0 commit comments

Comments
 (0)