From 52fad27bc8a7ab58e4a66e9bf7dd569af74ada68 Mon Sep 17 00:00:00 2001 From: Tim Creech Date: Tue, 23 Jul 2024 12:11:49 -0400 Subject: [PATCH 1/2] Revert "[llvm-profgen] Add --sample-period to estimate absolute counts (#99826)" This reverts commit 01d783643aa750aa160bdcd097176fcde934dbd1. --- .../tools/llvm-profgen/period-scaling.test | 84 ------------------- llvm/tools/llvm-profgen/PerfReader.cpp | 14 ---- 2 files changed, 98 deletions(-) delete mode 100644 llvm/test/tools/llvm-profgen/period-scaling.test diff --git a/llvm/test/tools/llvm-profgen/period-scaling.test b/llvm/test/tools/llvm-profgen/period-scaling.test deleted file mode 100644 index a44c9a78caeaf..0000000000000 --- a/llvm/test/tools/llvm-profgen/period-scaling.test +++ /dev/null @@ -1,84 +0,0 @@ -// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_inst_retired.near_taken:upp --sample-period=1000003 -// RUN: FileCheck %s --input-file %t --check-prefix=CHECK-RAW-PROFILE -// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_inst_retired.near_taken:upp --sample-period=1000003 -// RUN: FileCheck %s --input-file %t --check-prefix=CHECK - -// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --skip-symbolization --perf-event=br_misp_retired.all_branches:upp --leading-ip-only --sample-period=1000003 -// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED-RAW-PROFILE -// RUN: llvm-profgen --format=text --perfscript=%S/Inputs/cmov_3.perfscript --binary=%S/Inputs/cmov_3.perfbin --output=%t --perf-event=br_misp_retired.all_branches:upp --leading-ip-only --sample-period=1000003 -// RUN: FileCheck %s --input-file %t --check-prefix=UNPRED - -// Check that we can use perf event filtering to generate multiple types of -// source-level profiles from a single perf profile. In this case, we generate -// a typical execution frequency profile using br_inst_retired.near_taken LBRs, -// and a branch mispredict profile using br_misp_retired.all_branches sample -// IPs. - -// Check that we can use --sample-period to compute LBR and IP-based profiles -// which have comparable and absolute magnitudes. For example, in this case the -// branch of interest (at source line offset 4) is in a loop body which is -// executed ~20M times in total, and it's mispredicted about 9M times, yielding -// a mispredict rate of roughly 0.45. - -// The source example below is based on perfKernelCpp/cmov_3, except a -// misleading builtin is used to persuade the compiler not to use cmov, which -// induces branch mispredicts. - -// CHECK: sel_arr:652547082:0 -// CHECK: 3.1: 20225766 -// CHECK: 3.2: 20225766 -// CHECK: 4: 19838670 -// CHECK: 5: 20225766 - -// UNPRED: sel_arr:18000054:0 -// UNPRED: 3.1: 0 -// UNPRED: 3.2: 0 -// UNPRED: 4: 9000027 -// UNPRED: 5: 0 - -// CHECK-RAW-PROFILE: 3 -// CHECK-RAW-PROFILE-NEXT: 2f0-2fa:9774174 -// CHECK-RAW-PROFILE-NEXT: 2f0-310:10064496 -// CHECK-RAW-PROFILE-NEXT: 2ff-310:10161270 - -// UNPRED-RAW-PROFILE: 1 -// UNPRED-RAW-PROFILE-NEXT: 2fa-2fa:9000027 - -// original code: -// icx -fprofile-sample-generate lit.c -#include - -#define N 20000 -#define ITERS 10000 - -static int *m_s1, *m_s2, *m_s3, *m_dst; - -void init(void) { - m_s1 = malloc(sizeof(int)*N); - m_s2 = malloc(sizeof(int)*N); - m_s3 = malloc(sizeof(int)*N); - m_dst = malloc(sizeof(int)*N); - srand(42); - - for (int i = 0; i < N; i++) { - m_s1[i] = rand() % N; - m_s2[i] = 0; - m_s3[i] = 1; - } -} - -void __attribute__((noinline)) sel_arr(int *dst, int *s1, int *s2, int *s3) { -#pragma nounroll -#pragma clang loop vectorize(disable) interleave(disable) - for (int i = 0; i < N; i++) { - int *p = __builtin_expect((s1[i] < 10035), 0) ? &s2[i] : &s3[i]; - dst[i] = *p; - } -} - -int main(void) { - init(); - for(int i=0; i - SamplePeriod("sample-period", cl::init(1), - cl::desc("The sampling period (-c) used for perf data")); - extern cl::opt PerfTraceFilename; extern cl::opt ShowDisassemblyOnly; extern cl::opt ShowSourceLocations; @@ -1004,16 +1000,6 @@ void LBRPerfReader::parseSample(TraceStream &TraceIt, uint64_t Count) { if (extractLBRStack(TraceIt, Sample->LBRStack)) { warnIfMissingMMap(); // Record LBR only samples by aggregation - // If a sampling period is given we can adjust the magnitude of sample - // counts to estimate the absolute magnitute. - if (SamplePeriod.getNumOccurrences()) { - Count *= SamplePeriod; - // If counts are LBR-based, as opposed to IP-based, then the magnitude is - // now amplified by roughly the LBR stack size. By adjusting this down, we - // can produce LBR-based and IP-based profiles with comparable magnitudes. - if (!LeadingIPOnly && Sample->LBRStack.size() > 1) - Count /= (Sample->LBRStack.size() - 1); - } AggregatedSamples[Hashable(Sample)] += Count; } } From 1dafad1a2a120fdde72fd30086a2a5ac92b64ee4 Mon Sep 17 00:00:00 2001 From: Tim Creech Date: Tue, 23 Jul 2024 12:11:54 -0400 Subject: [PATCH 2/2] Revert "[llvm-profgen] Support creating profiles of arbitrary events (#99026)" This reverts commit 0caf0c93e759816663af52e8632d1c3953dbc715. --- .../tools/llvm-profgen/Inputs/cmov_3.perfbin | Bin 27192 -> 0 bytes .../llvm-profgen/Inputs/cmov_3.perfscript | 39 ------ .../Inputs/ip-duplication.perfscript | 2 - .../Inputs/noprobe-skid.perfscript | 5 - .../tools/llvm-profgen/event-filtering.test | 78 ------------ .../llvm-profgen/iponly-nodupfactor.test | 22 ---- llvm/test/tools/llvm-profgen/iponly.test | 58 --------- llvm/tools/llvm-profgen/PerfReader.cpp | 116 ++---------------- llvm/tools/llvm-profgen/ProfileGenerator.cpp | 31 ++--- 9 files changed, 20 insertions(+), 331 deletions(-) delete mode 100755 llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin delete mode 100644 llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfscript delete mode 100644 llvm/test/tools/llvm-profgen/Inputs/ip-duplication.perfscript delete mode 100644 llvm/test/tools/llvm-profgen/Inputs/noprobe-skid.perfscript delete mode 100644 llvm/test/tools/llvm-profgen/event-filtering.test delete mode 100644 llvm/test/tools/llvm-profgen/iponly-nodupfactor.test delete mode 100644 llvm/test/tools/llvm-profgen/iponly.test diff --git a/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin b/llvm/test/tools/llvm-profgen/Inputs/cmov_3.perfbin deleted file mode 100755 index 7a1543041f8055c4dbc60c23645f91f680ba621b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27192 zcmeHQeQX@Zb)Wm#6D9GRjzxXNtAw&CDYrb5lFC|Nd;HSTrX)t9WhrsII^HeGtB!Z< z-JU|FK_i)t6Gn!jq)wx>MqmR)8>2~(KLW=HoS24^NNVdu4$#183r7j!Le7W7B#NuL zuCMRS%scKb_Z*={3kcW&Zr{A$dv8A8%+BuY42K^Z7#<7;0*peC?PkawYt%U;u8cjC zZd)NjR0VE7RolB647}1x4LO`@e>V<%!T`NX7)g5#l=NKbOB}9sLi$RxB zL3{T(Jd^b!`lKueQM(<_PZjAfRTmLScglhhwcDX@m?qTOZe2tq)umr1 zB_%s#(XLV2HA;P=S7m-g$)8w8{PoCs^n`TakQY=%27=13jJfTONV_9apD5`QCEp)< z#Y+Eg@*9zMVVPi9`jO~PSw5na-}}Li`%}vQ5TRy1DD$h<4vhRSX+S32kRf@K&eND0AIGMmk(K#q`< zPbD)sRxBiQX|{KGsJGAP(z>*5m3a47$S=q}6XMqZaBBdg_A9N)EJx4vN$l&N`Ag^q zL3RLm3=4a7PO)#GKPlX>;nXH5Yt?nGvJ9@&76_;|ocbK%?KWHvEeQL;_kiyK-vhn} zd=K~@@IByr!1ur{>4D!hy!(yB>@RB)XOvev7)yM7-U^nlBxb);bAi{VyzQp|mpA?l zt_^qS5JP?vWv^Z>L)iE)$m6T`t5-PxL*((*`qg>hG5yY84b8pyU}Em4iP@hmjEoL+ z&Uao;T)e2ZKyvRo4(iv(wT3$$#xxn~Usz<%J)^@5cw!e`t(Ip{1Bp`b@FAh;*8j!X#k|o!@%!V4g}x zvjTXb0{p!IzJtJKD9N=?A#i5>Ujs0FX4~UH;?LEBVED|&&jT~gL&bmB{tVU6Co-45 za^_zr6LbHNc>d?$f&YvKUd&up_cONXQtw>&%H4`_mgV|6utaH=@*^zP=Q8Sfe&4%+>@2N5G(Ho-vhn}d=K~@@IByr z!1sXf0pA0@2Ye5_GY{Z;TVQQyM-L=}YiR2h%H@v&eE{ev&_{vdY=8JSf|>8z=@`Uw#&TD%8h9AWrG^^7Q)a_1Vay z%GvOzLyrW5Mtuyjfy_j*ah@z%`N8*q?*ZQfz6X2{_#W^*;CsOLfbRj{1GkI^^d_B` zwNX->T48Wg+>?o+RuGs}={!~^`Nv=}7sG0Ph0AW8$JR<7=V2Hqof;`$C*aC{=8#z%(K{*k^dTRPyU_T{IhGg-5M33fxuF}0n?X0(`g6vDoJ{r#I5 zwl~i2vBU0-w*|VVJ7e3nGWLipFHseBFbF-RP|D>pxe0Y5m2wxHsa$bUH<`=j$1*uj z-AaF#=}u=$6QG()PMLSB(h!rQekt`w-u^FFxRJ^x zi<6ACIAohs{Q~w&r|LW|9R^8=0NjOqN=0M`18}f zx8VHroc|pO`uMH+j)dRB@Y^6BZ{n^4wd?0(`{MVkMP2XjC~h17JC*f-ZR)qFzwWbmKm6Z+PsW8M$68*pvvLj@t>by=&@-HYcdp(8 zE3=BmT-xltsK>TLR)Ij^zv}Cd0ECb6Ku4)q=*Z+!*;3ks-R2ofOM!$Htu!2HrA?v^ zENWJKzgnqXF{`YId{L>5lpM4U_?3VwK$kyRx~#O+99Gsujwnqvmm)_$qqLm8Dem4` zWfcpCgCA2AB#|#Gfym0^3ak07vU$bpN^|6kp>Zt|TDwzejNGHF2c2*vAxt7Wh0Qpa zbSdm5CA6aEvQqZ}<+g~Wtg#gJn~GASDNQ`xvJ&D{i^L);wIKQgR-0jQd@NanCH!}~ zM6X7pxFVcYqvNGq=1)r|XA8Mx7SC2Yo}4hFxLjTYdD^ZVns7+Bnt(Qh}9og-r`sVO53v=M{V{f72v^?Z80f@nnd*txRmOX# zem$e{)``PT301d4JRX9l2%RTr7lv39(>?W@EBlq4`Ynw1DLL`mnAg6U5bQRh^XQ!V zFuT-sJH+E5*j*I!#9Kk#OTV()#F17Ac2!Lt^U|l?R}OXPRkZ8MjjwkCk_)kSFTrsv z471#e3cegG z>`@;6@U`8iD%wMcNqx5;^eXshmGt?^DjxZss-pjeD)`e?@b6c_e_jPISHW9@P-!S9 zeIwycKtD*_?T2BByW5wa9OF^nPgc=?stW%7D)?(v@MhfT3A3e+^hWCxqd54=sHgV--j{mkobrT*Cl?$g%3#lm{F~D)w;eK9}xc<^{`wZaVoh9KP0arc7^QQoB1l&EIe+|& zT_yd?G9CBx;zxk1OG?AoD}XlzSFs-HPg40U;<&?5)j0$#4~JlrUo-Rv`@{kT0g2n+ z=Mi7daf~^`-XPYul})X~ey7B@fdZ!E_FJ6iTgU8^2{ai5 z9P8`WKZyFlrSBo%;Q7kT>;w<~?a(xN{%@*ab!vI0$)#{3IhHYUu-_791|FDU!ZEiV zxHg%$yl~X7Ceuo}Z05L`Jz3T8?G!YAI&W#Zs`1;%HFE}DYdxyLi_3)ucZ8LkSX9Gz zVY^oZ@AQbkoHbt5^TIv6U^7_i0hz;++S!b%$XvCy893bm=2j+U6ik>a*z4=Wo%MFg zob`0dmi7pz+`}JE1)jiCzc`ha_710}(8Ay2^RHzS=DPJ>Aso(+vo zoX(h{D2v^wni}3&a5EOI5*((LVrjErPGpL(&)Bf0U~e?+9xuYd)9Ji1kz2tI6P)VJUKR2Fi#K% z-#<)>gU9U8s&^2_nIV6bmub*<3HpBw?inJzPg0ieg$VvF3_R|6=n3hP3SJeYa;5h| z0=7fMZcpDi=>HI?q$nx9rx4r^k=NmGXDI&}ng0yD?_!|x!{eU^cm8nHAKw>D_UEPj zd1-%6+Tq_rVBN`{{;uaRV5my=^nImW7L@)T2~>E%H0f? z;VpWJ|IPx2KEd+wAm9JsrH=>qmkXJ!5W;m1es+6R+N;uDuV{*hgeiIUn9H8tKaWYp zt+F3bGGtHuU%Bk*`_yACLZk9`IMTpf{wJh;vy{{OI8z1S)+e6mm!v(lAI)FF(tf8% zrg9}a0-lBl`j71CyO^#DjW-3UT&evDn1@I`{3>qRtpf)EKj}ko>l07(pFjuy&WY^l zJ6p|7+P?(0*muaDzQ@u3O}UDdgFyX@@}+kC3CK`|?CHDRLI+ym+`}2jp6I`W*lrIK z4`C2<5W&Ee?1=svFuOg?>tfQrTK<&a?`4`)ewz2_(!QM(C8c}`egh(yzf>ofk^QL( zA|j}ZfoKRKIDS*0?~n5`!Amp%O9glT!*wG{i_$>)$$f&u^&ER}v20@%`!DPlR`=VD zA?~*43s5c{WEZ6UT`mI`%l63p-TqrOA` - -#define N 20000 -#define ITERS 10000 - -static int *m_s1, *m_s2, *m_s3, *m_dst; - -void init(void) { - m_s1 = malloc(sizeof(int)*N); - m_s2 = malloc(sizeof(int)*N); - m_s3 = malloc(sizeof(int)*N); - m_dst = malloc(sizeof(int)*N); - srand(42); - - for (int i = 0; i < N; i++) { - m_s1[i] = rand() % N; - m_s2[i] = 0; - m_s3[i] = 1; - } -} - -void __attribute__((noinline)) sel_arr(int *dst, int *s1, int *s2, int *s3) { -#pragma nounroll -#pragma clang loop vectorize(disable) interleave(disable) - for (int i = 0; i < N; i++) { - int *p = __builtin_expect((s1[i] < 10035), 0) ? &s2[i] : &s3[i]; - dst[i] = *p; - } -} - -int main(void) { - init(); - for(int i=0; i - -int bar(int x, int y) { - if (x % 3) { - return x - y; - } - return x + y; -} - -void foo() { - int s, i = 0; - while (i++ < 4000 * 4000) - if (i % 91) s = bar(i, s); else s += 30; - printf("sum is %d\n", s); -} - -int main() { - foo(); - return 0; -} diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index b4e4911fb8912..111c546f5329f 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -41,17 +41,6 @@ static cl::opt "and produce context-insensitive profile.")); cl::opt ShowDetailedWarning("show-detailed-warning", cl::desc("Show detailed warning message.")); -cl::opt - LeadingIPOnly("leading-ip-only", - cl::desc("Form a profile based only on sample IPs")); - -static cl::list PerfEventFilter( - "perf-event", - cl::desc("Ignore samples not matching the given event names")); -static cl::alias - PerfEventFilterPlural("perf-events", cl::CommaSeparated, - cl::desc("Comma-delimited version of -perf-event"), - cl::aliasopt(PerfEventFilter)); extern cl::opt PerfTraceFilename; extern cl::opt ShowDisassemblyOnly; @@ -415,18 +404,13 @@ PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID, } } - // If filtering by events was requested, additionally request the "event" - // field. - const std::string FieldList = - PerfEventFilter.empty() ? "ip,brstack" : "event,ip,brstack"; - // Run perf script again to retrieve events for PIDs collected above SmallVector ScriptSampleArgs; ScriptSampleArgs.push_back(PerfPath); ScriptSampleArgs.push_back("script"); ScriptSampleArgs.push_back("--show-mmap-events"); ScriptSampleArgs.push_back("-F"); - ScriptSampleArgs.push_back(FieldList); + ScriptSampleArgs.push_back("ip,brstack"); ScriptSampleArgs.push_back("-i"); ScriptSampleArgs.push_back(PerfData); if (!PIDs.empty()) { @@ -591,54 +575,14 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, // Skip the leading instruction pointer. size_t Index = 0; - - StringRef EventName; - // Skip a perf event name. This may or may not exist. - if (Records.size() > Index && Records[Index].ends_with(":")) { - EventName = Records[Index].ltrim().rtrim(':'); - Index++; - - if (PerfEventFilter.empty()) { - WithColor::warning() << "No --perf-event filter was specified, but an " - "\"event\" field was found in line " - << TraceIt.getLineNumber() << ": " - << TraceIt.getCurrentLine() << "\n"; - } else if (std::find(PerfEventFilter.begin(), PerfEventFilter.end(), - EventName) == PerfEventFilter.end()) { - TraceIt.advance(); - return false; - } - - } else if (!PerfEventFilter.empty()) { - WithColor::warning() << "A --perf-event filter was specified, but no " - "\"event\" field found in line " - << TraceIt.getLineNumber() << ": " - << TraceIt.getCurrentLine() << "\n"; - } - uint64_t LeadingAddr; - if (Records.size() > Index && !Records[Index].contains('/')) { - if (Records[Index].getAsInteger(16, LeadingAddr)) { + if (!Records.empty() && !Records[0].contains('/')) { + if (Records[0].getAsInteger(16, LeadingAddr)) { WarnInvalidLBR(TraceIt); TraceIt.advance(); return false; } - Index++; - } - - // We assume that if we saw an event name we also saw a leading addr. - // In other words, LeadingAddr is set if Index is 1 or 2. - if (LeadingIPOnly && Index > 0) { - // Form a profile only from the sample IP. Do not assume an LBR stack - // follows, and ignore it if it does. - uint64_t SampleIP = Binary->canonicalizeVirtualAddress(LeadingAddr); - bool SampleIPIsInternal = Binary->addressIsCode(SampleIP); - if (SampleIPIsInternal) { - // Form a half LBR entry where the sample IP is the destination. - LBRStack.emplace_back(LBREntry(SampleIP, SampleIP)); - } - TraceIt.advance(); - return !LBRStack.empty(); + Index = 1; } // Now extract LBR samples - note that we do not reverse the @@ -958,20 +902,6 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat) { SampleCounter &Counter = SampleCounters.begin()->second; uint64_t EndAddress = 0; - - if (LeadingIPOnly) { - assert(Sample->LBRStack.size() == 1 && - "Expected only half LBR entries for ip-only mode"); - const LBREntry &LBR = *(Sample->LBRStack.begin()); - uint64_t SourceAddress = LBR.Source; - uint64_t TargetAddress = LBR.Target; - if (SourceAddress == TargetAddress && - Binary->addressIsCode(TargetAddress)) { - Counter.recordRangeCount(SourceAddress, TargetAddress, Repeat); - } - return; - } - for (const LBREntry &LBR : Sample->LBRStack) { uint64_t SourceAddress = LBR.Source; uint64_t TargetAddress = LBR.Target; @@ -1132,18 +1062,6 @@ bool PerfScriptReader::isLBRSample(StringRef Line) { Line.trim().split(Records, " ", 2, false); if (Records.size() < 2) return false; - // Check if there is an event name before the leading IP. - // If there is, it will be in Records[0]. To skip it, we'll re-split on - // Records[1], which should contain the rest of the line. - if (Records[0].contains(":")) { - // If so, consume the event name and continue processing the rest of the - // line. - StringRef IPAndLBR = Records[1].ltrim(); - Records.clear(); - IPAndLBR.split(Records, " ", 2, false); - if (Records.size() < 2) - return false; - } if (Records[1].starts_with("0x") && Records[1].contains('/')) return true; return false; @@ -1234,18 +1152,6 @@ void PerfScriptReader::warnInvalidRange() { const PerfSample *Sample = Item.first.getPtr(); uint64_t Count = Item.second; uint64_t EndAddress = 0; - - if (LeadingIPOnly) { - assert(Sample->LBRStack.size() == 1 && - "Expected only half LBR entries for ip-only mode"); - const LBREntry &LBR = *(Sample->LBRStack.begin()); - if (LBR.Source == LBR.Target && LBR.Source != ExternalAddr) { - // This is an leading-addr-only profile. - Ranges[{LBR.Source, LBR.Source}] += Count; - } - continue; - } - for (const LBREntry &LBR : Sample->LBRStack) { uint64_t SourceAddress = LBR.Source; uint64_t StartAddress = LBR.Target; @@ -1293,15 +1199,11 @@ void PerfScriptReader::warnInvalidRange() { !Binary->addressIsCode(EndAddress)) continue; - // IP samples can indicate activity on individual instructions rather than - // basic blocks/edges. In this mode, don't warn if sampled IPs aren't - // branches. - if (!LeadingIPOnly) - if (!Binary->addressIsCode(StartAddress) || - !Binary->addressIsTransfer(EndAddress)) { - InstNotBoundary += I.second; - WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg); - } + if (!Binary->addressIsCode(StartAddress) || + !Binary->addressIsTransfer(EndAddress)) { + InstNotBoundary += I.second; + WarnInvalidRange(StartAddress, EndAddress, EndNotBoundaryMsg); + } auto *FRange = Binary->findFuncRange(StartAddress); if (!FRange) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp index 175556c2220e6..53a25b279b432 100644 --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -104,8 +104,6 @@ cl::opt InferMissingFrames( "Infer missing call frames due to compiler tail call elimination."), llvm::cl::Optional); -extern cl::opt LeadingIPOnly; - using namespace llvm; using namespace sampleprof; @@ -390,25 +388,18 @@ void ProfileGeneratorBase::updateBodySamplesforFunctionProfile( // Use the maximum count of samples with same line location uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator); - if (LeadingIPOnly) { - // When computing an IP-based profile we take the SUM of counts at the - // location instead of applying duplication factors and taking the MAX. + // Use duplication factor to compensated for loop unroll/vectorization. + // Note that this is only needed when we're taking MAX of the counts at + // the location instead of SUM. + Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); + + ErrorOr R = + FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator); + + uint64_t PreviousCount = R ? R.get() : 0; + if (PreviousCount <= Count) { FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, - Count); - } else { - // Otherwise, use duplication factor to compensate for loop - // unroll/vectorization. Note that this is only needed when we're taking - // MAX of the counts at the location instead of SUM. - Count *= getDuplicationFactor(LeafLoc.Location.Discriminator); - - ErrorOr R = FunctionProfile.findSamplesAt( - LeafLoc.Location.LineOffset, Discriminator); - - uint64_t PreviousCount = R ? R.get() : 0; - if (PreviousCount <= Count) { - FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator, - Count - PreviousCount); - } + Count - PreviousCount); } }