diff --git a/bolt/include/bolt/Passes/ContinuityStats.h b/bolt/include/bolt/Passes/ContinuityStats.h
new file mode 100644
index 0000000000000..bd4d491ad4a55
--- /dev/null
+++ b/bolt/include/bolt/Passes/ContinuityStats.h
@@ -0,0 +1,66 @@
+//===- bolt/Passes/ContinuityStats.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass checks how well the BOLT input profile satisfies the following
+// "CFG continuity" property of a perfect profile:
+//
+//        Each positive-execution-count block in the function's CFG
+//        should be *reachable* from a positive-execution-count function
+//        entry block through a positive-execution-count path.
+//
+// More specifically, for each of the hottest 1000 functions, the pass
+// calculates the function's fraction of basic block execution counts
+// that is *unreachable*. It then reports the 95th percentile of the
+// distribution of the 1000 unreachable fractions in a single BOLT-INFO line.
+// The smaller the reported value is, the better the BOLT profile
+// satisfies the CFG continuity property.
+//
+// The default value of 1000 above can be changed via the hidden BOLT option
+// `-num-functions-for-continuity-check=[N]`.
+// If more detailed stats are needed, `-v=1` can be used: the hottest N
+// functions will be grouped into 5 equally-sized buckets, from the hottest
+// to the coldest; for each bucket, various summary statistics of the
+// distribution of the unreachable fractions and the raw unreachable execution
+// counts will be reported.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_CONTINUITYSTATS_H
+#define BOLT_PASSES_CONTINUITYSTATS_H
+
+#include "bolt/Passes/BinaryPasses.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace bolt {
+class BinaryContext;
+
+/// Compute and report to the user the function CFG continuity quality.
+///
+/// Reporting-only pass: it considers non-empty functions with a valid profile
+/// that the base pass filter accepts; shouldPrint() is always false.
+class PrintContinuityStats : public BinaryFunctionPass {
+public:
+  explicit PrintContinuityStats(const cl::opt<bool> &PrintPass)
+      : BinaryFunctionPass(PrintPass) {}
+
+  /// Accept only non-empty functions that have a valid profile.
+  bool shouldOptimize(const BinaryFunction &BF) const override;
+  const char *getName() const override { return "continuity-stats"; }
+  bool shouldPrint(const BinaryFunction &) const override { return false; }
+  /// Print the continuity report for the hottest profiled functions.
+  Error runOnFunctions(BinaryContext &BC) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif // BOLT_PASSES_CONTINUITYSTATS_H
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index 407d8b03f7397..1c1273b3d2420 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_library(LLVMBOLTPasses
   PatchEntries.cpp
   PettisAndHansen.cpp
   PLTCall.cpp
+  ContinuityStats.cpp
   RegAnalysis.cpp
   RegReAssign.cpp
   ReorderAlgorithm.cpp
diff --git a/bolt/lib/Passes/ContinuityStats.cpp b/bolt/lib/Passes/ContinuityStats.cpp
new file mode 100644
index 0000000000000..b32365b59065d
--- /dev/null
+++ b/bolt/lib/Passes/ContinuityStats.cpp
@@ -0,0 +1,250 @@
+//===- bolt/Passes/ContinuityStats.cpp --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the continuity stats calculation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/ContinuityStats.h"
+#include "bolt/Core/BinaryBasicBlock.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+#include <queue>
+#include <string>
+#include <unordered_set>
+
+#define DEBUG_TYPE "bolt-opts"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace opts {
+extern cl::opt<unsigned> Verbosity;
+cl::opt<unsigned> NumFunctionsForContinuityCheck(
+    "num-functions-for-continuity-check",
+    cl::desc("number of hottest functions to print aggregated "
+             "CFG discontinuity stats of."),
+    cl::init(1000), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
+} // namespace opts
+
+namespace {
+using FunctionListType = std::vector<const BinaryFunction *>;
+using function_iterator = FunctionListType::iterator;
+
+/// Print summary statistics (MAX, several upper percentiles, MIN) of \p Values
+/// to \p OS, one per line. \p Values is sorted in place in ascending order;
+/// nothing is printed when it is empty. With \p Fraction set, each value is
+/// printed as a percentage with two decimals instead of as a raw number.
+template <typename T>
+void printDistribution(raw_ostream &OS, std::vector<T> &Values,
+                       bool Fraction = false) {
+  if (Values.empty())
+    return;
+  std::sort(Values.begin(), Values.end());
+
+  // "TOP P%" is the element at ascending rank size * (1 - P / 100), clamped
+  // to the last element so that P == 0 (MAX) stays in bounds.
+  auto PrintLine = [&](const std::string &Text, double Percent) {
+    const size_t Rank =
+        std::min(Values.size() - 1,
+                 static_cast<size_t>(Values.size() * (1.0 - Percent / 100)));
+    OS << " " << Text << std::string(9 - Text.length(), ' ') << ": ";
+    if (Fraction)
+      OS << format("%.2lf%%", static_cast<double>(Values[Rank]) * 100);
+    else
+      OS << Values[Rank];
+    OS << "\n";
+  };
+
+  PrintLine("MAX", 0);
+  for (const int Percentage : {1, 5, 10, 20, 50, 80})
+    PrintLine("TOP " + std::to_string(Percentage) + "%", Percentage);
+  PrintLine("MIN", 100);
+}
+
+/// Measure how well the profiles of \p Functions satisfy CFG continuity and
+/// print the result to \p OS.
+///
+/// Given a perfect profile, every positive-execution-count BB is connected to
+/// an executed entry of its function through a directed CFG path of
+/// positive-count edges. The 95th percentile of the per-function fraction of
+/// execution count violating this is printed; -v=1 adds full distributions.
+void printCFGContinuityStats(raw_ostream &OS,
+                             iterator_range<function_iterator> &Functions) {
+  std::vector<size_t> NumUnreachables;
+  std::vector<size_t> SumECUnreachables;
+  std::vector<double> FractionECUnreachables;
+
+  for (const BinaryFunction *Function : Functions) {
+    if (Function->size() <= 1)
+      continue;
+
+    // BFS over the subgraph of the CFG induced by positive-count edges,
+    // seeded with every entry block that has a positive execution count.
+    std::unordered_set<const BinaryBasicBlock *> Reachable;
+    std::queue<const BinaryBasicBlock *> Worklist;
+    for (const BinaryBasicBlock &BB : *Function) {
+      if (BB.isEntryPoint() && BB.getKnownExecutionCount() > 0) {
+        Reachable.insert(&BB);
+        Worklist.push(&BB);
+      }
+    }
+    while (!Worklist.empty()) {
+      const BinaryBasicBlock *BB = Worklist.front();
+      Worklist.pop();
+      auto BranchInfo = BB->branch_info_begin();
+      for (const BinaryBasicBlock *Succ : BB->successors()) {
+        const uint64_t Count = BranchInfo->Count;
+        ++BranchInfo;
+        if (Count == BinaryBasicBlock::COUNT_NO_PROFILE || Count == 0)
+          continue;
+        if (Reachable.insert(Succ).second)
+          Worklist.push(Succ);
+      }
+    }
+
+    // Tally the positive-count BBs the search did not reach. Counting them
+    // directly, rather than subtracting the reachable set's size, stays
+    // correct when a zero-count BB is reached over a positive-count edge.
+    size_t NumUnreachablePosBBs = 0;
+    size_t SumAllBBEC = 0;
+    size_t SumUnreachableBBEC = 0;
+    for (const BinaryBasicBlock &BB : *Function) {
+      const size_t BBEC = BB.getKnownExecutionCount();
+      SumAllBBEC += BBEC;
+      if (BBEC == 0 || Reachable.count(&BB))
+        continue;
+      ++NumUnreachablePosBBs;
+      SumUnreachableBBEC += BBEC;
+    }
+
+    // A function without any recorded block count has nothing unreachable;
+    // report 0 rather than the NaN that 0 / 0 would feed into the sorts below.
+    const double FractionECUnreachable =
+        SumAllBBEC == 0 ? 0.0
+                        : static_cast<double>(SumUnreachableBBEC) / SumAllBBEC;
+
+    if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
+      OS << "Non-trivial CFG discontinuity observed in function "
+         << Function->getPrintName() << "\n";
+      LLVM_DEBUG(Function->dump());
+    }
+
+    NumUnreachables.push_back(NumUnreachablePosBBs);
+    SumECUnreachables.push_back(SumUnreachableBBEC);
+    FractionECUnreachables.push_back(FractionECUnreachable);
+  }
+
+  if (FractionECUnreachables.empty())
+    return;
+
+  // The element at ascending rank 0.95 * size is the 95th percentile.
+  std::sort(FractionECUnreachables.begin(), FractionECUnreachables.end());
+  const size_t Rank = static_cast<size_t>(FractionECUnreachables.size() * 0.95);
+  OS << format("top 5%% function CFG discontinuity is %.2lf%%\n",
+               FractionECUnreachables[Rank] * 100);
+
+  if (opts::Verbosity >= 1) {
+    OS << "abbreviations: EC = execution count, POS BBs = positive EC BBs\n"
+       << "distribution of NUM(unreachable POS BBs) among all focal "
+          "functions\n";
+    printDistribution(OS, NumUnreachables);
+
+    OS << "distribution of SUM_EC(unreachable POS BBs) among all focal "
+          "functions\n";
+    printDistribution(OS, SumECUnreachables);
+
+    OS << "distribution of [(SUM_EC(unreachable POS BBs) / SUM_EC(all "
+          "POS BBs))] among all focal functions\n";
+    printDistribution(OS, FractionECUnreachables, /*Fraction=*/true);
+  }
+}
+
+/// Print continuity stats for the \p NumTopFunctions hottest functions of
+/// \p ValidFunctions (sorted in place) and, with -v=1, for 5 hotness buckets.
+void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
+              size_t NumTopFunctions) {
+  // Stable sort: functions with equal counts keep their incoming relative
+  // order, so the selected top-N set does not depend on the sort algorithm.
+  llvm::stable_sort(ValidFunctions,
+                    [](const BinaryFunction *A, const BinaryFunction *B) {
+                      return A->getKnownExecutionCount() >
+                             B->getKnownExecutionCount();
+                    });
+
+  const size_t RealNumTopFunctions =
+      std::min(NumTopFunctions, ValidFunctions.size());
+
+  iterator_range<function_iterator> Functions(
+      ValidFunctions.begin(), ValidFunctions.begin() + RealNumTopFunctions);
+
+  BC.outs() << format("BOLT-INFO: among the hottest %zu functions ",
+                      RealNumTopFunctions);
+  printCFGContinuityStats(BC.outs(), Functions);
+
+  // Print more detailed bucketed stats if requested.
+  if (opts::Verbosity < 1 || RealNumTopFunctions < 5)
+    return;
+
+  // The RealNumTopFunctions % 5 coldest functions are left out of the buckets.
+  const size_t PerBucketSize = RealNumTopFunctions / 5;
+  BC.outs() << format(
+      "Detailed stats for 5 buckets, each with %zu functions:\n",
+      PerBucketSize);
+
+  for (size_t BucketIndex = 0; BucketIndex < 5; ++BucketIndex) {
+    const size_t StartIndex = BucketIndex * PerBucketSize;
+    const size_t EndIndex = StartIndex + PerBucketSize;
+    iterator_range<function_iterator> Bucket(
+        ValidFunctions.begin() + StartIndex,
+        ValidFunctions.begin() + EndIndex);
+    // Sorted hottest first: the first function has the bucket's max count.
+    const size_t MaxFunctionExecutionCount =
+        ValidFunctions[StartIndex]->getKnownExecutionCount();
+    const size_t MinFunctionExecutionCount =
+        ValidFunctions[EndIndex - 1]->getKnownExecutionCount();
+    BC.outs() << format("----------------\n| Bucket %zu: "
+                        "|\n----------------\n",
+                        BucketIndex + 1)
+              << format(
+                     "execution counts of the %zu functions in the bucket: "
+                     "%zu-%zu\n",
+                     EndIndex - StartIndex, MinFunctionExecutionCount,
+                     MaxFunctionExecutionCount);
+    printCFGContinuityStats(BC.outs(), Bucket);
+  }
+}
+} // namespace
+
+bool PrintContinuityStats::shouldOptimize(const BinaryFunction &BF) const {
+  if (BF.empty() || !BF.hasValidProfile())
+    return false;
+
+  return BinaryFunctionPass::shouldOptimize(BF);
+}
+
+Error PrintContinuityStats::runOnFunctions(BinaryContext &BC) {
+  // `-num-functions-for-continuity-check=0` disables the report.
+  if (opts::NumFunctionsForContinuityCheck == 0)
+    return Error::success();
+
+  // Create a list of functions with valid profiles.
+  FunctionListType ValidFunctions;
+  for (const auto &BFI : BC.getBinaryFunctions()) {
+    const BinaryFunction *Function = &BFI.second;
+    if (PrintContinuityStats::shouldOptimize(*Function))
+      ValidFunctions.push_back(Function);
+  }
+  if (ValidFunctions.empty())
+    return Error::success();
+
+  printAll(BC, ValidFunctions, opts::NumFunctionsForContinuityCheck);
+  return Error::success();
+}
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 5dfef0b71cc79..b090604183348 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -12,6 +12,7 @@
 #include "bolt/Passes/AllocCombiner.h"
 #include "bolt/Passes/AsmDump.h"
 #include "bolt/Passes/CMOVConversion.h"
+#include "bolt/Passes/ContinuityStats.h"
 #include "bolt/Passes/FixRISCVCallsPass.h"
 #include "bolt/Passes/FixRelaxationPass.h"
 #include "bolt/Passes/FrameOptimizer.h"
@@ -373,6 +374,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
   if (opts::PrintProfileStats)
     Manager.registerPass(std::make_unique<PrintProfileStats>(NeverPrint));
 
+  Manager.registerPass(std::make_unique<PrintContinuityStats>(NeverPrint));
+
   Manager.registerPass(std::make_unique<ValidateInternalCalls>(NeverPrint));
 
   Manager.registerPass(std::make_unique<ValidateMemRefs>(NeverPrint));
diff --git a/bolt/test/X86/cfg-discontinuity-reporting.test b/bolt/test/X86/cfg-discontinuity-reporting.test
new file mode 100644
index 0000000000000..4d7d3305cdb75
--- /dev/null
+++ b/bolt/test/X86/cfg-discontinuity-reporting.test
@@ -0,0 +1,4 @@
+## Check profile discontinuity reporting
+RUN: yaml2obj %p/Inputs/blarge_new.yaml &> %t.exe
+RUN: llvm-bolt %t.exe -o %t.out --pa -p %p/Inputs/blarge_new.preagg.txt | FileCheck %s
+CHECK: among the hottest 5 functions top 5% function CFG discontinuity is 100.00%