diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 14957cba50174..ca8b786f4ab69 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -388,6 +388,10 @@ class BinaryFunction {
   /// The profile data for the number of times the function was executed.
   uint64_t ExecutionCount{COUNT_NO_PROFILE};

+  /// Profile data for the number of times this function was entered from
+  /// external code (DSO, JIT, etc).
+  uint64_t ExternEntryCount{0};
+
   /// Profile match ratio.
   float ProfileMatchRatio{0.0f};

@@ -1877,6 +1881,10 @@ class BinaryFunction {
     return *this;
   }

+  /// Set the profile data for the number of times the function was entered from
+  /// external code (DSO/JIT).
+  void setExternEntryCount(uint64_t Count) { ExternEntryCount = Count; }
+
   /// Adjust execution count for the function by a given \p Count. The value
   /// \p Count will be subtracted from the current function count.
   ///
@@ -1904,6 +1912,10 @@ class BinaryFunction {
   /// Return COUNT_NO_PROFILE if there's no profile info.
   uint64_t getExecutionCount() const { return ExecutionCount; }

+  /// Return the profile information about the number of times the function was
+  /// entered from external code (DSO/JIT).
+  uint64_t getExternEntryCount() const { return ExternEntryCount; }
+
   /// Return the raw profile information about the number of branch
   /// executions corresponding to this function.
   uint64_t getRawSampleCount() const { return RawSampleCount; }
diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h
index 5df1b5a8f4a00..60316ff09ae35 100644
--- a/bolt/include/bolt/Profile/DataReader.h
+++ b/bolt/include/bolt/Profile/DataReader.h
@@ -109,6 +109,9 @@ struct FuncBranchData {
   /// Total execution count for the function.
   int64_t ExecutionCount{0};

+  /// Total entry count from external code for the function.
+  uint64_t ExternEntryCount{0};
+
   /// Indicate if the data was used.
   bool Used{false};

diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
index a8d9a15311d94..41e2bd1651efd 100644
--- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h
+++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h
@@ -206,6 +206,7 @@ struct BinaryFunctionProfile {
   uint32_t Id{0};
   llvm::yaml::Hex64 Hash{0};
   uint64_t ExecCount{0};
+  uint64_t ExternEntryCount{0};
   std::vector Blocks;
   std::vector InlineTree;
   bool Used{false};
@@ -218,6 +219,7 @@ template <> struct MappingTraits {
     YamlIO.mapRequired("fid", BFP.Id);
     YamlIO.mapRequired("hash", BFP.Hash);
     YamlIO.mapRequired("exec", BFP.ExecCount);
+    YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0);
     YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
     YamlIO.mapOptional("blocks", BFP.Blocks,
                        std::vector());
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 6d1969f5c6c30..b998d7160aae7 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -471,6 +471,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
     OS << "\n Sample Count: " << RawSampleCount;
     OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f);
   }
+  if (ExternEntryCount)
+    OS << "\n Extern Entry Count: " << ExternEntryCount;

   if (opts::PrintDynoStats && !getLayout().block_empty()) {
     OS << '\n';
diff --git a/bolt/lib/Passes/ProfileQualityStats.cpp b/bolt/lib/Passes/ProfileQualityStats.cpp
index dfd74d3dd5719..64cc662c3ab29 100644
--- a/bolt/lib/Passes/ProfileQualityStats.cpp
+++ b/bolt/lib/Passes/ProfileQualityStats.cpp
@@ -532,6 +532,9 @@ void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
     std::vector &MaxCountMap = TotalMaxCountMaps[FunctionNum];
     std::vector &MinCountMap = TotalMinCountMaps[FunctionNum];

+    // Record external entry count into CallGraphIncomingFlows
+    CallGraphIncomingFlows[FunctionNum] += Function->getExternEntryCount();
+
     // Update MaxCountMap, MinCountMap, and CallGraphIncomingFlows
     auto recordCall = [&](const BinaryBasicBlock *SourceBB,
                           const MCSymbol *DestSymbol, uint64_t Count,
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index 2527b5bfe38d2..752b00c7de4e8 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -2289,6 +2289,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
       YamlBF.Id = BF->getFunctionNumber();
       YamlBF.Hash = BAT->getBFHash(FuncAddress);
       YamlBF.ExecCount = BF->getKnownExecutionCount();
+      YamlBF.ExternEntryCount = BF->getExternEntryCount();
       YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
       const BoltAddressTranslation::BBHashMapTy &BlockMap =
           BAT->getBBHashMap(FuncAddress);
diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp
index c512394f26a3b..afe24216d7f5d 100644
--- a/bolt/lib/Profile/DataReader.cpp
+++ b/bolt/lib/Profile/DataReader.cpp
@@ -85,6 +85,7 @@ void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
   }
   llvm::stable_sort(Data);
   ExecutionCount += FBD.ExecutionCount;
+  ExternEntryCount += FBD.ExternEntryCount;
   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
     assert(I->To.Name == FBD.Name);
     auto NewElmt = EntryData.insert(EntryData.end(), *I);
@@ -269,6 +270,7 @@ Error DataReader::preprocessProfile(BinaryContext &BC) {
     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) {
       setBranchData(Function, FuncData);
       Function.ExecutionCount = FuncData->ExecutionCount;
+      Function.ExternEntryCount = FuncData->ExternEntryCount;
       FuncData->Used = true;
     }
   }
@@ -419,6 +421,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
     if (fetchProfileForOtherEntryPoints(BF)) {
       BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD);
       BF.ExecutionCount = FBD->ExecutionCount;
+      BF.ExternEntryCount = FBD->ExternEntryCount;
       BF.RawSampleCount = FBD->getNumExecutedBranches();
     }
     return;
@@ -449,6 +452,7 @@ void DataReader::matchProfileData(BinaryFunction &BF) {
       setBranchData(BF, NewBranchData);
       NewBranchData->Used = true;
       BF.ExecutionCount = NewBranchData->ExecutionCount;
+      BF.ExternEntryCount = NewBranchData->ExternEntryCount;
       BF.ProfileMatchRatio = 1.0f;
       break;
     }
@@ -1190,6 +1194,8 @@ std::error_code DataReader::parse() {
     if (BI.To.IsSymbol && BI.To.Offset == 0) {
       I = GetOrCreateFuncEntry(BI.To.Name);
       I->second.ExecutionCount += BI.Branches;
+      if (!BI.From.IsSymbol)
+        I->second.ExternEntryCount += BI.Branches;
     }
   }

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 33ce40ac2eeec..086e47b661e10 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -176,6 +176,7 @@ bool YAMLProfileReader::parseFunctionProfile(
   uint64_t FunctionExecutionCount = 0;

   BF.setExecutionCount(YamlBF.ExecCount);
+  BF.setExternEntryCount(YamlBF.ExternEntryCount);

   uint64_t FuncRawBranchCount = 0;
   for (const yaml::bolt::BinaryBasicBlockProfile &YamlBB : YamlBF.Blocks)
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index f1fe45f21a0f6..f4308d6fc1992 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -226,6 +226,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
   YamlBF.Hash = BF.getHash();
   YamlBF.NumBasicBlocks = BF.size();
   YamlBF.ExecCount = BF.getKnownExecutionCount();
+  YamlBF.ExternEntryCount = BF.getExternEntryCount();
   DenseMap InlineTreeNodeId;
   if (PseudoProbeDecoder && BF.getGUID()) {
     std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
diff --git a/bolt/test/X86/shrinkwrapping.test b/bolt/test/X86/shrinkwrapping.test
index 8581d7e0c0f7b..521b4561b3ba6 100644
--- a/bolt/test/X86/shrinkwrapping.test
+++ b/bolt/test/X86/shrinkwrapping.test
@@ -8,6 +8,7 @@ REQUIRES: shell

 RUN: %clangxx %cxxflags -no-pie %S/Inputs/exc4sw.S -o %t.exe -Wl,-q
 RUN: llvm-bolt %t.exe -o %t --relocs --frame-opt=all \
+RUN: --print-only=main --print-cfg \
 RUN: --data=%p/Inputs/exc4sw.fdata --reorder-blocks=cache 2>&1 | \
 RUN: FileCheck %s --check-prefix=CHECK-BOLT

@@ -19,6 +20,7 @@ RUN: llvm-objdump --dwarf=frames %t | grep -A20 -e \
 RUN: `llvm-nm --numeric-sort %t | grep main | tail -n 1 | cut -f1 -d' ' | \
 RUN: tail -c9` 2>&1 | FileCheck %s --check-prefix=CHECK-OUTPUT

+CHECK-BOLT: Extern Entry Count: 100
 CHECK-BOLT: Shrink wrapping moved 2 spills inserting load/stores and 0 spills inserting push/pops

 CHECK-INPUT: DW_CFA_advance_loc: 2