From 8eea1ea0109f07d590d189bc17d5f5411556d77a Mon Sep 17 00:00:00 2001 From: mingmingl Date: Tue, 4 Feb 2025 11:19:44 -0800 Subject: [PATCH 1/9] [CodeGen][StaticDataPartitioning]Place module-internal global variables based on profile information --- llvm/include/llvm/IR/Function.h | 6 - llvm/include/llvm/IR/GlobalObject.h | 11 + llvm/include/llvm/IR/MDBuilder.h | 4 +- llvm/lib/CodeGen/StaticDataSplitter.cpp | 201 +++++++++++++----- .../CodeGen/TargetLoweringObjectFileImpl.cpp | 6 + llvm/lib/IR/Function.cpp | 16 -- llvm/lib/IR/Globals.cpp | 30 +++ llvm/lib/IR/MDBuilder.cpp | 6 +- llvm/test/CodeGen/X86/data-section-prefix.ll | 27 +++ .../CodeGen/X86/global-variable-partition.ll | 159 ++++++++++++++ .../CodeGenPrepare/X86/section-samplepgo.ll | 4 +- .../Transforms/CodeGenPrepare/X86/section.ll | 4 +- .../Transforms/HotColdSplit/coldentrycount.ll | 4 +- .../section-accurate-samplepgo.ll | 6 +- 14 files changed, 396 insertions(+), 88 deletions(-) create mode 100644 llvm/test/CodeGen/X86/data-section-prefix.ll create mode 100644 llvm/test/CodeGen/X86/global-variable-partition.ll diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index fcd5396ccfdbc..29041688124bc 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -346,12 +346,6 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node { /// sample PGO, to enable the same inlines as the profiled optimized binary. DenseSet getImportGUIDs() const; - /// Set the section prefix for this function. - void setSectionPrefix(StringRef Prefix); - - /// Get the section prefix for this function. - std::optional getSectionPrefix() const; - /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm /// to use during code generation. 
bool hasGC() const { diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index 08edc13d81f88..bb50c39813e14 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -124,6 +124,17 @@ class GlobalObject : public GlobalValue { /// appropriate default object file section. void setSection(StringRef S); + /// Set the section prefix for this global object. + void setSectionPrefix(StringRef Prefix); + + /// Update the section prefix, unless the existing prefix is the same as + /// `KeepPrefix`. + void updateSectionPrefix(StringRef Prefix, + std::optional KeepPrefix = std::nullopt); + + /// Get the section prefix for this global object. + std::optional getSectionPrefix() const; + bool hasComdat() const { return getComdat() != nullptr; } const Comdat *getComdat() const { return ObjComdat; } Comdat *getComdat() { return ObjComdat; } diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index e02ec8f5a3d8b..ce4e1da656049 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -89,8 +89,8 @@ class MDBuilder { MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic, const DenseSet *Imports); - /// Return metadata containing the section prefix for a function. - MDNode *createFunctionSectionPrefix(StringRef Prefix); + /// Return metadata containing the section prefix for a global object. + MDNode *createGlobalObjectSectionPrefix(StringRef Prefix); /// Return metadata containing the pseudo probe descriptor for a function. 
MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, StringRef FName); diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index e5bf0a5a3a255..f09e3b41e0723 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -9,13 +9,13 @@ // The pass uses branch profile data to assign hotness based section qualifiers // for the following types of static data: // - Jump tables +// - Module-internal global variables // - Constant pools (TODO) -// - Other module-internal data (TODO) // // For the original RFC of this pass please see // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744 -#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/APInt.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/MBFIWrapper.h" @@ -27,9 +27,12 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -46,12 +49,27 @@ class StaticDataSplitter : public MachineFunctionPass { const MachineBlockFrequencyInfo *MBFI = nullptr; const ProfileSummaryInfo *PSI = nullptr; - // Returns true iff any jump table is hot-cold categorized. - bool splitJumpTables(MachineFunction &MF); + void updateStats(bool ProfileAvailable, const MachineJumpTableInfo *MJTI); + void updateJumpTableStats(bool ProfileAvailable, + const MachineJumpTableInfo &MJTI); - // Same as above but works on functions with profile information. - bool splitJumpTablesWithProfiles(const MachineFunction &MF, - MachineJumpTableInfo &MJTI); + // Use profiles to partition static data. 
+ bool partitionStaticDataWithProfiles(MachineFunction &MF); + + // If the global value is a local linkage global variable, return it. + // Otherwise, return nullptr. + const GlobalVariable *getLocalLinkageGlobalVariable(const GlobalValue *GV); + + // Returns true if the global variable is in one of {.rodata, .bss, .data, + // .data.rel.ro} sections + bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM); + + // Iterate all global variables in the module and update the section prefix + // of the module-internal data. + void updateGlobalVariableSectionPrefix(MachineFunction &MF); + + // Accummulated data profile count across machine functions in the module. + DenseMap DataProfileCounts; public: static char ID; @@ -77,13 +95,24 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { MBFI = &getAnalysis().getMBFI(); PSI = &getAnalysis().getPSI(); - return splitJumpTables(MF); + const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI && + MF.getFunction().hasProfileData(); + bool Changed = false; + + if (ProfileAvailable) + Changed |= partitionStaticDataWithProfiles(MF); + + updateGlobalVariableSectionPrefix(MF); + updateStats(ProfileAvailable, MF.getJumpTableInfo()); + return Changed; } -bool StaticDataSplitter::splitJumpTablesWithProfiles( - const MachineFunction &MF, MachineJumpTableInfo &MJTI) { +bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { int NumChangedJumpTables = 0; + const TargetMachine &TM = MF.getTarget(); + MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + // Jump table could be used by either terminating instructions or // non-terminating ones, so we walk all instructions and use // `MachineOperand::isJTI()` to identify jump table operands. 
@@ -92,63 +121,131 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles( for (const auto &MBB : MF) { for (const MachineInstr &I : MBB) { for (const MachineOperand &Op : I.operands()) { - if (!Op.isJTI()) - continue; - const int JTI = Op.getIndex(); - // This is not a source block of jump table. - if (JTI == -1) + std::optional Count = std::nullopt; + if (!Op.isJTI() && !Op.isGlobal()) continue; - auto Hotness = MachineFunctionDataHotness::Hot; + Count = MBFI->getBlockProfileCount(&MBB); + + if (Op.isJTI()) { + assert(MJTI != nullptr && "Jump table info is not available."); + const int JTI = Op.getIndex(); + // This is not a source block of jump table. + if (JTI == -1) + continue; + + auto Hotness = MachineFunctionDataHotness::Hot; + + // Hotness is based on source basic block hotness. + // TODO: PSI APIs are about instruction hotness. Introduce API for + // data access hotness. + if (Count && PSI->isColdCount(*Count)) + Hotness = MachineFunctionDataHotness::Cold; - // Hotness is based on source basic block hotness. - // TODO: PSI APIs are about instruction hotness. Introduce API for data - // access hotness. - if (PSI->isColdBlock(&MBB, MBFI)) - Hotness = MachineFunctionDataHotness::Cold; + if (MJTI->updateJumpTableEntryHotness(JTI, Hotness)) + ++NumChangedJumpTables; + } else if (Op.isGlobal()) { + // Find global variables with local linkage + const GlobalVariable *GV = + getLocalLinkageGlobalVariable(Op.getGlobal()); + if (!GV || !inStaticDataSection(GV, TM)) + continue; - if (MJTI.updateJumpTableEntryHotness(JTI, Hotness)) - ++NumChangedJumpTables; + // Acccumulate data profile count across machine function + // instructions. + // TODO: Analyze global variable's initializers. 
+ if (Count) { + auto [It, Inserted] = + DataProfileCounts.try_emplace(GV, APInt(128, 0)); + It->second += *Count; + } + } } } } return NumChangedJumpTables > 0; } -bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) { - MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); - if (!MJTI || MJTI->getJumpTables().empty()) - return false; - - const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI && - MF.getFunction().hasProfileData(); - auto statOnExit = llvm::make_scope_exit([&] { - if (!AreStatisticsEnabled()) - return; +void StaticDataSplitter::updateJumpTableStats( + bool ProfileAvailable, const MachineJumpTableInfo &MJTI) { + if (!ProfileAvailable) { + NumUnknownJumpTables += MJTI.getJumpTables().size(); + return; + } - if (!ProfileAvailable) { - NumUnknownJumpTables += MJTI->getJumpTables().size(); - return; + for (size_t JTI = 0; JTI < MJTI.getJumpTables().size(); JTI++) { + auto Hotness = MJTI.getJumpTables()[JTI].Hotness; + if (Hotness == MachineFunctionDataHotness::Hot) { + ++NumHotJumpTables; + } else { + assert(Hotness == MachineFunctionDataHotness::Cold && + "A jump table is either hot or cold when profile information is " + "available."); + ++NumColdJumpTables; } + } +} - for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) { - auto Hotness = MJTI->getJumpTables()[JTI].Hotness; - if (Hotness == MachineFunctionDataHotness::Hot) { - ++NumHotJumpTables; - } else { - assert(Hotness == MachineFunctionDataHotness::Cold && - "A jump table is either hot or cold when profile information is " - "available."); - ++NumColdJumpTables; - } - } - }); +void StaticDataSplitter::updateStats(bool ProfileAvailable, + const MachineJumpTableInfo *MJTI) { + if (!AreStatisticsEnabled()) + return; - // Place jump tables according to block hotness if function has profile data. 
- if (ProfileAvailable) - return splitJumpTablesWithProfiles(MF, *MJTI); + if (MJTI) + updateJumpTableStats(ProfileAvailable, *MJTI); +} - return true; +const GlobalVariable * +StaticDataSplitter::getLocalLinkageGlobalVariable(const GlobalValue *GV) { + if (!GV || GV->isDeclarationForLinker()) + return nullptr; + + return GV->hasLocalLinkage() ? dyn_cast(GV) : nullptr; +} + +bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV, + const TargetMachine &TM) { + assert(GV && "Caller guaranteed"); + + // Skip LLVM reserved symbols. + if (GV->getName().starts_with("llvm.")) + return false; + + SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + return Kind.isData() || Kind.isReadOnly() || Kind.isReadOnlyWithRel() || + Kind.isBSS(); +} + +void StaticDataSplitter::updateGlobalVariableSectionPrefix( + MachineFunction &MF) { + for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) { + if (GV.isDeclarationForLinker()) + continue; + // DataProfileCounts accumulates data profile count across all machine + // function instructions, and it can't model the indirect accesses through + // other global variables' initializers. + // TODO: Analyze the users of module-internal global variables and see + // through the users' initializers. Do not place a global variable into + // unlikely section if any of its users are potentially hot. + auto Iter = DataProfileCounts.find(&GV); + if (Iter == DataProfileCounts.end()) + continue; + + // StaticDataSplitter is made a machine function pass rather than a module + // pass because (Lazy)MachineBlockFrequencyInfo is a machine-function + // analysis pass and cannot be used for a legacy module pass. + // As a result, we use `DataProfileCounts` to accumulate data + // profile count across machine functions and update global variable section + // prefix once per machine function. 
+ // FIXME: Make StaticDataSplitter a module pass under new pass manager + // framework, and set global variable section prefix once per module after + // analyzing all machine functions. + if (PSI->isColdCount(Iter->second.getZExtValue())) { + GV.updateSectionPrefix("unlikely", std::make_optional(StringRef("hot"))); + } else if (PSI->isHotCount(Iter->second.getZExtValue())) { + GV.updateSectionPrefix("hot"); + } + } } char StaticDataSplitter::ID = 0; diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 3c2c7c8c9fed6..d20ab29cc1979 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -670,6 +670,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, } bool HasPrefix = false; + if (const auto *F = dyn_cast(GO)) { // Jump table hotness takes precedence over its enclosing function's hotness // if it's known. The function's section prefix is used if jump table entry @@ -687,6 +688,11 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, raw_svector_ostream(Name) << '.' << *Prefix; HasPrefix = true; } + } else if (const auto *GV = dyn_cast(GO)) { + if (std::optional Prefix = GV->getSectionPrefix()) { + raw_svector_ostream(Name) << '.' 
<< *Prefix; + HasPrefix = true; + } } if (UniqueSectionName) { diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index e6f0d64d071ba..5666f0a53866f 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1164,22 +1164,6 @@ DenseSet Function::getImportGUIDs() const { return R; } -void Function::setSectionPrefix(StringRef Prefix) { - MDBuilder MDB(getContext()); - setMetadata(LLVMContext::MD_section_prefix, - MDB.createFunctionSectionPrefix(Prefix)); -} - -std::optional Function::getSectionPrefix() const { - if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) { - assert(cast(MD->getOperand(0))->getString() == - "function_section_prefix" && - "Metadata not match"); - return cast(MD->getOperand(1))->getString(); - } - return std::nullopt; -} - bool Function::nullPointerIsDefined() const { return hasFnAttribute(Attribute::NullPointerIsValid); } diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index db5e1cb57b1ba..884089262e465 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -286,6 +287,35 @@ void GlobalObject::setSection(StringRef S) { setGlobalObjectFlag(HasSectionHashEntryBit, !S.empty()); } +void GlobalObject::setSectionPrefix(StringRef Prefix) { + MDBuilder MDB(getContext()); + setMetadata(LLVMContext::MD_section_prefix, + MDB.createGlobalObjectSectionPrefix(Prefix)); +} + +void GlobalObject::updateSectionPrefix(StringRef Prefix, + std::optional KeepPrefix) { + auto SectionPrefix = getSectionPrefix(); + if (SectionPrefix && (*SectionPrefix == Prefix || + (KeepPrefix && *SectionPrefix == *KeepPrefix))) + return; + + setSectionPrefix(Prefix); + return; +} + +std::optional GlobalObject::getSectionPrefix() const { + if (MDNode *MD = 
getMetadata(LLVMContext::MD_section_prefix)) { + [[maybe_unused]] StringRef MDName = + cast(MD->getOperand(0))->getString(); + assert((MDName == "section_prefix" || + (isa(this) && MDName == "function_section_prefix")) && + "Metadata not match"); + return cast(MD->getOperand(1))->getString(); + } + return std::nullopt; +} + bool GlobalValue::isNobuiltinFnDef() const { const Function *F = dyn_cast(this); if (!F || F->empty()) diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp index 26c8ab9fc36c8..b6aa8844a7eaf 100644 --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -87,9 +87,9 @@ MDNode *MDBuilder::createFunctionEntryCount( return MDNode::get(Context, Ops); } -MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) { - return MDNode::get( - Context, {createString("function_section_prefix"), createString(Prefix)}); +MDNode *MDBuilder::createGlobalObjectSectionPrefix(StringRef Prefix) { + return MDNode::get(Context, + {createString("section_prefix"), createString(Prefix)}); } MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) { diff --git a/llvm/test/CodeGen/X86/data-section-prefix.ll b/llvm/test/CodeGen/X86/data-section-prefix.ll new file mode 100644 index 0000000000000..4812fc70758fb --- /dev/null +++ b/llvm/test/CodeGen/X86/data-section-prefix.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple x86_64-linux-gnu -data-sections %s -o - | FileCheck %s --check-prefix=ELF +; RUN: llc -mtriple x86_64-linux-gnu -unique-section-names=0 -data-sections %s -o - | FileCheck %s --check-prefix=ELF-NOUNIQ + +; RUN: llc -mtriple x86_64-windows-msvc -data-sections %s -o - | FileCheck %s --check-prefix=COFF-MSVC + +; ELF: .section .data.hot.foo, +; ELF: .section .data.bar, +; ELF: .section .bss.unlikely.baz, +; ELF: .section .bss.quz, + +; ELF-NOUNIQ: .section .data.hot.,"aw",@progbits,unique,1 +; ELF-NOUNIQ: .section .data,"aw",@progbits,unique,2 +; ELF-NOUNIQ: .section .bss.unlikely.,"aw",@nobits,unique,3 +; ELF-NOUNIQ: .section 
.bss,"aw",@nobits,unique,4 + +; COFF-MSVC: .section .data,"dw",one_only,foo +; COFF-MSVC: .section .data,"dw",one_only,bar +; COFF-MSVC: .section .bss,"bw",one_only,baz +; COFF-MSVC: .section .bss,"bw",one_only,quz + +@foo = global i32 1, !section_prefix !0 +@bar = global i32 2 +@baz = global i32 0, !section_prefix !1 +@quz = global i32 0 + +!0 = !{!"section_prefix", !"hot"} +!1 = !{!"section_prefix", !"unlikely"} diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll new file mode 100644 index 0000000000000..d457d766999e2 --- /dev/null +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -0,0 +1,159 @@ + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -data-sections=true \ +; RUN: -unique-section-names=true -relocation-model=pic \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -data-sections=true \ +; RUN: -unique-section-names=false -relocation-model=pic \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA + +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -data-sections=false \ +; RUN: -unique-section-names=false -relocation-model=pic \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA + +; SYM: .section .rodata.str1.1.hot. 
+; UNIQ: .section .rodata.str1.1.hot.,"aMS",@progbits,1 +; AGG: .section .rodata.str1.1.hot +; DATA: .L.str +; DATA: "hot\t" +; DATA: .L.str.1 +; DATA: "%d\t%d\t%d\n" + + +; SYM: .section .data.rel.ro.hot.hot_relro_array +; SYM: .section .data.hot.hot_data,"aw",@progbits +; SYM: .section .bss.hot.hot_bss,"aw",@nobits + +; UNIQ: .section .data.rel.ro.hot.,"aw",@progbits,unique,3 +; UNIQ: .section .data.hot.,"aw",@progbits,unique,4 +; UNIQ: .section .bss.hot.,"aw",@nobits,unique,5 + +; AGG: .section .data.rel.ro.hot.,"aw",@progbits +; AGG: .section .data.hot.,"aw",@progbits +; AGG: .section .bss.hot.,"aw",@nobits + + +; SYM: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; UNIQ: section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; AGG: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; DATA: .L.str.2: +; DATA: "cold%d\t%d\t%d\n" + + +; SYM: .section .bss.unlikely.cold_bss,"aw",@nobits +; SYM: .section .data.unlikely.cold_data,"aw",@progbits +; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits +; SYM: .section .bss.unlikely._ZL4bss2,"aw",@nobits +; SYM: .section .data.unlikely._ZL5data3,"aw",@progbits + +; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,6 +; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,7 +; UNIQ: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,8 +; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,9 +; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,10 + +; AGG: .section .bss.unlikely.,"aw",@nobits +; AGG: .section .data.unlikely.,"aw",@progbits +; AGG: .section .data.rel.ro.unlikely.,"aw",@progbits +; AGG: .section .bss.unlikely.,"aw",@nobits +; AGG: .section .data.unlikely.,"aw",@progbits + +@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 +@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 +@hot_relro_array = internal constant [2 x ptr] [ptr @_ZL4bss2, ptr @_ZL5data3] +@hot_data = internal global i32 5 +@hot_bss = internal global i32 0 +@.str.2 = 
private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1 +@cold_bss = internal global i32 0 +@cold_data = internal global i32 4 +@cold_relro_array = internal constant [2 x ptr] [ptr @_ZL5data3, ptr @_ZL4bss2] +@_ZL4bss2 = internal global i32 0 +@_ZL5data3 = internal global i32 3 + +define void @hot_callee(i32 %0) !prof !51 { + %2 = call i32 (ptr, ...) @printf(ptr @.str) + %3 = srem i32 %0, 2 + %4 = sext i32 %3 to i64 + %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4 + %6 = load ptr, ptr %5 + %7 = load i32, ptr %6 + %8 = load i32, ptr @hot_data + %9 = load i32, ptr @hot_bss + %10 = call i32 (ptr, ...) @printf(ptr @.str.1, i32 %7, i32 %8, i32 %9) + ret void +} + +define void @cold_callee(i32 %0) !prof !52 { + %2 = load i32, ptr @cold_bss + %3 = load i32, ptr @cold_data + %4 = srem i32 %0, 2 + %5 = sext i32 %4 to i64 + %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5 + %7 = load ptr, ptr %6 + %8 = load i32, ptr %7 + %9 = call i32 (ptr, ...) 
@printf(ptr @.str.2, i32 %2, i32 %3, i32 %8) + ret void +} + +define i32 @main(i32 %0, ptr %1) !prof !52 { + %3 = call i64 @time(ptr null) + %4 = trunc i64 %3 to i32 + call void @srand(i32 %4) + br label %11 + +5: ; preds = %11 + %6 = call i32 @rand() + store i32 %6, ptr @cold_bss + store i32 %6, ptr @cold_data + store i32 %6, ptr @_ZL4bss2 + store i32 %6, ptr @_ZL5data3 + call void @cold_callee(i32 %6) + ret i32 0 + +11: ; preds = %11, %2 + %12 = phi i32 [ 0, %2 ], [ %19, %11 ] + %13 = call i32 @rand() + %14 = srem i32 %13, 2 + %15 = sext i32 %14 to i64 + %16 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %15 + %17 = load ptr, ptr %16 + store i32 %13, ptr %17 + store i32 %13, ptr @hot_data + %18 = add i32 %13, 1 + store i32 %18, ptr @hot_bss + call void @hot_callee(i32 %12) + %19 = add i32 %12, 1 + %20 = icmp eq i32 %19, 100000 + br i1 %20, label %5, label %11, !prof !53 +} + +declare void @srand(i32) +declare i64 @time(ptr) +declare i32 @rand() +declare i32 @printf(ptr, ...) 
+ +!llvm.module.flags = !{!12} + +!12 = !{i32 1, !"ProfileSummary", !13} +!13 = !{!14, !15, !16, !17, !18, !19, !20, !23} +!14 = !{!"ProfileFormat", !"InstrProf"} +!15 = !{!"TotalCount", i64 1460183} +!16 = !{!"MaxCount", i64 849024} +!17 = !{!"MaxInternalCount", i64 32769} +!18 = !{!"MaxFunctionCount", i64 849024} +!19 = !{!"NumCounts", i64 23627} +!20 = !{!"NumFunctions", i64 3271} +!23 = !{!"DetailedSummary", !24} +!24 = !{!36, !40} +!36 = !{i32 990000, i64 166, i32 73} +!40 = !{i32 999999, i64 1, i32 1443} +!51 = !{!"function_entry_count", i64 100000} +!52 = !{!"function_entry_count", i64 1} +!53 = !{!"branch_weights", i32 1, i32 99999} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll b/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll index 58af88d8cf365..48d02e5cebc69 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll @@ -34,8 +34,8 @@ define void @cold_func() !prof !16 { ret void } -; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !"hot"} -; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"} +; CHECK: ![[HOT_ID]] = !{!"section_prefix", !"hot"} +; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"} !llvm.module.flags = !{!1} !1 = !{i32 1, !"ProfileSummary", !2} !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/section.ll b/llvm/test/Transforms/CodeGenPrepare/X86/section.ll index 6dad1122e4294..4baa0b5baa4be 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/section.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/section.ll @@ -66,8 +66,8 @@ define void @cold_func3() !prof !16 { ret void } -; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !"hot"} -; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"} +; CHECK: ![[HOT_ID]] = !{!"section_prefix", !"hot"} +; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"} !llvm.module.flags = !{!1} !1 = !{i32 1, 
!"ProfileSummary", !2} !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} diff --git a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll index 6e5ef1aa25392..1e8825e651ec4 100644 --- a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll +++ b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll @@ -27,9 +27,9 @@ declare void @sink() cold ; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] {{.*}}section_prefix ![[UNLIKELY:[0-9]+]] ; CHECK: ![[HOTPROF]] = !{!"function_entry_count", i64 100} -; CHECK: ![[LIKELY]] = !{!"function_section_prefix", !"hot"} +; CHECK: ![[LIKELY]] = !{!"section_prefix", !"hot"} ; CHECK: ![[PROF]] = !{!"function_entry_count", i64 0} -; CHECK: ![[UNLIKELY]] = !{!"function_section_prefix", !"unlikely"} +; CHECK: ![[UNLIKELY]] = !{!"section_prefix", !"unlikely"} !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} diff --git a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll index ef2ddbc33cee4..af4b875818f6f 100644 --- a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll +++ b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll @@ -36,11 +36,11 @@ attributes #1 = { "use-sample-profile" } ; CHECK: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1} ; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} -; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"} +; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"} ; UNKNOWN: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1} -; UNKNOWN: ![[UNKNOWN_ID]] = !{!"function_section_prefix", !"unknown"} +; UNKNOWN: ![[UNKNOWN_ID]] = !{!"section_prefix", !"unknown"} ; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0} -; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"} +; ACCURATE: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"} !llvm.module.flags = !{!1} !1 = !{i32 1, 
!"ProfileSummary", !2} !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} From 93d9881754b4713a6202011a2e1ffe520cf80367 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Tue, 4 Feb 2025 12:13:52 -0800 Subject: [PATCH 2/9] add comment for bss2 and data3 --- .../CodeGen/X86/global-variable-partition.ll | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll index d457d766999e2..bb77f3362406b 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -50,8 +50,8 @@ target triple = "x86_64-unknown-linux-gnu" ; SYM: .section .bss.unlikely.cold_bss,"aw",@nobits ; SYM: .section .data.unlikely.cold_data,"aw",@progbits ; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits -; SYM: .section .bss.unlikely._ZL4bss2,"aw",@nobits -; SYM: .section .data.unlikely._ZL5data3,"aw",@progbits +; SYM: .section .bss.unlikely.bss2,"aw",@nobits +; SYM: .section .data.unlikely.data3,"aw",@progbits ; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,6 ; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,7 @@ -67,15 +67,21 @@ target triple = "x86_64-unknown-linux-gnu" @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 -@hot_relro_array = internal constant [2 x ptr] [ptr @_ZL4bss2, ptr @_ZL5data3] +@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3] @hot_data = internal global i32 5 @hot_bss = internal global i32 0 @.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1 @cold_bss = internal global i32 0 @cold_data = internal global i32 4 -@cold_relro_array = internal constant [2 x ptr] [ptr @_ZL5data3, ptr @_ZL4bss2] -@_ZL4bss2 = internal global i32 0 -@_ZL5data3 = internal global i32 3 +@cold_relro_array = internal constant [2 x ptr] [ptr @data3, 
ptr @bss2] + +; COM: Currently static-data-splitter only analyzes access from code. +; COM: @bss2 and @data3 are indirectly accessed by code through @hot_relro_array +; COM: and @cold_relro_array. +; COM: A follow-up item is to analyze access from data and prune the unlikely +; COM: list. +@bss2 = internal global i32 0 +@data3 = internal global i32 3 define void @hot_callee(i32 %0) !prof !51 { %2 = call i32 (ptr, ...) @printf(ptr @.str) @@ -112,8 +118,8 @@ define i32 @main(i32 %0, ptr %1) !prof !52 { %6 = call i32 @rand() store i32 %6, ptr @cold_bss store i32 %6, ptr @cold_data - store i32 %6, ptr @_ZL4bss2 - store i32 %6, ptr @_ZL5data3 + store i32 %6, ptr @bss2 + store i32 %6, ptr @data3 call void @cold_callee(i32 %6) ret i32 0 From 8f21570c7d9dd528ad56ff2eefe85af8125ba99c Mon Sep 17 00:00:00 2001 From: mingmingl Date: Wed, 5 Feb 2025 14:20:12 -0800 Subject: [PATCH 3/9] apply code review suggestions --- llvm/lib/CodeGen/StaticDataSplitter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index e6e20db0724a5..0716f6dac27f1 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -123,11 +123,10 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { for (const auto &MBB : MF) { for (const MachineInstr &I : MBB) { for (const MachineOperand &Op : I.operands()) { - std::optional Count = std::nullopt; if (!Op.isJTI() && !Op.isGlobal()) continue; - Count = MBFI->getBlockProfileCount(&MBB); + std::optional Count = MBFI->getBlockProfileCount(&MBB); if (Op.isJTI()) { assert(MJTI != nullptr && "Jump table info is not available."); From f07d34d0d02bf9a8c46364cf4e1e605d3b626b92 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Wed, 5 Feb 2025 14:48:39 -0800 Subject: [PATCH 4/9] record global variable section prefix updates as module updates --- llvm/include/llvm/IR/GlobalObject.h | 2 +- 
llvm/lib/CodeGen/StaticDataSplitter.cpp | 13 +++++++++---- llvm/lib/IR/Globals.cpp | 6 +++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index bb50c39813e14..400ea6a1a7fca 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -129,7 +129,7 @@ class GlobalObject : public GlobalValue { /// Update the section prefix, unless the existing prefix is the same as /// `KeepPrefix`. - void updateSectionPrefix(StringRef Prefix, + bool updateSectionPrefix(StringRef Prefix, std::optional KeepPrefix = std::nullopt); /// Get the section prefix for this global object. diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 0716f6dac27f1..bbe3f9ff8cbd3 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -59,7 +59,7 @@ class StaticDataSplitter : public MachineFunctionPass { // Iterate all global variables in the module and update the section prefix // of the module-internal data. - void updateGlobalVariableSectionPrefix(MachineFunction &MF); + bool updateGlobalVariableSectionPrefix(MachineFunction &MF); // Accummulated data profile count across machine functions in the module. 
DenseMap DataProfileCounts; @@ -105,6 +105,8 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { bool Changed = partitionStaticDataWithProfiles(MF); + Changed |= updateGlobalVariableSectionPrefix(MF); + updateStatsWithProfiles(MF); return Changed; } @@ -188,8 +190,9 @@ bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV, Kind.isBSS(); } -void StaticDataSplitter::updateGlobalVariableSectionPrefix( +bool StaticDataSplitter::updateGlobalVariableSectionPrefix( MachineFunction &MF) { + bool Changed = false; for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) { if (GV.isDeclarationForLinker()) continue; @@ -213,11 +216,13 @@ void StaticDataSplitter::updateGlobalVariableSectionPrefix( // framework, and set global variable section prefix once per module after // analyzing all machine functions. if (PSI->isColdCount(Iter->second.getZExtValue())) { - GV.updateSectionPrefix("unlikely", std::make_optional(StringRef("hot"))); + Changed |= GV.updateSectionPrefix("unlikely", + std::make_optional(StringRef("hot"))); } else if (PSI->isHotCount(Iter->second.getZExtValue())) { - GV.updateSectionPrefix("hot"); + Changed |= GV.updateSectionPrefix("hot"); } } + return Changed; } void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 884089262e465..5baf854cd552b 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -293,15 +293,15 @@ void GlobalObject::setSectionPrefix(StringRef Prefix) { MDB.createGlobalObjectSectionPrefix(Prefix)); } -void GlobalObject::updateSectionPrefix(StringRef Prefix, +bool GlobalObject::updateSectionPrefix(StringRef Prefix, std::optional KeepPrefix) { auto SectionPrefix = getSectionPrefix(); if (SectionPrefix && (*SectionPrefix == Prefix || (KeepPrefix && *SectionPrefix == *KeepPrefix))) - return; + return false; setSectionPrefix(Prefix); - return; + return true; } std::optional 
GlobalObject::getSectionPrefix() const { From 4a2a881a66bc99ba28609c05c1a03e432d16694c Mon Sep 17 00:00:00 2001 From: mingmingl Date: Fri, 7 Feb 2025 17:46:52 -0800 Subject: [PATCH 5/9] remove blank line --- llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index d20ab29cc1979..6cbc4b9776a1b 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -670,7 +670,6 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, } bool HasPrefix = false; - if (const auto *F = dyn_cast(GO)) { // Jump table hotness takes precedence over its enclosing function's hotness // if it's known. The function's section prefix is used if jump table entry From 1f50494cd83b2c222191353050a955dd36beb610 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Mon, 24 Feb 2025 15:51:49 -0800 Subject: [PATCH 6/9] Implement module-wide analysis of global variable hotness. * In StaticDataProfileInfo.h/cpp, add an immutable pass to keep track of constants and their profile information across functions in a module. * Add a module pass, StaticDataAnnotator, to set global variable's section prefix based on module-wide hotness. 
--- .../llvm/Analysis/StaticDataProfileInfo.h | 68 +++++++ llvm/include/llvm/CodeGen/Passes.h | 9 +- llvm/include/llvm/InitializePasses.h | 2 + .../llvm/Passes/MachinePassRegistry.def | 1 + llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/StaticDataProfileInfo.cpp | 50 +++++ llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGen.cpp | 1 + llvm/lib/CodeGen/StaticDataAnnotator.cpp | 119 ++++++++++++ llvm/lib/CodeGen/StaticDataSplitter.cpp | 103 ++++------- llvm/lib/CodeGen/TargetPassConfig.cpp | 7 +- .../CodeGen/X86/global-variable-partition.ll | 173 +++++++++++------- 12 files changed, 392 insertions(+), 143 deletions(-) create mode 100644 llvm/include/llvm/Analysis/StaticDataProfileInfo.h create mode 100644 llvm/lib/Analysis/StaticDataProfileInfo.cpp create mode 100644 llvm/lib/CodeGen/StaticDataAnnotator.cpp diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h new file mode 100644 index 0000000000000..4220f7d820db9 --- /dev/null +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -0,0 +1,68 @@ +#ifndef LLVM_ANALYSIS_STATICDATAPROFILEINFO_H +#define LLVM_ANALYSIS_STATICDATAPROFILEINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/Constant.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A class that holds the constants that represent static data and their +/// profile information and provides methods to operate on them. +class StaticDataProfileInfo { +public: + /// Accumulate the profile count of a constant that will be lowered to static + /// data sections. + DenseMap ConstantProfileCounts; + + /// Keeps track of the constants that are seen at least once without profile + /// counts.
+ DenseSet ConstantWithoutCounts; + +public: + StaticDataProfileInfo() = default; + + /// If \p Count is not nullopt, add it to the profile count of the constant \p + /// C in a saturating way, and clamp the count to \p getInstrMaxCountValue if + /// the result exceeds it. Otherwise, mark the constant as having no profile + /// count. + void addConstantProfileCount(const Constant *C, + std::optional Count); + + /// If \p C has a count, return it. Otherwise, return std::nullopt. + std::optional getConstantProfileCount(const Constant *C) const; + + /// Return true if the constant \p C is seen at least once without profiles. + bool hasUnknownCount(const Constant *C) const { + return ConstantWithoutCounts.count(C); + } +}; + +/// This wraps the StaticDataProfileInfo object as an immutable pass, for a +/// backend pass to operate on. +class StaticDataProfileInfoWrapperPass : public ImmutablePass { +public: + static char ID; + StaticDataProfileInfoWrapperPass(); + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + + StaticDataProfileInfo &getStaticDataProfileInfo() { return *Info; } + const StaticDataProfileInfo &getStaticDataProfileInfo() const { + return *Info; + } + + /// This pass provides StaticDataProfileInfo for reads/writes but does not + /// modify \p M or other analysis. All analysis are preserved. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + std::unique_ptr Info; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_STATICDATAPROFILEINFO_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index b5d2a7e6bf035..95e4de91b068c 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -71,10 +71,15 @@ namespace llvm { /// using profile information. 
MachineFunctionPass *createMachineFunctionSplitterPass(); - /// createStaticDataSplitterPass - This pass partitions a static data section - /// into a hot and cold section using profile information. + /// createStaticDataSplitterPass - This is a machine-function pass that + /// categorizes static data hotness using profile information. MachineFunctionPass *createStaticDataSplitterPass(); + /// createStaticDataAnnotatorPass - This is a module pass that reads from + /// StaticDataProfileInfoWrapperPass and annotates the section prefix of + /// global variables. + ModulePass *createStaticDataAnnotatorPass(); + /// MachineFunctionPrinter pass - This pass prints out the machine function to /// the given stream as a debugging tool. MachineFunctionPass * diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index b8df4d1ecab1d..96c240ab4965f 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -205,6 +205,8 @@ void initializeMachineLoopInfoWrapperPassPass(PassRegistry &); void initializeMachineModuleInfoWrapperPassPass(PassRegistry &); void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry &); void initializeMachineOutlinerPass(PassRegistry &); +void initializeStaticDataProfileInfoWrapperPassPass(PassRegistry &); +void initializeStaticDataAnnotatorPass(PassRegistry &); void initializeMachinePipelinerPass(PassRegistry &); void initializeMachinePostDominatorTreeWrapperPassPass(PassRegistry &); void initializeMachineRegionInfoPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index e6b4a4b0a56ae..c5080a324864d 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -207,6 +207,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( #define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME) #endif DUMMY_MACHINE_MODULE_PASS("machine-outliner",
MachineOutlinerPass) +DUMMY_MACHINE_MODULE_PASS("static-data-annotator", StaticDataAnnotator) DUMMY_MACHINE_MODULE_PASS("pseudo-probe-inserter", PseudoProbeInserterPass) DUMMY_MACHINE_MODULE_PASS("mir-debugify", DebugifyMachineModule) DUMMY_MACHINE_MODULE_PASS("mir-check-debugify", CheckDebugMachineModulePass) diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index a44f6c6a135ef..fb2d7a82f670b 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -126,6 +126,7 @@ add_llvm_component_library(LLVMAnalysis ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp ScalarEvolutionNormalization.cpp + StaticDataProfileInfo.cpp StackLifetime.cpp StackSafetyAnalysis.cpp StructuralHash.cpp diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp new file mode 100644 index 0000000000000..b124e101f8cdf --- /dev/null +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -0,0 +1,50 @@ +#include "llvm/Analysis/StaticDataProfileInfo.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include + +using namespace llvm; +void StaticDataProfileInfo::addConstantProfileCount( + const Constant *C, std::optional Count) { + if (!Count) { + ConstantWithoutCounts.insert(C); + return; + } + uint64_t &OriginalCount = ConstantProfileCounts[C]; + OriginalCount = llvm::SaturatingAdd(*Count, OriginalCount); + // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few + // large values for special use.
+ if (OriginalCount > getInstrMaxCountValue()) + OriginalCount = getInstrMaxCountValue(); +} + +std::optional +StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { + auto I = ConstantProfileCounts.find(C); + if (I == ConstantProfileCounts.end()) + return std::nullopt; + return I->second; +} + +bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { + Info.reset(new StaticDataProfileInfo()); + return false; +} + +bool StaticDataProfileInfoWrapperPass::doFinalization(Module &M) { + Info.reset(); + return false; +} + +INITIALIZE_PASS(StaticDataProfileInfoWrapperPass, "static-data-profile-info", + "Static Data Profile Info", false, true) + +StaticDataProfileInfoWrapperPass::StaticDataProfileInfoWrapperPass() + : ImmutablePass(ID) { + initializeStaticDataProfileInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + +char StaticDataProfileInfoWrapperPass::ID = 0; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 88f863d8204d0..0680e01223e2b 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -227,6 +227,7 @@ add_llvm_component_library(LLVMCodeGen StackProtector.cpp StackSlotColoring.cpp StaticDataSplitter.cpp + StaticDataAnnotator.cpp SwiftErrorValueTracking.cpp SwitchLoweringUtils.cpp TailDuplication.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 35df2a479a545..7d37e1ce5ce8b 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -132,6 +132,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackProtectorPass(Registry); initializeStackSlotColoringLegacyPass(Registry); initializeStaticDataSplitterPass(Registry); + initializeStaticDataAnnotatorPass(Registry); initializeStripDebugMachineModulePass(Registry); initializeTailDuplicateLegacyPass(Registry); initializeTargetPassConfigPass(Registry); diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp 
b/llvm/lib/CodeGen/StaticDataAnnotator.cpp new file mode 100644 index 0000000000000..04d918585f8af --- /dev/null +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -0,0 +1,119 @@ +//===- StaticDataAnnotator - Annotate static data's section prefix --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// To reason about data hotness at module granularity, this file implements a +// module pass, StaticDataAnnotator, which works in coordination with the +// StaticDataSplitter pass. +// +// The StaticDataSplitter pass is a machine function pass. It analyzes data +// hotness based on code and adds counters in the StaticDataProfileInfo. +// The StaticDataAnnotator pass is a module pass. It iterates global variables +// in the module, looks up counters from StaticDataProfileInfo and sets the +// section prefix based on profiles. +// +// The three-pass structure is implemented for practical reasons, to work around +// the limitation that a module pass based on legacy pass manager cannot make +// use of MachineBlockFrequencyInfo analysis. In the future, we can consider +// porting the StaticDataSplitter pass to a module-pass using the new pass +// manager framework. That way, analyses are lazily computed as opposed to +// eagerly scheduled, and a module pass can use MachineBlockFrequencyInfo.
+//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "static-data-annotator" + +using namespace llvm; + +class StaticDataAnnotator : public ModulePass { +public: + static char ID; + + StaticDataProfileInfo *SDPI = nullptr; + const ProfileSummaryInfo *PSI = nullptr; + + StaticDataAnnotator() : ModulePass(ID) { + initializeStaticDataAnnotatorPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + ModulePass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "Static Data Annotator"; } + + bool runOnModule(Module &M) override; +}; + +// Returns true if the global variable already has a section prefix that is the +// same as `Prefix`. +static bool alreadyHasSectionPrefix(const GlobalVariable &GV, + StringRef Prefix) { + std::optional SectionPrefix = GV.getSectionPrefix(); + return SectionPrefix && (*SectionPrefix == Prefix); +} + +bool StaticDataAnnotator::runOnModule(Module &M) { + SDPI = &getAnalysis() + .getStaticDataProfileInfo(); + PSI = &getAnalysis().getPSI(); + + if (!PSI->hasProfileSummary()) + return false; + + bool Changed = false; + for (auto &GV : M.globals()) { + if (GV.isDeclarationForLinker()) + continue; + + // Skip global variables without profile counts. The module may not be + // profiled or instrumented. 
+ auto Count = SDPI->getConstantProfileCount(&GV); + if (!Count) + continue; + + if (PSI->isHotCount(*Count) && !alreadyHasSectionPrefix(GV, "hot")) { + // The variable counter is hot, set 'hot' section prefix if the section + // prefix isn't hot already. + GV.setSectionPrefix("hot"); + Changed = true; + } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(&GV) && + !alreadyHasSectionPrefix(GV, "unlikely")) { + // The variable counter is cold, set 'unlikely' section prefix when + // 1) the section prefix isn't unlikely already, and + // 2) the variable is not seen without profile counts. The reason is that + // a variable without profile counts doesn't have all its uses profiled, + // for example when a function is not instrumented, or not sampled (new + // code paths). + GV.setSectionPrefix("unlikely"); + Changed = true; + } + } + + return Changed; +} + +char StaticDataAnnotator::ID = 0; + +INITIALIZE_PASS(StaticDataAnnotator, DEBUG_TYPE, "Static Data Annotator", false, + false) + +ModulePass *llvm::createStaticDataAnnotatorPass() { + return new StaticDataAnnotator(); +} diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index aaf898e9b59c6..c647c3075d79c 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -30,9 +31,7 @@ #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -49,6 +48,7 @@ class StaticDataSplitter : public MachineFunctionPass { const 
MachineBranchProbabilityInfo *MBPI = nullptr; const MachineBlockFrequencyInfo *MBFI = nullptr; const ProfileSummaryInfo *PSI = nullptr; + StaticDataProfileInfo *SDPI = nullptr; // If the global value is a local linkage global variable, return it. // Otherwise, return nullptr. @@ -58,19 +58,16 @@ class StaticDataSplitter : public MachineFunctionPass { // .data.rel.ro} sections. bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM); - // Iterate all global variables in the module and update the section prefix - // of the module-internal data. - bool updateGlobalVariableSectionPrefix(MachineFunction &MF); + // Use profiles to partition static data. + bool partitionStaticDataWithProfiles(MachineFunction &MF); - // Accummulated data profile count across machine functions in the module. - DenseMap DataProfileCounts; - // Update LLVM statistics for a machine function without profiles. - void updateStatsWithoutProfiles(const MachineFunction &MF); // Update LLVM statistics for a machine function with profiles. void updateStatsWithProfiles(const MachineFunction &MF); - // Use profiles to partition static data. - bool partitionStaticDataWithProfiles(MachineFunction &MF); + // Update LLVM statistics for a machine function without profiles. 
+ void updateStatsWithoutProfiles(const MachineFunction &MF); + + void annotateStaticDataWithoutProfiles(const MachineFunction &MF); public: static char ID; @@ -86,6 +83,7 @@ class StaticDataSplitter : public MachineFunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -96,18 +94,20 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { MBFI = &getAnalysis().getMBFI(); PSI = &getAnalysis().getPSI(); + SDPI = &getAnalysis() + .getStaticDataProfileInfo(); + const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI && MF.getFunction().hasProfileData(); if (!ProfileAvailable) { + annotateStaticDataWithoutProfiles(MF); updateStatsWithoutProfiles(MF); return false; } bool Changed = partitionStaticDataWithProfiles(MF); - Changed |= updateGlobalVariableSectionPrefix(MF); - updateStatsWithProfiles(MF); return Changed; } @@ -158,18 +158,7 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { if (!GV || GV->getName().starts_with("llvm.") || !inStaticDataSection(GV, TM)) continue; - - // Acccumulate data profile count across machine function - // instructions. - // TODO: Analyze global variable's initializers. - if (Count) { - uint64_t &GVCount = DataProfileCounts[GV]; - GVCount = llvm::SaturatingAdd(GVCount, *Count); - // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few - // large values for special use. 
- if (GVCount > getInstrMaxCountValue()) - GVCount = getInstrMaxCountValue(); - } + SDPI->addConstantProfileCount(GV, Count); } } } @@ -194,51 +183,6 @@ bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV, Kind.isBSS(); } -bool StaticDataSplitter::updateGlobalVariableSectionPrefix( - MachineFunction &MF) { - bool Changed = false; - for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) { - if (GV.isDeclarationForLinker()) - continue; - // DataProfileCounts accumulates data profile count across all machine - // function instructions, and it can't model the indirect accesses through - // other global variables' initializers. - // TODO: Analyze the users of module-internal global variables and see - // through the users' initializers. Do not place a global variable into - // unlikely section if any of its users are potentially hot. - auto Iter = DataProfileCounts.find(&GV); - if (Iter == DataProfileCounts.end()) - continue; - - const std::optional Prefix = GV.getSectionPrefix(); - - // StaticDataSplitter is made a machine function pass rather than a module - // pass because (Lazy)MachineBlockFrequencyInfo is a machine-function - // analysis pass and cannot be used for a legacy module pass. - // As a result, we use `DataProfileCounts` to accumulate data - // profile count across machine functions and update global variable section - // prefix once per machine function. - // FIXME: Make StaticDataSplitter a module pass under new pass manager - // framework, and set global variable section prefix once per module after - // analyzing all machine functions. 
- if (PSI->isColdCount(Iter->second)) { - assert((!Prefix || *Prefix != "hot") && - "Count monotonically increased so a hot variable won't become " - "cold again."); - if (!Prefix || *Prefix != "unlikely") { - GV.setSectionPrefix("unlikely"); - Changed |= true; - } - } else if (PSI->isHotCount(Iter->second)) { - if (!Prefix || *Prefix != "hot") { - GV.setSectionPrefix("hot"); - Changed |= true; - } - } - } - return Changed; -} - void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { if (!AreStatisticsEnabled()) return; @@ -257,6 +201,24 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { } } +void StaticDataSplitter::annotateStaticDataWithoutProfiles( + const MachineFunction &MF) { + for (const auto &MBB : MF) { + for (const MachineInstr &I : MBB) { + for (const MachineOperand &Op : I.operands()) { + if (!Op.isGlobal()) + continue; + const GlobalVariable *GV = + getLocalLinkageGlobalVariable(Op.getGlobal()); + if (!GV || GV->getName().starts_with("llvm.") || + !inStaticDataSection(GV, MF.getTarget())) + continue; + SDPI->addConstantProfileCount(GV, std::nullopt); + } + } + } +} + void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) { if (!AreStatisticsEnabled()) return; @@ -273,6 +235,7 @@ INITIALIZE_PASS_BEGIN(StaticDataSplitter, DEBUG_TYPE, "Split static data", INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(StaticDataProfileInfoWrapperPass) INITIALIZE_PASS_END(StaticDataSplitter, DEBUG_TYPE, "Split static data", false, false) diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 5d9da9df9092a..7f89043c0b20c 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1257,8 +1257,13 @@ void TargetPassConfig::addMachinePasses() { } } 
addPass(createMachineFunctionSplitterPass()); - if (SplitStaticData || TM->Options.EnableStaticDataPartitioning) + if (SplitStaticData || TM->Options.EnableStaticDataPartitioning) { + // The static data splitter pass is a machine function pass, and the + // static data annotator pass is a module-wide pass. See the file comment + // in StaticDataAnnotator.cpp for the motivation. addPass(createStaticDataSplitterPass()); + addPass(createStaticDataAnnotatorPass()); + } } // We run the BasicBlockSections pass if either we need BB sections or BB // address map (or both). diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll index bb77f3362406b..b216047a5ea66 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -1,23 +1,35 @@ - +; The static-data-splitter processes data from @cold_func first, +; @unprofiled_func secondly, and @hot_func after the two functions above. +; Tests that data hotness is based on aggregated module-wide profile +; information. This way linker-mergeable data is emitted once per module. target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +; The three RUN commands set `-relocation-model=pic` so `hot_relro_array` and +; `cold_relro_array` are placed in the .data.rel.ro-prefixed section. + +; This RUN command sets `-data-sections=true -unique-section-names=true` so data +; sections are uniquified by variable names. ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ ; RUN: -partition-static-data-sections=true -data-sections=true \ ; RUN: -unique-section-names=true -relocation-model=pic \ ; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA +; This RUN command sets `-data-sections=true -unique-section-names=false` so +; data sections are uniquified by numbers.
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ ; RUN: -partition-static-data-sections=true -data-sections=true \ ; RUN: -unique-section-names=false -relocation-model=pic \ ; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA +; This RUN command sets `-data-sections=false -unique-section-names=false`. ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ ; RUN: -partition-static-data-sections=true -data-sections=false \ ; RUN: -unique-section-names=false -relocation-model=pic \ ; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA +; For @.str and @.str.1 ; SYM: .section .rodata.str1.1.hot. ; UNIQ: .section .rodata.str1.1.hot.,"aMS",@progbits,1 ; AGG: .section .rodata.str1.1.hot @@ -26,45 +38,66 @@ target triple = "x86_64-unknown-linux-gnu" ; DATA: .L.str.1 ; DATA: "%d\t%d\t%d\n" +; For @hot_relro_array +; SYM: .section .data.rel.ro.hot.hot_relro_array +; UNIQ: .section .data.rel.ro.hot.,"aw",@progbits,unique,3 +; AGG: .section .data.rel.ro.hot.,"aw",@progbits -; SYM: .section .data.rel.ro.hot.hot_relro_array +; For @hot_data, which is accessed by {cold_func, unprofiled_func, hot_func}. ; SYM: .section .data.hot.hot_data,"aw",@progbits -; SYM: .section .bss.hot.hot_bss,"aw",@nobits - -; UNIQ: .section .data.rel.ro.hot.,"aw",@progbits,unique,3 ; UNIQ: .section .data.hot.,"aw",@progbits,unique,4 -; UNIQ: .section .bss.hot.,"aw",@nobits,unique,5 - -; AGG: .section .data.rel.ro.hot.,"aw",@progbits ; AGG: .section .data.hot.,"aw",@progbits -; AGG: .section .bss.hot.,"aw",@nobits +; For @hot_bss, which is accessed by {unprofiled_func, hot_func}. 
+; SYM: .section .bss.hot.hot_bss,"aw",@nobits +; UNIQ: .section .bss.hot.,"aw",@nobits,unique,5 +; AGG: .section .bss.hot.,"aw",@nobits +; For @.str.2 ; SYM: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 ; UNIQ: section .rodata.str1.1.unlikely.,"aMS",@progbits,1 ; AGG: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 ; DATA: .L.str.2: ; DATA: "cold%d\t%d\t%d\n" - +; For @cold_bss ; SYM: .section .bss.unlikely.cold_bss,"aw",@nobits -; SYM: .section .data.unlikely.cold_data,"aw",@progbits -; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits -; SYM: .section .bss.unlikely.bss2,"aw",@nobits -; SYM: .section .data.unlikely.data3,"aw",@progbits - ; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,6 -; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,7 -; UNIQ: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,8 -; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,9 -; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,10 - ; AGG: .section .bss.unlikely.,"aw",@nobits + +; For @cold_data +; SYM: .section .data.unlikely.cold_data,"aw",@progbits +; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,7 ; AGG: .section .data.unlikely.,"aw",@progbits + +; For @cold_relro_array +; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits +; UNIQ: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,8 ; AGG: .section .data.rel.ro.unlikely.,"aw",@progbits + +; Currently static-data-splitter only analyzes access from code. +; @bss2 and @data3 are indirectly accessed by code through @hot_relro_array +; and @cold_relro_array. A follow-up item is to analyze indirect access via data +; and prune the unlikely list. 
+; For @bss2 +; SYM: .section .bss.unlikely.bss2,"aw",@nobits +; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,9 ; AGG: .section .bss.unlikely.,"aw",@nobits + +; For @data3 +; SYM: .section .data.unlikely.data3,"aw",@progbits +; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,10 ; AGG: .section .data.unlikely.,"aw",@progbits +; For @data_with_unknown_hotness +; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness +; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits +; UNIQ: .section .data,"aw",@progbits,unique,11 +; The `.section` directive is omitted for .data with -unique-section-names=false. +; See MCSectionELF::shouldOmitSectionDirective for the implementation details. +; AGG: .data +; DATA: .Ldata_with_unknown_hotness: + @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 @hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3] @@ -74,29 +107,11 @@ target triple = "x86_64-unknown-linux-gnu" @cold_bss = internal global i32 0 @cold_data = internal global i32 4 @cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2] - -; COM: Currently static-data-splitter only analyzes access from code. -; COM: @bss2 and @data3 are indirectly accessed by code through @hot_relro_array -; COM: and @cold_relro_array. -; COM: A follow-up item is to analyze access from data and prune the unlikely -; COM: list. @bss2 = internal global i32 0 @data3 = internal global i32 3 +@data_with_unknown_hotness = private global i32 5 -define void @hot_callee(i32 %0) !prof !51 { - %2 = call i32 (ptr, ...) @printf(ptr @.str) - %3 = srem i32 %0, 2 - %4 = sext i32 %3 to i64 - %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4 - %6 = load ptr, ptr %5 - %7 = load i32, ptr %6 - %8 = load i32, ptr @hot_data - %9 = load i32, ptr @hot_bss - %10 = call i32 (ptr, ...) 
@printf(ptr @.str.1, i32 %7, i32 %8, i32 %9) - ret void -} - -define void @cold_callee(i32 %0) !prof !52 { +define void @cold_func(i32 %0) !prof !15 { %2 = load i32, ptr @cold_bss %3 = load i32, ptr @cold_data %4 = srem i32 %0, 2 @@ -104,14 +119,34 @@ define void @cold_callee(i32 %0) !prof !52 { %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5 %7 = load ptr, ptr %6 %8 = load i32, ptr %7 - %9 = call i32 (ptr, ...) @printf(ptr @.str.2, i32 %2, i32 %3, i32 %8) + %9 = load i32, ptr @data_with_unknown_hotness + %11 = load i32, ptr @hot_data + %12 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11) + ret void +} + +define i32 @unprofiled_func() { + %a = load i32, ptr @data_with_unknown_hotness + %b = load i32, ptr @hot_data + %c = load i32, ptr @hot_bss + %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c) + ret i32 %ret +} + +define void @hot_func(i32 %0) !prof !14 { + %2 = call i32 (...) @func_taking_arbitrary_param(ptr @.str) + %3 = srem i32 %0, 2 + %4 = sext i32 %3 to i64 + %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4 + %6 = load ptr, ptr %5 + %7 = load i32, ptr %6 + %8 = load i32, ptr @hot_data + %9 = load i32, ptr @hot_bss + %10 = call i32 (...) 
@func_taking_arbitrary_param(ptr @.str.1, i32 %7, i32 %8, i32 %9) ret void } -define i32 @main(i32 %0, ptr %1) !prof !52 { - %3 = call i64 @time(ptr null) - %4 = trunc i64 %3 to i32 - call void @srand(i32 %4) +define i32 @main(i32 %0, ptr %1) !prof !15 { br label %11 5: ; preds = %11 @@ -120,7 +155,7 @@ define i32 @main(i32 %0, ptr %1) !prof !52 { store i32 %6, ptr @cold_data store i32 %6, ptr @bss2 store i32 %6, ptr @data3 - call void @cold_callee(i32 %6) + call void @cold_func(i32 %6) ret i32 0 11: ; preds = %11, %2 @@ -134,32 +169,30 @@ define i32 @main(i32 %0, ptr %1) !prof !52 { store i32 %13, ptr @hot_data %18 = add i32 %13, 1 store i32 %18, ptr @hot_bss - call void @hot_callee(i32 %12) + call void @hot_func(i32 %12) %19 = add i32 %12, 1 %20 = icmp eq i32 %19, 100000 - br i1 %20, label %5, label %11, !prof !53 + br i1 %20, label %5, label %11, !prof !16 } -declare void @srand(i32) -declare i64 @time(ptr) declare i32 @rand() -declare i32 @printf(ptr, ...) - -!llvm.module.flags = !{!12} - -!12 = !{i32 1, !"ProfileSummary", !13} -!13 = !{!14, !15, !16, !17, !18, !19, !20, !23} -!14 = !{!"ProfileFormat", !"InstrProf"} -!15 = !{!"TotalCount", i64 1460183} -!16 = !{!"MaxCount", i64 849024} -!17 = !{!"MaxInternalCount", i64 32769} -!18 = !{!"MaxFunctionCount", i64 849024} -!19 = !{!"NumCounts", i64 23627} -!20 = !{!"NumFunctions", i64 3271} -!23 = !{!"DetailedSummary", !24} -!24 = !{!36, !40} -!36 = !{i32 990000, i64 166, i32 73} -!40 = !{i32 999999, i64 1, i32 1443} -!51 = !{!"function_entry_count", i64 100000} -!52 = !{!"function_entry_count", i64 1} -!53 = !{!"branch_weights", i32 1, i32 99999} +declare i32 @func_taking_arbitrary_param(...) 
+ +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460183} +!5 = !{!"MaxCount", i64 849024} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849024} +!8 = !{!"NumCounts", i64 23627} +!9 = !{!"NumFunctions", i64 3271} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13} +!12 = !{i32 990000, i64 166, i32 73} +!13 = !{i32 999999, i64 3, i32 1443} +!14 = !{!"function_entry_count", i64 100000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 99999} From 9302b2b708cdce89add071fa584c12ddb4f9d71d Mon Sep 17 00:00:00 2001 From: mingmingl Date: Thu, 20 Mar 2025 13:57:37 -0700 Subject: [PATCH 7/9] port code de-duplication based on feedback in the follow up patch (https://github.com/llvm/llvm-project/pull/129781) --- llvm/lib/CodeGen/StaticDataSplitter.cpp | 53 +++++++++++++------------ 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index c647c3075d79c..77778556ce44e 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -58,6 +58,11 @@ class StaticDataSplitter : public MachineFunctionPass { // .data.rel.ro} sections. bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM); + // Returns the constant if the operand refers to a global variable or constant + // that gets lowered to static data sections. Otherwise, return nullptr. + const Constant *getConstant(const MachineOperand &Op, + const TargetMachine &TM); + // Use profiles to partition static data. bool partitionStaticDataWithProfiles(MachineFunction &MF); @@ -84,6 +89,8 @@ class StaticDataSplitter : public MachineFunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + // This pass does not modify the CFG. 
+ AU.setPreservesCFG(); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -112,6 +119,20 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { return Changed; } +const Constant *StaticDataSplitter::getConstant(const MachineOperand &Op, + const TargetMachine &TM) { + if (!Op.isGlobal()) + return nullptr; + + // Find global variables with local linkage. + const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); + // Skip 'llvm.'-prefixed global variables conservatively because they are + // often handled specially, and skip those not in static data sections. + if (!GV || GV->getName().starts_with("llvm.") || !inStaticDataSection(GV, TM)) + return nullptr; + return GV; +} + bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { int NumChangedJumpTables = 0; @@ -148,17 +169,8 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { if (MJTI->updateJumpTableEntryHotness(JTI, Hotness)) ++NumChangedJumpTables; - } else { - // Find global variables with local linkage. - const GlobalVariable *GV = - getLocalLinkageGlobalVariable(Op.getGlobal()); - // Skip 'special' global variables conservatively because they are - // often handled specially, and skip those not in static data - // sections. 
- if (!GV || GV->getName().starts_with("llvm.") || - !inStaticDataSection(GV, TM)) - continue; - SDPI->addConstantProfileCount(GV, Count); + } else if (const Constant *C = getConstant(Op, TM)) { + SDPI->addConstantProfileCount(C, Count); } } } @@ -203,20 +215,11 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { void StaticDataSplitter::annotateStaticDataWithoutProfiles( const MachineFunction &MF) { - for (const auto &MBB : MF) { - for (const MachineInstr &I : MBB) { - for (const MachineOperand &Op : I.operands()) { - if (!Op.isGlobal()) - continue; - const GlobalVariable *GV = - getLocalLinkageGlobalVariable(Op.getGlobal()); - if (!GV || GV->getName().starts_with("llvm.") || - !inStaticDataSection(GV, MF.getTarget())) - continue; - SDPI->addConstantProfileCount(GV, std::nullopt); - } - } - } + for (const auto &MBB : MF) + for (const MachineInstr &I : MBB) + for (const MachineOperand &Op : I.operands()) + if (const Constant *C = getConstant(Op, MF.getTarget())) + SDPI->addConstantProfileCount(C, std::nullopt); } void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) { From 97103c682321ff0fb9b2559e3f0dbe5a2dd56f0b Mon Sep 17 00:00:00 2001 From: mingmingl Date: Thu, 27 Mar 2025 22:51:46 -0700 Subject: [PATCH 8/9] resolve comments --- .../llvm/Analysis/StaticDataProfileInfo.h | 21 ++++++++++------ llvm/lib/Analysis/StaticDataProfileInfo.cpp | 17 ++++++++++++- llvm/lib/CodeGen/StaticDataAnnotator.cpp | 24 ++++--------------- 3 files changed, 34 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h index 4220f7d820db9..9e2e5fbfc6761 100644 --- a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -3,6 +3,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Constant.h" #include 
"llvm/Pass.h" @@ -20,6 +21,9 @@ class StaticDataProfileInfo { /// counts. DenseSet ConstantWithoutCounts; + /// If \p C has a count, return it. Otherwise, return std::nullopt. + std::optional getConstantProfileCount(const Constant *C) const; + public: StaticDataProfileInfo() = default; @@ -30,13 +34,16 @@ class StaticDataProfileInfo { void addConstantProfileCount(const Constant *C, std::optional Count); - /// If \p C has a count, return it. Otherwise, return std::nullopt. - std::optional getConstantProfileCount(const Constant *C) const; - - /// Return true if the constant \p C is seen at least once without profiles. - bool hasUnknownCount(const Constant *C) const { - return ConstantWithoutCounts.count(C); - } + /// Return a section prefix for the constant \p C based on its profile count. + /// - If a constant doesn't have a counter, return an empty string. + /// - Otherwise, + /// - If it has a hot count, return "hot". + /// - If it is seen by unprofiled function, return an empty string. + /// - If it has a cold count, return "unlikely". + /// - Otherwise (e.g. it's used by lukewarm functions), return an empty + /// string. 
+ StringRef getConstantSectionPrefix(const Constant *C, + const ProfileSummaryInfo *PSI) const; }; /// This wraps the StaticDataProfileInfo object as an immutable pass, for a diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index b124e101f8cdf..ec97cea12d88c 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -1,4 +1,5 @@ #include "llvm/Analysis/StaticDataProfileInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Constant.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/InitializePasses.h" @@ -13,7 +14,7 @@ void StaticDataProfileInfo::addConstantProfileCount( return; } uint64_t &OriginalCount = ConstantProfileCounts[C]; - OriginalCount += llvm::SaturatingAdd(*Count, OriginalCount); + OriginalCount = llvm::SaturatingAdd(*Count, OriginalCount); // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few // large values for special use. if (OriginalCount > getInstrMaxCountValue()) @@ -28,6 +29,20 @@ StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { return I->second; } +StringRef StaticDataProfileInfo::getConstantSectionPrefix( + const Constant *C, const ProfileSummaryInfo *PSI) const { + auto Count = getConstantProfileCount(C); + if (!Count) + return ""; + if (PSI->isHotCount(*Count)) + return "hot"; + if (ConstantWithoutCounts.count(C)) + return ""; + if (PSI->isColdCount(*Count)) + return "unlikely"; + return ""; +} + bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { Info.reset(new StaticDataProfileInfo()); return false; diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp index 04d918585f8af..f83787c195454 100644 --- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -82,28 +82,12 @@ bool StaticDataAnnotator::runOnModule(Module &M) { if (GV.isDeclarationForLinker()) continue; - // Skip global 
variables without profile counts. The module may not be - // profiled or instrumented. - auto Count = SDPI->getConstantProfileCount(&GV); - if (!Count) + StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI); + if (SectionPrefix.empty() || alreadyHasSectionPrefix(GV, SectionPrefix)) continue; - if (PSI->isHotCount(*Count) && !alreadyHasSectionPrefix(GV, "hot")) { - // The variable counter is hot, set 'hot' section prefix if the section - // prefix isn't hot already. - GV.setSectionPrefix("hot"); - Changed = true; - } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(&GV) && - !alreadyHasSectionPrefix(GV, "unlikely")) { - // The variable counter is cold, set 'unlikely' section prefix when - // 1) the section prefix isn't unlikely already, and - // 2) the variable is not seen without profile counts. The reason is that - // a variable without profile counts doesn't have all its uses profiled, - // for example when a function is not instrumented, or not sampled (new - // code paths). 
- GV.setSectionPrefix("unlikely"); - Changed = true; - } + GV.setSectionPrefix(SectionPrefix); + Changed = true; } return Changed; From 38c8a03bf7c01ccec0d6930c4586f94e30b5cae7 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Fri, 28 Mar 2025 11:10:33 -0700 Subject: [PATCH 9/9] resolve comments --- llvm/lib/Analysis/StaticDataProfileInfo.cpp | 7 + llvm/lib/CodeGen/StaticDataAnnotator.cpp | 26 ++-- llvm/lib/CodeGen/StaticDataSplitter.cpp | 10 +- .../CodeGen/X86/global-variable-partition.ll | 123 +++++++++++------- 4 files changed, 105 insertions(+), 61 deletions(-) diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp index ec97cea12d88c..a435aa00c2e9d 100644 --- a/llvm/lib/Analysis/StaticDataProfileInfo.cpp +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -34,12 +34,19 @@ StringRef StaticDataProfileInfo::getConstantSectionPrefix( auto Count = getConstantProfileCount(C); if (!Count) return ""; + // The accumulated counter shows the constant is hot. Return 'hot' whether + // this variable is seen by unprofiled functions or not. if (PSI->isHotCount(*Count)) return "hot"; + // The constant is not hot, and seen by unprofiled functions. We don't want to + // assign it to unlikely sections, even if the counter says 'cold'. So return + // an empty prefix before checking whether the counter is cold. if (ConstantWithoutCounts.count(C)) return ""; + // The accumulated counter shows the constant is cold. Return 'unlikely'. if (PSI->isColdCount(*Count)) return "unlikely"; + // The counter says lukewarm. Return an empty prefix. return ""; } diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp index f83787c195454..edf85aef41c86 100644 --- a/llvm/lib/CodeGen/StaticDataAnnotator.cpp +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -11,7 +11,10 @@ // StaticDataSplitter pass. // // The StaticDataSplitter pass is a machine function pass. 
It analyzes data -// hotness based on code and adds counters in the StaticDataProfileInfo. +// hotness based on code and adds counters in StaticDataProfileInfo via its +// wrapper pass StaticDataProfileInfoWrapper. +// The StaticDataProfileInfoWrapper sits in the middle between the +// StaticDataSplitter and StaticDataAnnotator passes. // The StaticDataAnnotator pass is a module pass. It iterates global variables // in the module, looks up counters from StaticDataProfileInfo and sets the // section prefix based on profiles. @@ -38,6 +41,8 @@ using namespace llvm; +/// A module pass which iterates global variables in the module and annotates +/// their section prefixes based on profile-driven analysis. class StaticDataAnnotator : public ModulePass { public: static char ID; @@ -61,14 +66,6 @@ class StaticDataAnnotator : public ModulePass { bool runOnModule(Module &M) override; }; -// Returns true if the global variable already has a section prefix that is the -// same as `Prefix`. -static bool alreadyHasSectionPrefix(const GlobalVariable &GV, - StringRef Prefix) { - std::optional SectionPrefix = GV.getSectionPrefix(); - return SectionPrefix && (*SectionPrefix == Prefix); -} - bool StaticDataAnnotator::runOnModule(Module &M) { SDPI = &getAnalysis() .getStaticDataProfileInfo(); @@ -82,8 +79,17 @@ bool StaticDataAnnotator::runOnModule(Module &M) { if (GV.isDeclarationForLinker()) continue; + // The implementation below assumes prior passes don't set section prefixes, + // and specifically do 'assign' rather than 'update'. So report error if a + // section prefix is already set. 
+ if (auto maybeSectionPrefix = GV.getSectionPrefix(); + maybeSectionPrefix && !maybeSectionPrefix->empty()) + llvm::report_fatal_error("Global variable " + GV.getName() + + " already has a section prefix " + + *maybeSectionPrefix); + StringRef SectionPrefix = SDPI->getConstantSectionPrefix(&GV, PSI); - if (SectionPrefix.empty() || alreadyHasSectionPrefix(GV, SectionPrefix)) + if (SectionPrefix.empty()) continue; GV.setSectionPrefix(SectionPrefix); diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 77778556ce44e..60501b4495082 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -56,7 +56,7 @@ class StaticDataSplitter : public MachineFunctionPass { // Returns true if the global variable is in one of {.rodata, .bss, .data, // .data.rel.ro} sections. - bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM); + bool inStaticDataSection(const GlobalVariable &GV, const TargetMachine &TM); // Returns the constant if the operand refers to a global variable or constant // that gets lowered to static data sections. Otherwise, return nullptr. @@ -128,7 +128,8 @@ const Constant *StaticDataSplitter::getConstant(const MachineOperand &Op, const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal()); // Skip 'llvm.'-prefixed global variables conservatively because they are // often handled specially, and skip those not in static data sections. - if (!GV || GV->getName().starts_with("llvm.") || !inStaticDataSection(GV, TM)) + if (!GV || GV->getName().starts_with("llvm.") || + !inStaticDataSection(*GV, TM)) return nullptr; return GV; } @@ -186,11 +187,10 @@ StaticDataSplitter::getLocalLinkageGlobalVariable(const GlobalValue *GV) { return (GV && GV->hasLocalLinkage()) ? 
dyn_cast(GV) : nullptr; } -bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV, +bool StaticDataSplitter::inStaticDataSection(const GlobalVariable &GV, const TargetMachine &TM) { - assert(GV && "Caller guaranteed"); - SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(&GV, TM); return Kind.isData() || Kind.isReadOnly() || Kind.isReadOnlyWithRel() || Kind.isBSS(); } diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll index b216047a5ea66..91084d038cfe0 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -14,89 +14,114 @@ target triple = "x86_64-unknown-linux-gnu" ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ ; RUN: -partition-static-data-sections=true -data-sections=true \ ; RUN: -unique-section-names=true -relocation-model=pic \ -; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,COMMON --dump-input=always ; This RUN command sets `-data-sections=true -unique-section-names=false` so ; data sections are uniqufied by variable names. ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ ; RUN: -partition-static-data-sections=true -data-sections=true \ ; RUN: -unique-section-names=false -relocation-model=pic \ -; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,COMMON --dump-input=always ; This RUN command sets `-data-sections=false -unique-section-names=false`. 
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ ; RUN: -partition-static-data-sections=true -data-sections=false \ ; RUN: -unique-section-names=false -relocation-model=pic \ -; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,COMMON --dump-input=always ; For @.str and @.str.1 -; SYM: .section .rodata.str1.1.hot. -; UNIQ: .section .rodata.str1.1.hot.,"aMS",@progbits,1 -; AGG: .section .rodata.str1.1.hot -; DATA: .L.str -; DATA: "hot\t" -; DATA: .L.str.1 -; DATA: "%d\t%d\t%d\n" +; COMMON: .type .L.str,@object +; SYM-NEXT: .section .rodata.str1.1.hot. +; UNIQ-NEXT: .section .rodata.str1.1.hot.,"aMS",@progbits,1 +; AGG-NEXT: .section .rodata.str1.1.hot +; COMMON-NEXT: .L.str: +; COMMON-NEXT: "hot\t" +; COMMON: .L.str.1: +; COMMON-NEXT: "%d\t%d\t%d\n" ; For @hot_relro_array -; SYM: .section .data.rel.ro.hot.hot_relro_array -; UNIQ: .section .data.rel.ro.hot.,"aw",@progbits,unique,3 -; AGG: .section .data.rel.ro.hot.,"aw",@progbits +; COMMON: .type hot_relro_array,@object +; SYM-NEXT: .section .data.rel.ro.hot.hot_relro_array +; UNIQ-NEXT: .section .data.rel.ro.hot.,"aw",@progbits,unique,3 +; AGG-NEXT: .section .data.rel.ro.hot.,"aw",@progbits ; For @hot_data, which is accessed by {cold_func, unprofiled_func, hot_func}. -; SYM: .section .data.hot.hot_data,"aw",@progbits -; UNIQ: .section .data.hot.,"aw",@progbits,unique,4 -; AGG: .section .data.hot.,"aw",@progbits +; COMMON: .type hot_data,@object +; SYM-NEXT: .section .data.hot.hot_data,"aw",@progbits +; UNIQ-NEXT: .section .data.hot.,"aw",@progbits,unique,4 +; AGG-NEXT: .section .data.hot.,"aw",@progbits ; For @hot_bss, which is accessed by {unprofiled_func, hot_func}. 
-; SYM: .section .bss.hot.hot_bss,"aw",@nobits -; UNIQ: .section .bss.hot.,"aw",@nobits,unique,5 -; AGG: .section .bss.hot.,"aw",@nobits +; COMMON: .type hot_bss,@object +; SYM-NEXT: .section .bss.hot.hot_bss,"aw",@nobits +; UNIQ-NEXT: .section .bss.hot.,"aw",@nobits,unique,5 +; AGG-NEXT: .section .bss.hot.,"aw",@nobits ; For @.str.2 -; SYM: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 -; UNIQ: section .rodata.str1.1.unlikely.,"aMS",@progbits,1 -; AGG: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 -; DATA: .L.str.2: -; DATA: "cold%d\t%d\t%d\n" +; COMMON: .type .L.str.2,@object +; SYM-NEXT: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; UNIQ-NEXT: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; AGG-NEXT: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; COMMON-NEXT: .L.str.2: +; COMMON-NEXT: "cold%d\t%d\t%d\n" ; For @cold_bss -; SYM: .section .bss.unlikely.cold_bss,"aw",@nobits -; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,6 -; AGG: .section .bss.unlikely.,"aw",@nobits +; COMMON: .type cold_bss,@object +; SYM-NEXT: .section .bss.unlikely.cold_bss,"aw",@nobits +; UNIQ-NEXT: .section .bss.unlikely.,"aw",@nobits,unique,6 +; AGG-NEXT: .section .bss.unlikely.,"aw",@nobits ; For @cold_data -; SYM: .section .data.unlikely.cold_data,"aw",@progbits -; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,7 -; AGG: .section .data.unlikely.,"aw",@progbits +; COMMON: .type cold_data,@object +; SYM-NEXT: .section .data.unlikely.cold_data,"aw",@progbits +; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,7 +; AGG-NEXT: .section .data.unlikely.,"aw",@progbits + +; For @cold_data_custom_foo_section +; It has an explicit section 'foo' and shouldn't have hot or unlikely suffix. 
+; COMMON: .type cold_data_custom_foo_section,@object +; SYM-NEXT: .section foo,"aw",@progbits +; UNIQ-NEXT: .section foo,"aw",@progbits +; AGG-NEXT: .section foo,"aw",@progbits ; For @cold_relro_array -; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits -; UNIQ: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,8 -; AGG: .section .data.rel.ro.unlikely.,"aw",@progbits +; COMMON: .type cold_relro_array,@object +; SYM-NEXT: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits +; UNIQ-NEXT: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,8 +; AGG-NEXT: .section .data.rel.ro.unlikely.,"aw",@progbits ; Currently static-data-splitter only analyzes access from code. ; @bss2 and @data3 are indirectly accessed by code through @hot_relro_array ; and @cold_relro_array. A follow-up item is to analyze indirect access via data ; and prune the unlikely list. ; For @bss2 -; SYM: .section .bss.unlikely.bss2,"aw",@nobits -; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,9 -; AGG: .section .bss.unlikely.,"aw",@nobits +; COMMON: .type bss2,@object +; SYM-NEXT: .section .bss.unlikely.bss2,"aw",@nobits +; UNIQ-NEXT: .section .bss.unlikely.,"aw",@nobits,unique,9 +; AGG-NEXT: .section .bss.unlikely.,"aw",@nobits ; For @data3 -; SYM: .section .data.unlikely.data3,"aw",@progbits -; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,10 -; AGG: .section .data.unlikely.,"aw",@progbits +; COMMON: .type data3,@object +; SYM-NEXT: .section .data.unlikely.data3,"aw",@progbits +; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,10 +; AGG-NEXT: .section .data.unlikely.,"aw",@progbits ; For @data_with_unknown_hotness -; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness -; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits -; UNIQ: .section .data,"aw",@progbits,unique,11 +; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness +; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits 
+; UNIQ: .section .data,"aw",@progbits,unique,11 ; The `.section` directive is omitted for .data with -unique-section-names=false. ; See MCSectionELF::shouldOmitSectionDirective for the implementation details. -; AGG: .data -; DATA: .Ldata_with_unknown_hotness: +; AGG: .data +; COMMON: .Ldata_with_unknown_hotness: + +; For @hot_data_custom_bar_section +; It has an explicit section attribute 'bar' and shouldn't have hot or unlikely suffix. +; COMMON: .type hot_data_custom_bar_section,@object +; SYM-NEXT: .section bar,"aw",@progbits +; SYM: hot_data_custom_bar_section +; UNIQ: .section bar,"aw",@progbits +; AGG: .section bar,"aw",@progbits @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 @@ -106,10 +131,12 @@ target triple = "x86_64-unknown-linux-gnu" @.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1 @cold_bss = internal global i32 0 @cold_data = internal global i32 4 +@cold_data_custom_foo_section = internal global i32 100, section "foo" @cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2] @bss2 = internal global i32 0 @data3 = internal global i32 3 @data_with_unknown_hotness = private global i32 5 +@hot_data_custom_bar_section = internal global i32 101 #0 define void @cold_func(i32 %0) !prof !15 { %2 = load i32, ptr @cold_bss @@ -121,7 +148,8 @@ define void @cold_func(i32 %0) !prof !15 { %8 = load i32, ptr %7 %9 = load i32, ptr @data_with_unknown_hotness %11 = load i32, ptr @hot_data - %12 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11) + %12 = load i32, ptr @cold_data_custom_foo_section + %13 = call i32 (...) 
@func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11, i32 %12) ret void } @@ -142,7 +170,8 @@ define void @hot_func(i32 %0) !prof !14 { %7 = load i32, ptr %6 %8 = load i32, ptr @hot_data %9 = load i32, ptr @hot_bss - %10 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.1, i32 %7, i32 %8, i32 %9) + %10 = load i32, ptr @hot_data_custom_bar_section + %11 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.1, i32 %7, i32 %8, i32 %9, i32 %10) ret void } @@ -178,6 +207,8 @@ define i32 @main(i32 %0, ptr %1) !prof !15 { declare i32 @rand() declare i32 @func_taking_arbitrary_param(...) +attributes #0 = {"data-section"="bar"} + !llvm.module.flags = !{!1} !1 = !{i32 1, !"ProfileSummary", !2}