diff --git a/llvm/include/llvm/Transforms/IPO/UnpredictableProfileLoader.h b/llvm/include/llvm/Transforms/IPO/UnpredictableProfileLoader.h
new file mode 100644
index 0000000000000..703f66bd6706a
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/UnpredictableProfileLoader.h
@@ -0,0 +1,50 @@
+//===-- UnpredictableProfileLoader.h - Unpredictable Profile Loader -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H
+#define LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+
+namespace llvm {
+
+class Module;
+
+/// Module pass that reads a mispredict-count sample profile and an
+/// execution-frequency sample profile and attaches !unpredictable metadata to
+/// instructions whose mispredict ratio reaches the configured minimum.
+struct UnpredictableProfileLoaderPass
+    : PassInfoMixin<UnpredictableProfileLoaderPass> {
+  /// \p FrequencyProfileFile is used as the execution-frequency profile unless
+  /// the frequency-profile command-line option is set, which takes precedence.
+  UnpredictableProfileLoaderPass(StringRef FrequencyProfileFile);
+  /// Takes the execution-frequency profile path from the command-line option.
+  UnpredictableProfileLoaderPass();
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
+  /// Readers for the execution-frequency and mispredict profiles, created by
+  /// loadSampleProfile().
+  std::unique_ptr<SampleProfileReader> FreqReader, MispReader;
+  /// Opens and reads both profiles; returns false if either is unavailable.
+  bool loadSampleProfile(Module &M);
+  /// Annotates every function in \p F; returns true if anything changed.
+  bool addUpredictableMetadata(Module &F);
+  /// Annotates the instructions of \p F; returns true if anything changed.
+  bool addUpredictableMetadata(Function &F);
+  /// Computes the mispredict ratio for \p I from the two function-level
+  /// profiles, or returns an error when no usable samples are found.
+  ErrorOr<double> getMispredictRatio(const FunctionSamples *FreqSamples,
+                                     const FunctionSamples *MispSamples,
+                                     const Instruction *I);
+  /// Path of the execution-frequency profile, fixed at construction.
+  const std::string FrequencyProfileFile;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_UNPREDICTABLEPROFILELOADER_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 929690c2c74d6..dc055ee827d17 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -176,6 +176,7 @@
 #include "llvm/Transforms/IPO/StripDeadPrototypes.h"
 #include "llvm/Transforms/IPO/StripSymbols.h"
 #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
+#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 4fd5ee1946bb7..50cf9a1c74c9d 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -67,6 +67,7 @@ #include "llvm/Transforms/IPO/SampleProfile.h" #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" +#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation/CGProfile.h" @@ -1092,6 +1093,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); + // Run after inlining decisions made by SampleProfileLoader. This can apply + // mispredict metadata to specific inlined callees. + MPM.addPass(UnpredictableProfileLoaderPass(PGOOpt->ProfileFile)); // Do not invoke ICP in the LTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the LTO backend. 
if (!isLTOPreLink(Phase)) diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 3b92823cd283b..6f6252932ebc5 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -140,6 +140,7 @@ MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("trigger-crash-module", TriggerCrashModulePass()) MODULE_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) +MODULE_PASS("unpredictable-profile-loader", UnpredictableProfileLoaderPass()) MODULE_PASS("tsan-module", ModuleThreadSanitizerPass()) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("view-callgraph", CallGraphViewerPass()) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 92a9697720efd..4d09d0a70e13f 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -43,6 +43,7 @@ add_llvm_component_library(LLVMipo StripSymbols.cpp SyntheticCountsPropagation.cpp ThinLTOBitcodeWriter.cpp + UnpredictableProfileLoader.cpp WholeProgramDevirt.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/Transforms/IPO/UnpredictableProfileLoader.cpp b/llvm/lib/Transforms/IPO/UnpredictableProfileLoader.cpp new file mode 100644 index 0000000000000..fe914b05ee1ee --- /dev/null +++ b/llvm/lib/Transforms/IPO/UnpredictableProfileLoader.cpp @@ -0,0 +1,226 @@ +//=== UnpredictableProfileLoader.cpp - Unpredictable Profile Loader -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reads a sample profile containing mispredict counts and a sample
+// profile containing execution counts and computes branch mispredict ratios for
+// each conditional instruction. If a sufficiently high mispredict ratio is
+// found !unpredictable metadata is added.
+//
+// Note that this requires that the mispredict and frequency profiles have
+// comparable magnitudes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/UnpredictableProfileLoader.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Transforms/IPO.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "unpredictable-profile-loader"
+
+// Path of the sample profile holding mispredict counts. The pass does nothing
+// when this is left empty.
+static cl::opt<std::string> UnpredictableHintsFile(
+    "unpredictable-hints-file",
+    cl::desc("Path to the unpredictability hints profile"), cl::Hidden);
+
+// Typically this file will be provided via PGOOpt. This option is provided
+// primarily for debugging and testing. When set, it overrides the path handed
+// to the pass constructor.
+static cl::opt<std::string>
+    FrequencyProfileOption("unpredictable-hints-frequency-profile",
+                           cl::desc("Path to an execution frequency profile to "
+                                    "use as a baseline for unpredictability"),
+                           cl::Hidden);
+
+// This determines the minimum apparent mispredict ratio which should earn a
+// mispredict metadata annotation.
+static cl::opt<double> MinimumRatio(
+    "unpredictable-hints-min-ratio",
+    cl::desc(
+        "Absolute minimum branch miss ratio to apply MD_unpredictable from"),
+    cl::init(0.2), cl::Hidden);
+
+// This option is useful for dealing with two different sampling frequencies.
+static cl::opt<double>
+    RatioFactor("unpredictable-hints-factor",
+                cl::desc("Multiply all ratios by this factor"), cl::init(1.0),
+                cl::ReallyHidden);
+
+// Look up execution frequency and mispredict samples for an Instruction's
+// corresponding location in the two FunctionSamples profiles and compute an
+// effective branch mispredict ratio. The counts used to compute the ratio are
+// uint64s read directly from the profile files.
+//
+// FuncFreqSamples and FuncMispSamples are the top-level samples of the
+// function containing I; both are dereferenced and must be non-null.
+//
+// Returns (mispredicts * RatioFactor) / executions. An error is returned when
+// I has no debug location, when either profile has no samples for that
+// location, or when the execution count is zero. Failures are reported as a
+// default-constructed std::error_code, so callers should test the returned
+// ErrorOr itself rather than inspect the code.
+ErrorOr<double> UnpredictableProfileLoaderPass::getMispredictRatio(
+    const FunctionSamples *FuncFreqSamples,
+    const FunctionSamples *FuncMispSamples, const Instruction *I) {
+
+  const auto &Loc = I->getDebugLoc();
+  if (!Loc)
+    return std::error_code();
+
+  // Resolve the samples that apply at this location in the frequency profile.
+  const FunctionSamples *FreqSamples =
+      FuncFreqSamples->findFunctionSamples(Loc, FreqReader->getRemapper());
+  if (!FreqSamples)
+    return std::error_code();
+  const ErrorOr<uint64_t> FreqCount = FreqSamples->findSamplesAt(
+      FunctionSamples::getOffset(Loc), Loc->getBaseDiscriminator());
+  if (!FreqCount)
+    return std::error_code();
+
+  // Same lookup against the mispredict profile.
+  const FunctionSamples *MispSamples =
+      FuncMispSamples->findFunctionSamples(Loc, MispReader->getRemapper());
+  if (!MispSamples)
+    return std::error_code();
+  const ErrorOr<uint64_t> MispCount = MispSamples->findSamplesAt(
+      FunctionSamples::getOffset(Loc), Loc->getBaseDiscriminator());
+  if (!MispCount)
+    return std::error_code();
+
+  // A zero execution count cannot serve as a denominator.
+  const double Freq = FreqCount.get();
+  if (!Freq)
+    return std::error_code();
+
+  const double Misp = MispCount.get();
+  const double MissRatio = (Misp * RatioFactor) / Freq;
+
+  LLVM_DEBUG(dbgs() << "Computing mispredict ratio of " << format("%0.2f", Misp)
+                    << "/" << format("%0.2f", Freq) << " * "
+                    << format("%0.2f", RatioFactor.getValue()) << " = "
+                    << format("%0.2f", MissRatio) << " for instruction\n"
+                    << *I << "\n");
+  return MissRatio;
+}
+
+// Examine all Branch, Select, and SwitchInsts in a function, adding
+// !unpredictable metadata if they appear in the mispredict profile with
+// sufficient weight.
+bool UnpredictableProfileLoaderPass::addUpredictableMetadata(Function &F) {
+
+  // Both profiles must contain samples for this function.
+  const FunctionSamples *FreqSamples = FreqReader->getSamplesFor(F);
+  if (!FreqSamples)
+    return false;
+
+  const FunctionSamples *MispSamples = MispReader->getSamplesFor(F);
+  if (!MispSamples)
+    return false;
+
+  bool MadeChange = false;
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      // Only instructions that choose between outcomes are candidates. An
+      // unconditional branch has nothing to predict, so it is skipped even if
+      // it shares a debug location with a sampled instruction.
+      if (const auto *BI = dyn_cast<BranchInst>(&I)) {
+        if (!BI->isConditional())
+          continue;
+      } else if (!isa<SelectInst>(&I) && !isa<SwitchInst>(&I)) {
+        continue;
+      }
+      // Leave existing annotations untouched.
+      if (I.hasMetadata(LLVMContext::MD_unpredictable))
+        continue;
+
+      const ErrorOr<double> RatioOrError =
+          getMispredictRatio(FreqSamples, MispSamples, &I);
+      if (!RatioOrError)
+        continue;
+      const double MissRatio = RatioOrError.get();
+
+      if (MissRatio < MinimumRatio) {
+        LLVM_DEBUG(dbgs() << "\tRatio " << format("%0.2f", MissRatio)
+                          << " is below threshold of "
+                          << format("%0.2f", MinimumRatio.getValue())
+                          << "; ignoring.\n");
+        continue;
+      }
+
+      // In the future we probably want to attach more information here, such as
+      // the mispredict count or ratio.
+      MDNode *MD = MDNode::get(I.getContext(), std::nullopt);
+      I.setMetadata(LLVMContext::MD_unpredictable, MD);
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
+
+// Apply the per-function annotation to every function in the module.
+bool UnpredictableProfileLoaderPass::addUpredictableMetadata(Module &M) {
+  bool MadeChange = false;
+
+  for (Function &F : M)
+    MadeChange |= addUpredictableMetadata(F);
+
+  // Return an indication of whether we changed anything or not.
+  return MadeChange;
+}
+
+// Open and read the mispredict and execution-frequency profiles. Returns true
+// when both readers are ready (including when an earlier call already loaded
+// them). On failure a diagnostic is emitted for any file that could not be
+// opened or read, and neither member reader is modified, so the pass never
+// holds just one of the two readers.
+bool UnpredictableProfileLoaderPass::loadSampleProfile(Module &M) {
+  if (MispReader && FreqReader)
+    return true;
+
+  assert(!MispReader && !FreqReader &&
+         "Expected both or neither profile readers");
+
+  LLVMContext &Ctx = M.getContext();
+  auto FS = vfs::getRealFileSystem();
+
+  // Returns a fully read reader for ProfileFile, or null if the path is empty
+  // or the file could not be opened or read.
+  auto ReadProfile =
+      [&Ctx, &FS](const std::string &ProfileFile)
+      -> std::unique_ptr<SampleProfileReader> {
+    if (ProfileFile.empty())
+      return nullptr;
+
+    ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
+        SampleProfileReader::create(ProfileFile, Ctx, *FS);
+    if (std::error_code EC = ReaderOrErr.getError()) {
+      std::string Msg = "Could not open profile: " + EC.message();
+      Ctx.diagnose(DiagnosticInfoSampleProfile(ProfileFile, Msg,
+                                               DiagnosticSeverity::DS_Warning));
+      return nullptr;
+    }
+
+    std::unique_ptr<SampleProfileReader> Reader = std::move(ReaderOrErr.get());
+    if (std::error_code EC = Reader->read()) {
+      std::string Msg = "Profile reading failed: " + EC.message();
+      Ctx.diagnose(DiagnosticInfoSampleProfile(ProfileFile, Msg));
+      return nullptr;
+    }
+
+    return Reader;
+  };
+
+  std::unique_ptr<SampleProfileReader> Misp =
+      ReadProfile(UnpredictableHintsFile);
+  if (!Misp)
+    return false;
+
+  std::unique_ptr<SampleProfileReader> Freq = ReadProfile(FrequencyProfileFile);
+  if (!Freq)
+    return false;
+
+  // Publish both readers together so a partial failure leaves no state behind.
+  MispReader = std::move(Misp);
+  FreqReader = std::move(Freq);
+  return true;
+}
+
+// Without a pipeline-provided path, the frequency profile comes solely from
+// the command-line option.
+UnpredictableProfileLoaderPass::UnpredictableProfileLoaderPass()
+    : FrequencyProfileFile(FrequencyProfileOption) {}
+
+// The command-line option, when set, overrides the pipeline-provided path.
+UnpredictableProfileLoaderPass::UnpredictableProfileLoaderPass(
+    StringRef PGOProfileFile)
+    : FrequencyProfileFile(FrequencyProfileOption.empty()
+                               ? PGOProfileFile
+                               : FrequencyProfileOption) {}
+
+// Load the profiles and annotate the module. Everything is preserved when the
+// profiles are unavailable or nothing was annotated. Otherwise only the CFG
+// analyses are reported as preserved; the pass attaches metadata and does not
+// alter control flow.
+PreservedAnalyses UnpredictableProfileLoaderPass::run(Module &M,
+                                                      ModuleAnalysisManager &) {
+  if (!loadSampleProfile(M))
+    return PreservedAnalyses::all();
+
+  if (addUpredictableMetadata(M)) {
+    PreservedAnalyses PA;
+    PA.preserveSet<CFGAnalyses>();
+    return PA;
+  }
+
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/test/Other/new-pm-pgo.ll b/llvm/test/Other/new-pm-pgo.ll
index 4f856faacd332..b4be524158497 100644
--- a/llvm/test/Other/new-pm-pgo.ll
+++ b/llvm/test/Other/new-pm-pgo.ll
@@ -25,6 +25,7 @@
 ; SAMPLE_USE_PRE_LINK: Running pass: SROAPass
 ; SAMPLE_USE_PRE_LINK: Running pass: EarlyCSEPass
 ; SAMPLE_USE: Running pass: SampleProfileLoaderPass
+; SAMPLE_USE: Running pass: UnpredictableProfileLoaderPass
 ; SAMPLE_USE_O: Running pass: PGOIndirectCallPromotion
 ; SAMPLE_USE_POST_LINK-NOT: Running pass: GlobalOptPass
 ; SAMPLE_USE_POST_LINK: Running pass: PGOIndirectCallPromotion
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index ac80a31d8fd4b..3339630b42da4 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -35,6 +35,7 @@
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
+; CHECK-O-NEXT: Running pass: UnpredictableProfileLoaderPass
 ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
 ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running pass: OpenMPOptPass
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
index 210a4ef1f7664..eaef729619cb8 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
+++ 
b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -47,6 +47,7 @@ ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running pass: UnpredictableProfileLoaderPass ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass diff --git a/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/frequency.prof b/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/frequency.prof new file mode 100644 index 0000000000000..5bdb6df9f5176 --- /dev/null +++ b/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/frequency.prof @@ -0,0 +1,3 @@ +# This is a standard SPGO profile indicating basic block execution frequency. +sel_arr:1:0 + 11: 4000 diff --git a/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/inline.freq.prof b/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/inline.freq.prof new file mode 100644 index 0000000000000..aa8934672cf22 --- /dev/null +++ b/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/inline.freq.prof @@ -0,0 +1,3 @@ +caller:1:0 + 1: callee:1 + 3: 997 diff --git a/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/inline.misp.prof b/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/inline.misp.prof new file mode 100644 index 0000000000000..d1e06a971e3a8 --- /dev/null +++ b/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/inline.misp.prof @@ -0,0 +1,3 @@ +caller:1:0 + 1: callee:1 + 3: 400 diff --git a/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/mispredict.prof b/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/mispredict.prof new file mode 100644 index 0000000000000..14769e7de9c30 --- /dev/null +++ b/llvm/test/Transforms/UnpredictableProfileLoader/Inputs/mispredict.prof @@ -0,0 +1,4 @@ +# This profile indicates 1000 mispredict samples for instructions 
11 source +# lines into in the sel_arr function. +sel_arr:1:0 + 11: 1000 diff --git a/llvm/test/Transforms/UnpredictableProfileLoader/inlined.ll b/llvm/test/Transforms/UnpredictableProfileLoader/inlined.ll new file mode 100644 index 0000000000000..60fb56af2f7ef --- /dev/null +++ b/llvm/test/Transforms/UnpredictableProfileLoader/inlined.ll @@ -0,0 +1,113 @@ +; RUN: opt < %s -passes=unpredictable-profile-loader -unpredictable-hints-file=%S/Inputs/inline.misp.prof -unpredictable-hints-frequency-profile=%S/Inputs/inline.freq.prof -unpredictable-hints-min-ratio=0.1 -S | FileCheck %s +; RUN: opt < %s -passes=unpredictable-profile-loader -unpredictable-hints-file=%S/Inputs/inline.misp.prof -unpredictable-hints-frequency-profile=%S/Inputs/inline.freq.prof -unpredictable-hints-min-ratio=0.5 -S | FileCheck --check-prefixes=MIN %s + +; Test that we can apply branch mispredict profile data when the branch of +; interest in `callee` has been inlined into `caller`. + +; // Original C source: +; static int callee(double *A, double *B) { +; int count = 0; +; for(int i=0; i<1000000; ++i) +; if(A[i] > 100) +; count += B[i] * 3; +; +; return count; +; } +; +; int caller(double *X, double *Y) { +; return callee(X, Y); +; } + +; CHECK-LABEL: @caller +define dso_local i32 @caller(ptr nocapture noundef readonly %X, ptr nocapture noundef readonly %Y) local_unnamed_addr !dbg !7 { +entry: + tail call void @llvm.dbg.value(metadata ptr %X, metadata !14, metadata !DIExpression()), !dbg !16 + tail call void @llvm.dbg.value(metadata ptr %Y, metadata !15, metadata !DIExpression()), !dbg !16 + tail call void @llvm.dbg.value(metadata ptr %X, metadata !17, metadata !DIExpression()), !dbg !24 + tail call void @llvm.dbg.value(metadata ptr %Y, metadata !20, metadata !DIExpression()), !dbg !24 + tail call void @llvm.dbg.value(metadata i32 0, metadata !21, metadata !DIExpression()), !dbg !24 + tail call void @llvm.dbg.value(metadata i32 0, metadata !22, metadata !DIExpression()), !dbg !26 + br label 
%for.body.i, !dbg !27 + +for.body.i: ; preds = %for.inc.i, %entry + %indvars.iv.i = phi i64 [ 0, %entry ], [ %indvars.iv.next.i, %for.inc.i ] + %count.09.i = phi i32 [ 0, %entry ], [ %count.1.i, %for.inc.i ] + tail call void @llvm.dbg.value(metadata i64 %indvars.iv.i, metadata !22, metadata !DIExpression()), !dbg !26 + tail call void @llvm.dbg.value(metadata i32 %count.09.i, metadata !21, metadata !DIExpression()), !dbg !24 + %arrayidx.i = getelementptr inbounds double, ptr %X, i64 %indvars.iv.i, !dbg !28 + %0 = load double, ptr %arrayidx.i, align 8, !dbg !28 + %cmp1.i = fcmp reassoc nsz arcp contract afn ogt double %0, 1.000000e+02, !dbg !35 +; CHECK: br i1 %cmp1.i, label %if.then.i, label %for.inc.i +; CHECK-SAME: !unpredictable +; MIN: br i1 %cmp1.i, label %if.then.i, label %for.inc.i +; MIN-NOT: !unpredictable + br i1 %cmp1.i, label %if.then.i, label %for.inc.i, !dbg !36 + +if.then.i: ; preds = %for.body.i + %arrayidx3.i = getelementptr inbounds double, ptr %Y, i64 %indvars.iv.i, !dbg !37 + %1 = load double, ptr %arrayidx3.i, align 8, !dbg !37 + %mul.i = fmul reassoc nsz arcp contract afn double %1, 3.000000e+00, !dbg !38 + %conv.i = sitofp i32 %count.09.i to double, !dbg !39 + %add.i = fadd reassoc nsz arcp contract afn double %mul.i, %conv.i, !dbg !39 + %conv4.i = fptosi double %add.i to i32, !dbg !39 + tail call void @llvm.dbg.value(metadata i32 %conv4.i, metadata !21, metadata !DIExpression()), !dbg !24 + br label %for.inc.i, !dbg !40 + +for.inc.i: ; preds = %if.then.i, %for.body.i + %count.1.i = phi i32 [ %conv4.i, %if.then.i ], [ %count.09.i, %for.body.i ] + tail call void @llvm.dbg.value(metadata i32 %count.1.i, metadata !21, metadata !DIExpression()), !dbg !24 + %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1, !dbg !41 + tail call void @llvm.dbg.value(metadata i64 %indvars.iv.next.i, metadata !22, metadata !DIExpression()), !dbg !26 + %exitcond.not.i = icmp eq i64 %indvars.iv.next.i, 1000000, !dbg !42 + br i1 %exitcond.not.i, label %callee.exit, 
label %for.body.i, !dbg !27 + +callee.exit: ; preds = %for.inc.i + ret i32 %count.1.i, !dbg !47 +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1) +!1 = !DIFile(filename: "inlined.c", directory: "/test") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"uwtable", i32 2} +!7 = distinct !DISubprogram(name: "caller", scope: !1, file: !1, line: 10, type: !8, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!8 = !DISubroutineType(types: !9) +!9 = !{!10, !11, !11} +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) +!13 = !{!14, !15} +!14 = !DILocalVariable(name: "X", arg: 1, scope: !7, file: !1, line: 10, type: !11) +!15 = !DILocalVariable(name: "Y", arg: 2, scope: !7, file: !1, line: 10, type: !11) +!16 = !DILocation(line: 0, scope: !7) +!17 = !DILocalVariable(name: "A", arg: 1, scope: !18, file: !1, line: 1, type: !11) +!18 = distinct !DISubprogram(name: "callee", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !19) +!19 = !{!17, !20, !21, !22} +!20 = !DILocalVariable(name: "B", arg: 2, scope: !18, file: !1, line: 1, type: !11) +!21 = !DILocalVariable(name: "count", scope: !18, file: !1, line: 2, type: !10) +!22 = !DILocalVariable(name: "i", scope: !23, file: !1, line: 3, type: !10) +!23 = distinct !DILexicalBlock(scope: !18, file: !1, line: 3, 
column: 3) +!24 = !DILocation(line: 0, scope: !18, inlinedAt: !25) +!25 = distinct !DILocation(line: 11, column: 10, scope: !7) +!26 = !DILocation(line: 0, scope: !23, inlinedAt: !25) +!27 = !DILocation(line: 3, column: 3, scope: !23, inlinedAt: !25) +!28 = !DILocation(line: 4, column: 8, scope: !29, inlinedAt: !25) +!29 = distinct !DILexicalBlock(scope: !30, file: !1, line: 4, column: 8) +!30 = distinct !DILexicalBlock(scope: !23, file: !1, line: 3, column: 3) +!35 = !DILocation(line: 4, column: 13, scope: !29, inlinedAt: !25) +!36 = !DILocation(line: 4, column: 8, scope: !30, inlinedAt: !25) +!37 = !DILocation(line: 5, column: 16, scope: !29, inlinedAt: !25) +!38 = !DILocation(line: 5, column: 21, scope: !29, inlinedAt: !25) +!39 = !DILocation(line: 5, column: 13, scope: !29, inlinedAt: !25) +!40 = !DILocation(line: 5, column: 7, scope: !29, inlinedAt: !25) +!41 = !DILocation(line: 3, column: 27, scope: !30, inlinedAt: !25) +!42 = !DILocation(line: 3, column: 17, scope: !30, inlinedAt: !25) +!44 = !DILocation(line: 5, column: 23, scope: !23, inlinedAt: !25) +!47 = !DILocation(line: 11, column: 3, scope: !7) diff --git a/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_branch.ll b/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_branch.ll new file mode 100644 index 0000000000000..3fdc5c90e0c49 --- /dev/null +++ b/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_branch.ll @@ -0,0 +1,87 @@ +; RUN: opt < %s -passes=unpredictable-profile-loader -unpredictable-hints-file=%S/Inputs/mispredict.prof -unpredictable-hints-frequency-profile=%S/Inputs/frequency.prof -unpredictable-hints-min-ratio=0.1 -S | FileCheck %s +; RUN: opt < %s -passes=unpredictable-profile-loader -unpredictable-hints-file=%S/Inputs/mispredict.prof -unpredictable-hints-frequency-profile=%S/Inputs/frequency.prof -unpredictable-hints-min-ratio=0.5 -S | FileCheck --check-prefixes=MIN %s + +; CHECK-LABEL: @sel_arr +; MIN-LABEL: @sel_arr +define void @sel_arr(ptr %dst, 
ptr %s1, ptr %s2, ptr %s3) !dbg !8 { +entry: + call void @llvm.dbg.value(metadata ptr %dst, metadata !14, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s1, metadata !15, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s2, metadata !16, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s3, metadata !17, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata i32 0, metadata !18, metadata !DIExpression()), !dbg !24 + br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %for.body + ret void, !dbg !26 + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %latch ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !18, metadata !DIExpression()), !dbg !24 + %arrayidx = getelementptr inbounds i32, ptr %s1, i64 %indvars.iv, !dbg !27 + %0 = load i32, ptr %arrayidx, align 4, !dbg !27 + %cmp1 = icmp slt i32 %0, 10035, !dbg !27 +; CHECK: br i1 %cmp1, label %if.then, label %if.else +; CHECK-SAME: !unpredictable +; MIN: br i1 %cmp1, label %if.then, label %if.else +; MIN-NOT: !unpredictable + br i1 %cmp1, label %if.then, label %if.else, !dbg !27 + +if.then: + %then.cond = getelementptr inbounds i32, ptr %s2, i64 %indvars.iv, !dbg !27 + call void @llvm.dbg.value(metadata ptr %then.cond, metadata !20, metadata !DIExpression()), !dbg !33 + %1 = load i32, ptr %then.cond, align 4, !dbg !34 + br label %latch + +if.else: + %else.cond = getelementptr inbounds i32, ptr %s3, i64 %indvars.iv, !dbg !27 + call void @llvm.dbg.value(metadata ptr %else.cond, metadata !20, metadata !DIExpression()), !dbg !33 + %2 = load i32, ptr %else.cond, align 4, !dbg !34 + br label %latch + +latch: + %3 = phi i32 [ %1, %if.then ], [ %2, %if.else ] + %arrayidx8 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv, !dbg !35 + store i32 %3, ptr %arrayidx8, align 4, !dbg !36 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !37 + 
call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !18, metadata !DIExpression()), !dbg !24 + %exitcond.not = icmp eq i64 %indvars.iv.next, 20000, !dbg !38 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1) +!1 = !DIFile(filename: "3.c", directory: "/test") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!8 = distinct !DISubprogram(name: "sel_arr", scope: !1, file: !1, line: 28, type: !9, scopeLine: 28, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!9 = !DISubroutineType(types: !10) +!10 = !{null, !11, !11, !11, !11} +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!14, !15, !16, !17, !18, !20} +!14 = !DILocalVariable(name: "dst", arg: 1, scope: !8, file: !1, line: 28, type: !11) +!15 = !DILocalVariable(name: "s1", arg: 2, scope: !8, file: !1, line: 28, type: !11) +!16 = !DILocalVariable(name: "s2", arg: 3, scope: !8, file: !1, line: 28, type: !11) +!17 = !DILocalVariable(name: "s3", arg: 4, scope: !8, file: !1, line: 28, type: !11) +!18 = !DILocalVariable(name: "i", scope: !19, file: !1, line: 38, type: !12) +!19 = distinct !DILexicalBlock(scope: !8, file: !1, line: 38, column: 5) +!20 = !DILocalVariable(name: "p", scope: !21, file: !1, line: 39, type: !11) +!21 = distinct !DILexicalBlock(scope: !22, file: !1, line: 38, column: 33) +!22 = distinct !DILexicalBlock(scope: !19, file: !1, line: 38, column: 5) +!23 = !DILocation(line: 0, scope: !8) +!24 = !DILocation(line: 0, scope: !19) +!25 = !DILocation(line: 38, column: 5, scope: !19) +!26 = !DILocation(line: 42, column: 1, scope: !8) +!27 = !DILocation(line: 39, 
column: 18, scope: !21) +!33 = !DILocation(line: 0, scope: !21) +!34 = !DILocation(line: 40, column: 18, scope: !21) +!35 = !DILocation(line: 40, column: 9, scope: !21) +!36 = !DILocation(line: 40, column: 16, scope: !21) +!37 = !DILocation(line: 38, column: 29, scope: !22) +!38 = !DILocation(line: 38, column: 23, scope: !22) diff --git a/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_select.ll b/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_select.ll new file mode 100644 index 0000000000000..f0ff704581c3f --- /dev/null +++ b/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_select.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -passes=unpredictable-profile-loader -unpredictable-hints-file=%S/Inputs/mispredict.prof -unpredictable-hints-frequency-profile=%S/Inputs/frequency.prof -unpredictable-hints-min-ratio=0.1 -S | FileCheck %s +; RUN: opt < %s -passes=unpredictable-profile-loader -unpredictable-hints-file=%S/Inputs/mispredict.prof -unpredictable-hints-frequency-profile=%S/Inputs/frequency.prof -unpredictable-hints-min-ratio=0.5 -S | FileCheck --check-prefixes=MIN %s + +; CHECK-LABEL: @sel_arr +; MIN-LABEL: @sel_arr +define void @sel_arr(ptr %dst, ptr %s1, ptr %s2, ptr %s3) !dbg !8 { +entry: + call void @llvm.dbg.value(metadata ptr %dst, metadata !14, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s1, metadata !15, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s2, metadata !16, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s3, metadata !17, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata i32 0, metadata !18, metadata !DIExpression()), !dbg !24 + br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %for.body + ret void, !dbg !26 + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !18, 
metadata !DIExpression()), !dbg !24 + %arrayidx = getelementptr inbounds i32, ptr %s1, i64 %indvars.iv, !dbg !27 + %0 = load i32, ptr %arrayidx, align 4, !dbg !27 + %cmp1 = icmp slt i32 %0, 10035, !dbg !27 +; CHECK: %spec.select = select i1 +; CHECK-SAME: !unpredictable +; MIN: %spec.select = select i1 +; MIN-NOT: !unpredictable + %spec.select = select i1 %cmp1, ptr %s2, ptr %s3, !dbg !27 + %cond = getelementptr inbounds i32, ptr %spec.select, i64 %indvars.iv, !dbg !27 + call void @llvm.dbg.value(metadata ptr %cond, metadata !20, metadata !DIExpression()), !dbg !33 + %1 = load i32, ptr %cond, align 4, !dbg !34 + %arrayidx8 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv, !dbg !35 + store i32 %1, ptr %arrayidx8, align 4, !dbg !36 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !37 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !18, metadata !DIExpression()), !dbg !24 + %exitcond.not = icmp eq i64 %indvars.iv.next, 20000, !dbg !38 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1) +!1 = !DIFile(filename: "3.c", directory: "/test") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!8 = distinct !DISubprogram(name: "sel_arr", scope: !1, file: !1, line: 28, type: !9, scopeLine: 28, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!9 = !DISubroutineType(types: !10) +!10 = !{null, !11, !11, !11, !11} +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!14, !15, !16, !17, !18, !20} +!14 = !DILocalVariable(name: "dst", arg: 1, scope: !8, file: !1, line: 28, type: !11) +!15 = !DILocalVariable(name: "s1", arg: 
2, scope: !8, file: !1, line: 28, type: !11) +!16 = !DILocalVariable(name: "s2", arg: 3, scope: !8, file: !1, line: 28, type: !11) +!17 = !DILocalVariable(name: "s3", arg: 4, scope: !8, file: !1, line: 28, type: !11) +!18 = !DILocalVariable(name: "i", scope: !19, file: !1, line: 38, type: !12) +!19 = distinct !DILexicalBlock(scope: !8, file: !1, line: 38, column: 5) +!20 = !DILocalVariable(name: "p", scope: !21, file: !1, line: 39, type: !11) +!21 = distinct !DILexicalBlock(scope: !22, file: !1, line: 38, column: 33) +!22 = distinct !DILexicalBlock(scope: !19, file: !1, line: 38, column: 5) +!23 = !DILocation(line: 0, scope: !8) +!24 = !DILocation(line: 0, scope: !19) +!25 = !DILocation(line: 38, column: 5, scope: !19) +!26 = !DILocation(line: 42, column: 1, scope: !8) +!27 = !DILocation(line: 39, column: 18, scope: !21) +!33 = !DILocation(line: 0, scope: !21) +!34 = !DILocation(line: 40, column: 18, scope: !21) +!35 = !DILocation(line: 40, column: 9, scope: !21) +!36 = !DILocation(line: 40, column: 16, scope: !21) +!37 = !DILocation(line: 38, column: 29, scope: !22) +!38 = !DILocation(line: 38, column: 23, scope: !22) diff --git a/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_switch.ll b/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_switch.ll new file mode 100644 index 0000000000000..6b09f8eee3d30 --- /dev/null +++ b/llvm/test/Transforms/UnpredictableProfileLoader/unpredictable_switch.ll @@ -0,0 +1,87 @@ +; RUN: opt < %s -passes=unpredictable-profile-loader -unpredictable-hints-file=%S/Inputs/mispredict.prof -unpredictable-hints-frequency-profile=%S/Inputs/frequency.prof -unpredictable-hints-min-ratio=0.1 -S | FileCheck %s +; RUN: opt < %s -passes=unpredictable-profile-loader -unpredictable-hints-file=%S/Inputs/mispredict.prof -unpredictable-hints-frequency-profile=%S/Inputs/frequency.prof -unpredictable-hints-min-ratio=0.5 -S | FileCheck --check-prefixes=MIN %s + +; CHECK-LABEL: @sel_arr +; MIN-LABEL: @sel_arr +define void 
@sel_arr(ptr %dst, ptr %s1, ptr %s2, ptr %s3) !dbg !8 { +entry: + call void @llvm.dbg.value(metadata ptr %dst, metadata !14, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s1, metadata !15, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s2, metadata !16, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata ptr %s3, metadata !17, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.value(metadata i32 0, metadata !18, metadata !DIExpression()), !dbg !24 + br label %for.body, !dbg !25 + +for.cond.cleanup: ; preds = %latch, %for.body + ret void, !dbg !26 + +for.body: ; preds = %latch, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %latch ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !18, metadata !DIExpression()), !dbg !24 + %arrayidx = getelementptr inbounds i32, ptr %s1, i64 %indvars.iv, !dbg !27 + %0 = load i32, ptr %arrayidx, align 4, !dbg !27 + %cmp1 = icmp slt i32 %0, 10035, !dbg !27 + switch i1 %cmp1, label %for.cond.cleanup [ i1 true, label %if.then + i1 false, label %if.else ], !dbg !27 +; CHECK: switch i1 %cmp1 +; CHECK: !unpredictable +; MIN: switch i1 %cmp1 +; MIN-NOT: !unpredictable +if.then: + %then.cond = getelementptr inbounds i32, ptr %s2, i64 %indvars.iv, !dbg !27 + call void @llvm.dbg.value(metadata ptr %then.cond, metadata !20, metadata !DIExpression()), !dbg !33 + %1 = load i32, ptr %then.cond, align 4, !dbg !34 + br label %latch + +if.else: + %else.cond = getelementptr inbounds i32, ptr %s3, i64 %indvars.iv, !dbg !27 + call void @llvm.dbg.value(metadata ptr %else.cond, metadata !20, metadata !DIExpression()), !dbg !33 + %2 = load i32, ptr %else.cond, align 4, !dbg !34 + br label %latch + +latch: + %3 = phi i32 [ %1, %if.then ], [ %2, %if.else ] + %arrayidx8 = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv, !dbg !35 + store i32 %3, ptr %arrayidx8, align 4, !dbg !36 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg
!37 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !18, metadata !DIExpression()), !dbg !24 + %exitcond.not = icmp eq i64 %indvars.iv.next, 20000, !dbg !38 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !25 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1) +!1 = !DIFile(filename: "3.c", directory: "/test") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!8 = distinct !DISubprogram(name: "sel_arr", scope: !1, file: !1, line: 28, type: !9, scopeLine: 28, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13) +!9 = !DISubroutineType(types: !10) +!10 = !{null, !11, !11, !11, !11} +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !{!14, !15, !16, !17, !18, !20} +!14 = !DILocalVariable(name: "dst", arg: 1, scope: !8, file: !1, line: 28, type: !11) +!15 = !DILocalVariable(name: "s1", arg: 2, scope: !8, file: !1, line: 28, type: !11) +!16 = !DILocalVariable(name: "s2", arg: 3, scope: !8, file: !1, line: 28, type: !11) +!17 = !DILocalVariable(name: "s3", arg: 4, scope: !8, file: !1, line: 28, type: !11) +!18 = !DILocalVariable(name: "i", scope: !19, file: !1, line: 38, type: !12) +!19 = distinct !DILexicalBlock(scope: !8, file: !1, line: 38, column: 5) +!20 = !DILocalVariable(name: "p", scope: !21, file: !1, line: 39, type: !11) +!21 = distinct !DILexicalBlock(scope: !22, file: !1, line: 38, column: 33) +!22 = distinct !DILexicalBlock(scope: !19, file: !1, line: 38, column: 5) +!23 = !DILocation(line: 0, scope: !8) +!24 = !DILocation(line: 0, scope: !19) +!25 = !DILocation(line: 38, column: 5, scope: !19) +!26 = !DILocation(line: 42, column: 1, scope: !8) +!27 = !DILocation(line: 
39, column: 18, scope: !21) +!33 = !DILocation(line: 0, scope: !21) +!34 = !DILocation(line: 40, column: 18, scope: !21) +!35 = !DILocation(line: 40, column: 9, scope: !21) +!36 = !DILocation(line: 40, column: 16, scope: !21) +!37 = !DILocation(line: 38, column: 29, scope: !22) +!38 = !DILocation(line: 38, column: 23, scope: !22)