Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 51 additions & 9 deletions llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ STATISTIC(
STATISTIC(NumInvariantConditionsInjected,
"Number of invariant conditions injected and unswitched");

namespace llvm {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this change useful?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fly-by akin to #161240, and in particular because ProfcheckDisableMetadataFixes is llvm-qualified, so if I was going to add llvm { might as well.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the link. If adding namespace llvm {...} around static variables is worthwhile, it would be nice to extend the coding standards discussion of namespaces vs. static. @nhaehnle

static cl::opt<bool> EnableNonTrivialUnswitch(
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
cl::desc("Forcibly enables non-trivial loop unswitching rather than "
Expand Down Expand Up @@ -131,11 +132,17 @@ static cl::opt<bool> InjectInvariantConditions(

static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
"simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
"unswitch on them to eliminate branches that are "
"not-taken 1/<this option> times or less."),
cl::Hidden,
cl::desc("Only try to inject loop invariant conditions and "
"unswitch on them to eliminate branches that are "
"not-taken 1/<this option> times or less."),
cl::init(16));

static cl::opt<bool> EstimateProfile("simple-loop-unswitch-estimate-profile",
cl::Hidden, cl::init(true));
extern cl::opt<bool> ProfcheckDisableMetadataFixes;
} // namespace llvm

AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key;
namespace {
struct CompareDesc {
Expand Down Expand Up @@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L,
llvm_unreachable("Basic blocks should never be empty!");
}

/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
/// Copy a set of loop invariant values \p Invariants and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
/// We attempt to estimate the profile of the resulting conditional branch from
/// \p ComputeProfFrom, which is the original conditional branch we're
/// unswitching.
/// When \p Direction is true, the \p Invariants form a disjunction, and the
/// branch conditioned on it exits the loop on the "true" case. When \p
/// Direction is false, the \p Invariants form a conjunction and the branch
/// exits on the "false" case.
static void buildPartialUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze,
const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) {
const Instruction *I, AssumptionCache *AC, const DominatorTree &DT,
const BranchInst &ComputeProfFrom) {

SmallVector<uint32_t> BranchWeights;
bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes &&
extractBranchWeights(ComputeProfFrom, BranchWeights);
// If Direction is true, that means we had a disjunction and that the "true"
// case exits. The probability of the disjunction of the subset of terms is at
// most as high as the original one. So, if the probability is higher than the
// one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5,
// but if it's lower, we will use the original probability.
// Conversely, if Direction is false, that means we had a conjunction, and the
// probability of exiting is captured in the second branch weight. That
// probability is a disjunction (of the negation of the original terms). The
// same reasoning applies as above.
// Issue #165649: should we expect BFI to conserve, and use that to calculate
// the branch weights?
if (HasBranchWeights &&
static_cast<double>(BranchWeights[Direction ? 0 : 1]) /
static_cast<double>(sum_of(BranchWeights)) >
0.5)
HasBranchWeights = false;

IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());

Expand All @@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch(

Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants)
: IRB.CreateAnd(FrozenInvariants);
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc);
auto *BR = IRB.CreateCondBr(
Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc,
HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof)
: nullptr);
if (!HasBranchWeights)
setExplicitlyUnknownBranchWeightsIfProfiled(
*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}

/// Copy a set of loop invariant values, and conditionally branch on them.
Expand Down Expand Up @@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
" condition!");
buildPartialUnswitchConditionalBranch(
*OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH,
FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT);
FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI);
}

// Update the dominator tree with the added edge.
Expand Down Expand Up @@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants(
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
FreezeLoopUnswitchCond, BI, &AC, DT);
FreezeLoopUnswitchCond, BI, &AC, DT, *BI);
}
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
; RUN: split-file %s %t
; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-or.ll -o -| FileCheck %t/probable-or.prof
; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>)' -S %t/probable-and.ll -o -| FileCheck %t/probable-and.prof

;--- main.ll
declare i32 @a()
declare i32 @b()

define i32 @or(ptr %ptr, i1 %cond) !prof !0 {
entry:
br label %loop_begin

loop_begin:
%v1 = load i1, ptr %ptr
%cond_or = or i1 %v1, %cond
br i1 %cond_or, label %loop_a, label %loop_b, !prof !1

loop_a:
call i32 @a()
br label %latch

loop_b:
call i32 @b()
br label %latch

latch:
%v2 = load i1, ptr %ptr
br i1 %v2, label %loop_begin, label %loop_exit, !prof !2

loop_exit:
ret i32 0
}

define i32 @and(ptr %ptr, i1 %cond) !prof !0 {
entry:
br label %loop_begin

loop_begin:
%v1 = load i1, ptr %ptr
%cond_and = and i1 %v1, %cond
br i1 %cond_and, label %loop_a, label %loop_b, !prof !1

loop_a:
call i32 @a()
br label %latch

loop_b:
call i32 @b()
br label %latch

latch:
%v2 = load i1, ptr %ptr
br i1 %v2, label %loop_begin, label %loop_exit, !prof !2

loop_exit:
ret i32 0
}

;--- probable-or.prof
!0 = !{!"function_entry_count", i32 10}
!1 = !{!"branch_weights", i32 1, i32 1000}
!2 = !{!"branch_weights", i32 5, i32 7}
; CHECK-LABEL: @or
; CHECK-LABEL: entry:
; CHECK-NEXT: %cond.fr = freeze i1 %cond
; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
; CHECK-LABEL: @and
; CHECK-LABEL: entry:
; CHECK-NEXT: %cond.fr = freeze i1 %cond
; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
; CHECK: !3 = !{!"unknown", !"simple-loop-unswitch"}

;--- probable-and.prof
!0 = !{!"function_entry_count", i32 10}
!1 = !{!"branch_weights", i32 1000, i32 1}
!2 = !{!"branch_weights", i32 5, i32 7}
; CHECK-LABEL: @or
; CHECK-LABEL: entry:
; CHECK-NEXT: %cond.fr = freeze i1 %cond
; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1
; CHECK-LABEL: @and
; CHECK-LABEL: entry:
; CHECK-NEXT: %cond.fr = freeze i1 %cond
; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3
; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
; CHECK: !3 = !{!"branch_weights", i32 1000, i32 1}
11 changes: 8 additions & 3 deletions llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt < %s -simple-loop-unswitch-inject-invariant-conditions=true -passes='loop(simple-loop-unswitch<nontrivial>,loop-instsimplify)' -S | FileCheck %s

define void @test() {
Expand All @@ -7,7 +7,7 @@ define void @test() {
; CHECK-NEXT: [[TMP:%.*]] = call i1 @llvm.experimental.widenable.condition()
; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8
; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]]
; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK: bb.split:
; CHECK-NEXT: br label [[BB3:%.*]]
; CHECK: bb3:
Expand All @@ -19,7 +19,7 @@ define void @test() {
; CHECK-NEXT: [[TMP6_US:%.*]] = phi i32 [ poison, [[BB3_SPLIT_US]] ]
; CHECK-NEXT: [[TMP7_US:%.*]] = add nuw nsw i32 [[TMP6_US]], 2
; CHECK-NEXT: [[TMP8_US:%.*]] = icmp ult i32 [[TMP7_US]], [[TMP2]]
; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0]]
; CHECK: bb9.us:
; CHECK-NEXT: br label [[BB17_SPLIT_US:%.*]]
; CHECK: bb16.split.us:
Expand Down Expand Up @@ -96,3 +96,8 @@ declare i1 @llvm.experimental.widenable.condition()

!0 = !{!"branch_weights", i32 1048576, i32 1}

;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1}
;.
157 changes: 157 additions & 0 deletions llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
; RUN: split-file %s %t
; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll
; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll
; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof
; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof
;
; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-REF

; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-CHK

; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-REF

; RUN: opt -passes='module(print<block-freq>),function(loop-mssa(simple-loop-unswitch)),module(print<block-freq>)' \
; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-CHK

;--- main.ll
declare void @some_func() noreturn

define i32 @or(i1 %cond1, i32 %var1) !prof !0 {
entry:
br label %loop_begin

loop_begin:
%var3 = phi i32 [%var1, %entry], [%var2, %do_something]
%cond2 = icmp eq i32 %var3, 10
%cond.or = or i1 %cond1, %cond2
br i1 %cond.or, label %loop_exit, label %do_something, !prof !1

do_something:
%var2 = add i32 %var3, 1
call void @some_func() noreturn nounwind
br label %loop_begin

loop_exit:
ret i32 0
}

define i32 @and(i1 %cond1, i32 %var1) !prof !0 {
entry:
br label %loop_begin

loop_begin:
%var3 = phi i32 [%var1, %entry], [%var2, %do_something]
%cond2 = icmp eq i32 %var3, 10
%cond.and = and i1 %cond1, %cond2
br i1 %cond.and, label %do_something, label %loop_exit, !prof !1

do_something:
%var2 = add i32 %var3, 1
call void @some_func() noreturn nounwind
br label %loop_begin

loop_exit:
ret i32 0
}

;--- probable-or.prof
!0 = !{!"function_entry_count", i32 10}
!1 = !{!"branch_weights", i32 1, i32 1000}
; CHECK-LABEL: @or
; CHECK-LABEL: entry:
; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
; CHECK-LABEL: @and
; CHECK-LABEL: entry:
; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000}
; CHECK: !2 = !{!"unknown", !"simple-loop-unswitch"}

; PROFILE-COM: Printing analysis results of BFI for function 'or':
; PROFILE-COM: block-frequency-info: or
; PROFILE-COM: - entry: {{.*}} count = 10
; PROFILE-COM: - loop_begin: {{.*}} count = 10010
; PROFILE-COM: - do_something: {{.*}} count = 10000
; PROFILE-COM: - loop_exit: {{.*}} count = 10

; PROFILE-COM: Printing analysis results of BFI for function 'and':
; PROFILE-COM: block-frequency-info: and
; PROFILE-COM: - entry: {{.*}} count = 10
; PROFILE-COM: - loop_begin: {{.*}} count = 10
; PROFILE-COM: - do_something: {{.*}} count = 0
; PROFILE-COM: - loop_exit: {{.*}} count = 10

; PROFILE-COM: Printing analysis results of BFI for function 'or':
; PROFILE-COM: block-frequency-info: or
; PROFILE-COM: - entry: {{.*}} count = 10
; PROFILE-REF: - entry.split: {{.*}} count = 5
; PROFILE-CHK: - entry.split: {{.*}} count = 10
; PROFILE-REF: - loop_begin: {{.*}} count = 5005
; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
; PROFILE-REF: - do_something: {{.*}} count = 5000
; PROFILE-CHK: - do_something: {{.*}} count = 9990
; PROFILE-REF: - loop_exit: {{.*}} count = 5
; PROFILE-CHK: - loop_exit: {{.*}} count = 10
; PROFILE-COM: - loop_exit.split: {{.*}} count = 10

; PROFILE-COM: Printing analysis results of BFI for function 'and':
; PROFILE-COM: block-frequency-info: and
; PROFILE-COM: - entry: {{.*}} count = 10
; PROFILE-COM: - entry.split: {{.*}} count = 5
; PROFILE-COM: - loop_begin: {{.*}} count = 5
; PROFILE-COM: - do_something: {{.*}} count = 0
; PROFILE-COM: - loop_exit: {{.*}} count = 5
; PROFILE-COM: - loop_exit.split: {{.*}} count = 10

;--- probable-and.prof
!0 = !{!"function_entry_count", i32 10}
!1 = !{!"branch_weights", i32 1000, i32 1}
; CHECK-LABEL: @or
; CHECK-LABEL: entry:
; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1
; CHECK-LABEL: @and
; CHECK-LABEL: entry:
; CHECK-NEXT: %cond1.fr = freeze i1 %cond1
; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2
; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"}
; CHECK: !2 = !{!"branch_weights", i32 1000, i32 1}
; PROFILE-COM: Printing analysis results of BFI for function 'or':
; PROFILE-COM: block-frequency-info: or
; PROFILE-COM: - entry: {{.*}}, count = 10
; PROFILE-COM: - loop_begin: {{.*}}, count = 10
; PROFILE-COM: - do_something: {{.*}}, count = 0
; PROFILE-COM: - loop_exit: {{.*}}, count = 10

; PROFILE-COM: Printing analysis results of BFI for function 'and':
; PROFILE-COM: block-frequency-info: and
; PROFILE-COM: - entry: {{.*}} count = 10
; PROFILE-COM: - loop_begin: {{.*}} count = 10010
; PROFILE-COM: - do_something: {{.*}} count = 10000
; PROFILE-COM: - loop_exit: {{.*}} count = 10

; PROFILE-COM: Printing analysis results of BFI for function 'or':
; PROFILE-COM: block-frequency-info: or
; PROFILE-COM: - entry: {{.*}} count = 10
; PROFILE-COM: - entry.split: {{.*}} count = 5
; PROFILE-COM: - loop_begin: {{.*}} count = 5
; PROFILE-COM: - do_something: {{.*}} count = 0
; PROFILE-COM: - loop_exit: {{.*}} count = 5
; PROFILE-COM: - loop_exit.split: {{.*}} count = 10

; PROFILE-COM: Printing analysis results of BFI for function 'and':
; PROFILE-COM: block-frequency-info: and
; PROFILE-COM: - entry: {{.*}} count = 10
; PROFILE-REF: - entry.split: {{.*}} count = 5
; PROFILE-CHK: - entry.split: {{.*}} count = 10
; PROFILE-REF: - loop_begin: {{.*}} count = 5005
; PROFILE-CHK: - loop_begin: {{.*}} count = 10000
; PROFILE-REF: - do_something: {{.*}} count = 5000
; PROFILE-CHK: - do_something: {{.*}} count = 9990
; PROFILE-REF: - loop_exit: {{.*}} count = 5
; PROFILE-CHK: - loop_exit: {{.*}} count = 10
; PROFILE-COM: - loop_exit.split: {{.*}} count = 10