Skip to content

Commit 75a8df6

Browse files
committed
[LoopUnroll] Fix block frequencies when no runtime
This patch implements the LoopUnroll changes discussed in [\[RFC\] Fix Loop Transformations to Preserve Block Frequencies](https://discourse.llvm.org/t/rfc-fix-loop-transformations-to-preserve-block-frequencies/85785) and is thus another step in addressing issue #135812.

In summary, for the case of partial loop unrolling without a runtime, this patch changes LoopUnroll to:

- Maintain branch weights consistently with the original loop for the sake of preserving the total frequency of the original loop body.
- Store the new estimated trip count in the `llvm.loop.estimated_trip_count` metadata, introduced by PR #148758.
- Correct the new estimated trip count (e.g., 3 instead of 2) when the original estimated trip count (e.g., 10) divided by the unroll count (e.g., 4) leaves a remainder (e.g., 2).

There are loop unrolling cases this patch does not fully fix, such as partial unrolling with a runtime and complete unrolling, and there are two associated tests this patch marks as XFAIL. They will be addressed in future patches that should land with this patch.
1 parent cc3283d commit 75a8df6

File tree

5 files changed

+100
-6
lines changed

5 files changed

+100
-6
lines changed

llvm/lib/Transforms/Utils/LoopUnroll.cpp

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -499,9 +499,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
499499

500500
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
501501
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
502-
unsigned EstimatedLoopInvocationWeight = 0;
503502
std::optional<unsigned> OriginalTripCount =
504-
llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight);
503+
llvm::getLoopEstimatedTripCount(L);
505504

506505
// Effectively "DCE" unrolled iterations that are beyond the max tripcount
507506
// and will never be executed.
@@ -1130,10 +1129,35 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
11301129
// We shouldn't try to use `L` anymore.
11311130
L = nullptr;
11321131
} else if (OriginalTripCount) {
1133-
// Update the trip count. Note that the remainder has already logic
1134-
// computing it in `UnrollRuntimeLoopRemainder`.
1135-
setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count,
1136-
EstimatedLoopInvocationWeight);
1132+
// Update metadata for the estimated trip count.
1133+
//
1134+
// If ULO.Runtime, UnrollRuntimeLoopRemainder handles branch weights for the
1135+
// remainder loop it creates, and the unrolled loop's branch weights are
1136+
// adjusted below. Otherwise, if unrolled loop iterations' latches become
1137+
// unconditional, branch weights are adjusted above. Otherwise, the
1138+
// original loop's branch weights are correct for the unrolled loop, so do
1139+
// not adjust them.
1140+
// FIXME: Actually handle such unconditional latches and ULO.Runtime.
1141+
//
1142+
// For example, consider what happens if the unroll count is 4 for a loop
1143+
// with an estimated trip count of 10 when we do not create a remainder loop
1144+
// and all iterations' latches remain conditional. Each unrolled
1145+
// iteration's latch still has the same probability of exiting the loop as
1146+
// it did when in the original loop, and thus it should still have the same
1147+
// branch weights. Each unrolled iteration's non-zero probability of
1148+
// exiting already appropriately reduces the probability of reaching the
1149+
// remaining iterations just as it did in the original loop. Trying to also
1150+
// adjust the branch weights of the final unrolled iteration's latch (i.e.,
1151+
// the backedge for the unrolled loop as a whole) to reflect its new trip
1152+
// count of 3 will erroneously further reduce its block frequencies.
1153+
// However, in case an analysis later needs to estimate the trip count of
1154+
// the unrolled loop as a whole without considering the branch weights for
1155+
// each unrolled iteration's latch within it, we store the new trip count as
1156+
// separate metadata.
1157+
unsigned NewTripCount = *OriginalTripCount / ULO.Count;
1158+
if (!ULO.Runtime && *OriginalTripCount % ULO.Count)
1159+
NewTripCount += 1;
1160+
setLoopEstimatedTripCount(L, NewTripCount);
11371161
}
11381162

11391163
// LoopInfo should not be valid, confirm that.
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; Test branch weight metadata, estimated trip count metadata, and block
2+
; frequencies after partial loop unrolling without -unroll-runtime.
3+
4+
; RUN: opt < %s -S -passes='print<block-freq>' 2>&1 | \
5+
; RUN: FileCheck -check-prefix=CHECK %s
6+
7+
; The -implicit-check-not options make sure that no additional labels or calls
8+
; to @f show up.
9+
; RUN: opt < %s -S -passes='loop-unroll,print<block-freq>' \
10+
; RUN: -unroll-count=4 2>&1 | \
11+
; RUN: FileCheck %s -check-prefix=CHECK-UR \
12+
; RUN: -implicit-check-not='{{^( *- )?[^ ;]*:}}' \
13+
; RUN: -implicit-check-not='call void @f'
14+
15+
; CHECK: block-frequency-info: test
16+
; CHECK: do.body: float = 10.0,
17+
18+
; The sum should still be ~10.
19+
;
20+
; CHECK-UR: block-frequency-info: test
21+
; CHECK-UR: - [[ENTRY:.*]]:
22+
; CHECK-UR: - [[DO_BODY:.*]]: float = 2.9078,
23+
; CHECK-UR: - [[DO_BODY_1:.*]]: float = 2.617,
24+
; CHECK-UR: - [[DO_BODY_2:.*]]: float = 2.3553,
25+
; CHECK-UR: - [[DO_BODY_3:.*]]: float = 2.1198,
26+
; CHECK-UR: - [[DO_END:.*]]:
27+
28+
declare void @f(i32)
29+
30+
define void @test(i32 %n) {
31+
; CHECK-UR-LABEL: define void @test(i32 %{{.*}}) {
32+
; CHECK-UR: [[ENTRY]]:
33+
; CHECK-UR: br label %[[DO_BODY]]
34+
; CHECK-UR: [[DO_BODY]]:
35+
; CHECK-UR: call void @f
36+
; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_1]], !prof ![[#PROF:]]
37+
; CHECK-UR: [[DO_BODY_1]]:
38+
; CHECK-UR: call void @f
39+
; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_2]], !prof ![[#PROF]]
40+
; CHECK-UR: [[DO_BODY_2]]:
41+
; CHECK-UR: call void @f
42+
; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY_3]], !prof ![[#PROF]]
43+
; CHECK-UR: [[DO_BODY_3]]:
44+
; CHECK-UR: call void @f
45+
; CHECK-UR: br i1 %{{.*}}, label %[[DO_END]], label %[[DO_BODY]], !prof ![[#PROF]], !llvm.loop ![[#LOOP_UR_LATCH:]]
46+
; CHECK-UR: [[DO_END]]:
47+
; CHECK-UR: ret void
48+
49+
entry:
50+
br label %do.body
51+
52+
do.body:
53+
%i = phi i32 [ 0, %entry ], [ %inc, %do.body ]
54+
%inc = add i32 %i, 1
55+
call void @f(i32 %i)
56+
%c = icmp sge i32 %inc, %n
57+
br i1 %c, label %do.end, label %do.body, !prof !0
58+
59+
do.end:
60+
ret void
61+
}
62+
63+
!0 = !{!"branch_weights", i32 1, i32 9}
64+
65+
; CHECK-UR: ![[#PROF]] = !{!"branch_weights", i32 1, i32 9}
66+
; CHECK-UR: ![[#LOOP_UR_LATCH]] = distinct !{![[#LOOP_UR_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]}
67+
; CHECK-UR: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3}
68+
; CHECK-UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"}

llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-count=4 | FileCheck %s
2+
; XFAIL: *
23

34
;; Check that the remainder loop is properly assigned a branch weight for its latch branch.
45
; CHECK-LABEL: @test(

llvm/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime -unroll-threshold=40 -unroll-max-percent-threshold-boost=100 | FileCheck %s
2+
; XFAIL: *
23

34
@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
45

0 commit comments

Comments
 (0)