Skip to content

Commit 8fe7a5e

Browse files
committed
address review comments
1 parent 997b6ea commit 8fe7a5e

File tree

2 files changed

+55
-23
lines changed

2 files changed

+55
-23
lines changed

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -988,8 +988,8 @@ struct LoopFuser {
988988

989989
// If it is not safe to hoist/sink all instructions in the
990990
// pre-header, we cannot fuse these loops.
991-
if (!collectAndFixMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
992-
SafeToSink)) {
991+
if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
992+
SafeToSink)) {
993993
LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
994994
"Fusion Candidate Pre-header.\n"
995995
<< "Not Fusing.\n");
@@ -1200,7 +1200,7 @@ struct LoopFuser {
12001200

12011201
/// Collect instructions in the \p FC1 Preheader that can be hoisted
12021202
/// to the \p FC0 Preheader or sunk into the \p FC1 Body
1203-
bool collectAndFixMovablePreheaderInsts(
1203+
bool collectMovablePreheaderInsts(
12041204
const FusionCandidate &FC0, const FusionCandidate &FC1,
12051205
SmallVector<Instruction *, 4> &SafeToHoist,
12061206
SmallVector<Instruction *, 4> &SafeToSink) const {
@@ -1249,7 +1249,6 @@ struct LoopFuser {
12491249
LLVM_DEBUG(
12501250
dbgs() << "All preheader instructions could be sunk or hoisted!\n");
12511251

1252-
fixPHINodes(SafeToSink, FC0, FC1);
12531252
return true;
12541253
}
12551254

@@ -1593,7 +1592,8 @@ struct LoopFuser {
15931592
/// two loops could also be fused into a single block. This will require
15941593
/// analysis to prove it is safe to move the contents of the block past
15951594
/// existing code, which currently has not been implemented.
1596-
Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1) {
1595+
Loop *performFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
1596+
SmallVector<Instruction *, 4> &SafeToSink) {
15971597
assert(FC0.isValid() && FC1.isValid() &&
15981598
"Expecting valid fusion candidates");
15991599

@@ -1735,6 +1735,9 @@ struct LoopFuser {
17351735
TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete,
17361736
FC1.Latch, FC1.Header));
17371737

1738+
// Fix PHI nodes that are sunk into the body of the loop.
1739+
fixPHINodes(SafeToSink, FC0, FC1);
1740+
17381741
// Update DT/PDT
17391742
DTU.applyUpdates(TreeUpdates);
17401743

llvm/test/Transforms/LoopFusion/sunk-phi-nodes.ll

Lines changed: 47 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,27 +1,56 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -S -passes=mem2reg,loop-rotate,loop-fusion < %s 2>&1 | FileCheck %s
3-
define i32 @main() {
4-
; CHECK-LABEL: define i32 @main() {
5-
; CHECK-NEXT: [[ENTRY:.*]]:
6-
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
2+
; RUN: opt -S -passes=loop-fusion < %s 2>&1 | FileCheck %s
3+
define i32 @foo() {
4+
; CHECK-LABEL: define i32 @foo() {
5+
; CHECK-NEXT: [[ENTRY:.*:]]
6+
; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
7+
; CHECK-NEXT: [[SUM1:%.*]] = alloca i32, align 4
8+
; CHECK-NEXT: [[SUM2:%.*]] = alloca i32, align 4
9+
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
10+
; CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4
11+
; CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
12+
; CHECK-NEXT: store i32 0, ptr [[SUM1]], align 4
13+
; CHECK-NEXT: store i32 0, ptr [[SUM2]], align 4
14+
; CHECK-NEXT: store i32 0, ptr [[I]], align 4
15+
; CHECK-NEXT: br label %[[FOR_COND:.*]]
16+
; CHECK: [[FOR_COND]]:
17+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
18+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
19+
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
720
; CHECK: [[FOR_BODY]]:
8-
; CHECK-NEXT: [[SUM1_02:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[FOR_INC6:.*]] ]
9-
; CHECK-NEXT: [[I_01:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC6]] ]
10-
; CHECK-NEXT: [[I1_04:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_INC6]] ]
11-
; CHECK-NEXT: [[SUM2_03:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD5:%.*]], %[[FOR_INC6]] ]
12-
; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM1_02]], [[I_01]]
21+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
22+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SUM1]], align 4
23+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
24+
; CHECK-NEXT: store i32 [[ADD]], ptr [[SUM1]], align 4
1325
; CHECK-NEXT: br label %[[FOR_INC:.*]]
1426
; CHECK: [[FOR_INC]]:
15-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I1_04]], [[I1_04]]
16-
; CHECK-NEXT: [[ADD5]] = add nsw i32 [[SUM2_03]], [[MUL]]
17-
; CHECK-NEXT: br label %[[FOR_INC6]]
27+
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4
28+
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
29+
; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4
30+
; CHECK-NEXT: br label %[[FOR_COND]]
31+
; CHECK: [[FOR_END]]:
32+
; CHECK-NEXT: store i32 0, ptr [[I1]], align 4
33+
; CHECK-NEXT: br label %[[FOR_COND2:.*]]
34+
; CHECK: [[FOR_COND2]]:
35+
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
36+
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP4]], 10
37+
; CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END8:.*]]
38+
; CHECK: [[FOR_BODY4]]:
39+
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I1]], align 4
40+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[I1]], align 4
41+
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], [[TMP6]]
42+
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[SUM2]], align 4
43+
; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP7]], [[MUL]]
44+
; CHECK-NEXT: store i32 [[ADD5]], ptr [[SUM2]], align 4
45+
; CHECK-NEXT: br label %[[FOR_INC6:.*]]
1846
; CHECK: [[FOR_INC6]]:
19-
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
20-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 10
21-
; CHECK-NEXT: [[INC7]] = add nsw i32 [[I1_04]], 1
22-
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[INC7]], 10
23-
; CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY]], label %[[FOR_END8:.*]]
47+
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
48+
; CHECK-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
49+
; CHECK-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
50+
; CHECK-NEXT: br label %[[FOR_COND2]]
2451
; CHECK: [[FOR_END8]]:
52+
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[SUM1]], align 4
53+
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[SUM2]], align 4
2554
; CHECK-NEXT: ret i32 0
2655
;
2756
entry:

0 commit comments

Comments
 (0)