Skip to content

Commit 35ea738

Browse files
committed
address review comments
1 parent 8fe7a5e commit 35ea738

File tree

2 files changed

+48
-100
lines changed

2 files changed

+48
-100
lines changed

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,19 +1176,18 @@ struct LoopFuser {
11761176
return true;
11771177
}
11781178

1179+
// This function fixes sunk PHI nodes after fusion.
11791180
void fixPHINodes(SmallVector<Instruction *, 4> &SafeToSink,
11801181
const FusionCandidate &FC0,
11811182
const FusionCandidate &FC1) const {
11821183
// Iterate over SafeToSink instructions and update PHI nodes:
11831184
// only incoming entries whose block is the latch block of FC0
11841185
// are updated; entries from any other block are skipped.
11851186
for (Instruction *Inst : SafeToSink) {
1186-
LLVM_DEBUG(dbgs() << "UPDATING: Instruction: " << *Inst << "\n");
11871187
// Continue if the instruction is not a PHI node.
11881188
if (!isa<PHINode>(Inst))
11891189
continue;
11901190
PHINode *Phi = dyn_cast<PHINode>(Inst);
1191-
LLVM_DEBUG(dbgs() << "UPDATING: PHI node: " << *Phi << "\n");
11921191
for (unsigned I = 0; I < Phi->getNumIncomingValues(); I++) {
11931192
if (Phi->getIncomingBlock(I) != FC0.Latch)
11941193
continue;
@@ -1248,7 +1247,6 @@ struct LoopFuser {
12481247
}
12491248
LLVM_DEBUG(
12501249
dbgs() << "All preheader instructions could be sunk or hoisted!\n");
1251-
12521250
return true;
12531251
}
12541252

Lines changed: 47 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,115 +1,65 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2-
; RUN: opt -S -passes=loop-fusion < %s 2>&1 | FileCheck %s
3-
define i32 @foo() {
4-
; CHECK-LABEL: define i32 @foo() {
5-
; CHECK-NEXT: [[ENTRY:.*:]]
6-
; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
7-
; CHECK-NEXT: [[SUM1:%.*]] = alloca i32, align 4
8-
; CHECK-NEXT: [[SUM2:%.*]] = alloca i32, align 4
9-
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
10-
; CHECK-NEXT: [[I1:%.*]] = alloca i32, align 4
11-
; CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
12-
; CHECK-NEXT: store i32 0, ptr [[SUM1]], align 4
13-
; CHECK-NEXT: store i32 0, ptr [[SUM2]], align 4
14-
; CHECK-NEXT: store i32 0, ptr [[I]], align 4
15-
; CHECK-NEXT: br label %[[FOR_COND:.*]]
16-
; CHECK: [[FOR_COND]]:
17-
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
18-
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
19-
; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
2+
; RUN: opt -passes=loop-fusion -S < %s 2>&1 | FileCheck %s
3+
define dso_local i32 @check_sunk_phi_nodes() {
4+
; CHECK-LABEL: define dso_local i32 @check_sunk_phi_nodes() {
5+
; CHECK-NEXT: [[ENTRY:.*]]:
6+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
207
; CHECK: [[FOR_BODY]]:
21-
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
22-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SUM1]], align 4
23-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP1]]
24-
; CHECK-NEXT: store i32 [[ADD]], ptr [[SUM1]], align 4
8+
; CHECK-NEXT: [[SUM1_02:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[FOR_INC6:.*]] ]
9+
; CHECK-NEXT: [[I_01:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC6]] ]
10+
; CHECK-NEXT: [[I1_04:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_INC6]] ]
11+
; CHECK-NEXT: [[SUM2_03:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD5:%.*]], %[[FOR_INC6]] ]
12+
; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM1_02]], [[I_01]]
2513
; CHECK-NEXT: br label %[[FOR_INC:.*]]
2614
; CHECK: [[FOR_INC]]:
27-
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4
28-
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
29-
; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4
30-
; CHECK-NEXT: br label %[[FOR_COND]]
31-
; CHECK: [[FOR_END]]:
32-
; CHECK-NEXT: store i32 0, ptr [[I1]], align 4
33-
; CHECK-NEXT: br label %[[FOR_COND2:.*]]
34-
; CHECK: [[FOR_COND2]]:
35-
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
36-
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP4]], 10
37-
; CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY4:.*]], label %[[FOR_END8:.*]]
38-
; CHECK: [[FOR_BODY4]]:
39-
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I1]], align 4
40-
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[I1]], align 4
41-
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], [[TMP6]]
42-
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[SUM2]], align 4
43-
; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP7]], [[MUL]]
44-
; CHECK-NEXT: store i32 [[ADD5]], ptr [[SUM2]], align 4
45-
; CHECK-NEXT: br label %[[FOR_INC6:.*]]
15+
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I1_04]], [[I1_04]]
16+
; CHECK-NEXT: [[ADD5]] = add nsw i32 [[SUM2_03]], [[MUL]]
17+
; CHECK-NEXT: br label %[[FOR_INC6]]
4618
; CHECK: [[FOR_INC6]]:
47-
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
48-
; CHECK-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
49-
; CHECK-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
50-
; CHECK-NEXT: br label %[[FOR_COND2]]
19+
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
20+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 10
21+
; CHECK-NEXT: [[INC7]] = add nsw i32 [[I1_04]], 1
22+
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[INC7]], 10
23+
; CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY]], label %[[FOR_END8:.*]]
5124
; CHECK: [[FOR_END8]]:
52-
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[SUM1]], align 4
53-
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[SUM2]], align 4
54-
; CHECK-NEXT: ret i32 0
25+
; CHECK-NEXT: [[SUM2_0_LCSSA:%.*]] = phi i32 [ [[ADD5]], %[[FOR_INC6]] ]
26+
; CHECK-NEXT: [[SUM1_0_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INC6]] ]
27+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SUM1_0_LCSSA]], [[SUM2_0_LCSSA]]
28+
; CHECK-NEXT: ret i32 [[TMP0]]
5529
;
5630
entry:
57-
%retval = alloca i32, align 4
58-
%sum1 = alloca i32, align 4
59-
%sum2 = alloca i32, align 4
60-
%i = alloca i32, align 4
61-
%i1 = alloca i32, align 4
62-
store i32 0, ptr %retval, align 4
63-
store i32 0, ptr %sum1, align 4
64-
store i32 0, ptr %sum2, align 4
65-
store i32 0, ptr %i, align 4
66-
br label %for.cond
31+
br label %for.body
6732

68-
for.cond:
69-
%0 = load i32, ptr %i, align 4
70-
%cmp = icmp slt i32 %0, 10
71-
br i1 %cmp, label %for.body, label %for.end
72-
73-
for.body:
74-
%1 = load i32, ptr %i, align 4
75-
%2 = load i32, ptr %sum1, align 4
76-
%add = add nsw i32 %2, %1
77-
store i32 %add, ptr %sum1, align 4
33+
for.body: ; preds = %entry, %for.inc
34+
%sum1.02 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
35+
%i.01 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
36+
%add = add nsw i32 %sum1.02, %i.01
7837
br label %for.inc
7938

80-
for.inc:
81-
%3 = load i32, ptr %i, align 4
82-
%inc = add nsw i32 %3, 1
83-
store i32 %inc, ptr %i, align 4
84-
br label %for.cond
85-
86-
for.end:
87-
store i32 0, ptr %i1, align 4
88-
br label %for.cond2
39+
for.inc: ; preds = %for.body
40+
%inc = add nsw i32 %i.01, 1
41+
%cmp = icmp slt i32 %inc, 10
42+
br i1 %cmp, label %for.body, label %for.end
8943

90-
for.cond2:
91-
%4 = load i32, ptr %i1, align 4
92-
%cmp3 = icmp slt i32 %4, 10
93-
br i1 %cmp3, label %for.body4, label %for.end8
44+
for.end: ; preds = %for.inc
45+
%sum1.0.lcssa = phi i32 [ %add, %for.inc ]
46+
br label %for.body4
9447

95-
for.body4:
96-
%5 = load i32, ptr %i1, align 4
97-
%6 = load i32, ptr %i1, align 4
98-
%mul = mul nsw i32 %5, %6
99-
%7 = load i32, ptr %sum2, align 4
100-
%add5 = add nsw i32 %7, %mul
101-
store i32 %add5, ptr %sum2, align 4
48+
for.body4: ; preds = %for.end, %for.inc6
49+
%i1.04 = phi i32 [ 0, %for.end ], [ %inc7, %for.inc6 ]
50+
%sum2.03 = phi i32 [ 0, %for.end ], [ %add5, %for.inc6 ]
51+
%mul = mul nsw i32 %i1.04, %i1.04
52+
%add5 = add nsw i32 %sum2.03, %mul
10253
br label %for.inc6
10354

104-
for.inc6:
105-
%8 = load i32, ptr %i1, align 4
106-
%inc7 = add nsw i32 %8, 1
107-
store i32 %inc7, ptr %i1, align 4
108-
br label %for.cond2
55+
for.inc6: ; preds = %for.body4
56+
%inc7 = add nsw i32 %i1.04, 1
57+
%cmp3 = icmp slt i32 %inc7, 10
58+
br i1 %cmp3, label %for.body4, label %for.end8
10959

110-
for.end8:
111-
%9 = load i32, ptr %sum1, align 4
112-
%10 = load i32, ptr %sum2, align 4
113-
ret i32 0
60+
for.end8: ; preds = %for.inc6
61+
%sum2.0.lcssa = phi i32 [ %add5, %for.inc6 ]
62+
%0 = add i32 %sum1.0.lcssa, %sum2.0.lcssa
63+
ret i32 %0
11464
}
11565

0 commit comments

Comments
 (0)