Skip to content

Commit 997b6ea

Browse files
committed
[LoopFusion] Fix sink instructions
If the second loop's preheader contains instructions that can be sunk, we must also adjust any PHI nodes among them so that they receive their values from the new (fused) loop's latch block. Fixes #128600
1 parent 5d3b057 commit 997b6ea

File tree

2 files changed

+115
-5
lines changed

2 files changed

+115
-5
lines changed

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -988,8 +988,8 @@ struct LoopFuser {
988988

989989
// If it is not safe to hoist/sink all instructions in the
990990
// pre-header, we cannot fuse these loops.
991-
if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
992-
SafeToSink)) {
991+
if (!collectAndFixMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
992+
SafeToSink)) {
993993
LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
994994
"Fusion Candidate Pre-header.\n"
995995
<< "Not Fusing.\n");
@@ -1033,8 +1033,8 @@ struct LoopFuser {
10331033
FuseCounter);
10341034

10351035
FusionCandidate FusedCand(
1036-
performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
1037-
FC0Copy.PP);
1036+
performFusion((Peel ? FC0Copy : *FC0), *FC1, SafeToSink), DT,
1037+
&PDT, ORE, FC0Copy.PP);
10381038
FusedCand.verify();
10391039
assert(FusedCand.isEligibleForFusion(SE) &&
10401040
"Fused candidate should be eligible for fusion!");
@@ -1176,9 +1176,31 @@ struct LoopFuser {
11761176
return true;
11771177
}
11781178

1179+
/// Retarget PHI nodes found among the sinkable preheader instructions.
///
/// For every PHI node in \p SafeToSink, each incoming edge whose predecessor
/// block is the latch of \p FC0 is rewritten to name the latch of \p FC1
/// instead. Incoming values are left untouched, and instructions that are not
/// PHI nodes are skipped.
///
/// \param SafeToSink instructions collected as sinkable; PHI nodes in this
///        list are modified in place, the list itself is not changed.
/// \param FC0 candidate whose latch block is looked for in each PHI node.
/// \param FC1 candidate whose latch block replaces it; its latch must be
///        non-null whenever a replacement is actually performed.
void fixPHINodes(SmallVector<Instruction *, 4> &SafeToSink,
                 const FusionCandidate &FC0,
                 const FusionCandidate &FC1) const {
  for (Instruction *Inst : SafeToSink) {
    LLVM_DEBUG(dbgs() << "UPDATING: Instruction: " << *Inst << "\n");
    // A single dyn_cast both tests and converts; anything that is not a PHI
    // node has no incoming blocks to fix.
    auto *Phi = dyn_cast<PHINode>(Inst);
    if (!Phi)
      continue;
    LLVM_DEBUG(dbgs() << "UPDATING: PHI node: " << *Phi << "\n");
    // setIncomingBlock only swaps the predecessor of an existing edge, so the
    // number of incoming values is stable across the loop.
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
      if (Phi->getIncomingBlock(I) != FC0.Latch)
        continue;
      assert(FC1.Latch && "FC1 latch is not set");
      Phi->setIncomingBlock(I, FC1.Latch);
    }
  }
}
1200+
11791201
/// Collect instructions in the \p FC1 Preheader that can be hoisted
11801202
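/// to the \p FC0 Preheader or sunk into the \p FC1 Body. When every
/// preheader instruction can be moved, PHI nodes among the sinkable
/// instructions are also retargeted via fixPHINodes() before returning true.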
1181-
bool collectMovablePreheaderInsts(
1203+
bool collectAndFixMovablePreheaderInsts(
11821204
const FusionCandidate &FC0, const FusionCandidate &FC1,
11831205
SmallVector<Instruction *, 4> &SafeToHoist,
11841206
SmallVector<Instruction *, 4> &SafeToSink) const {
@@ -1226,6 +1248,8 @@ struct LoopFuser {
12261248
}
12271249
LLVM_DEBUG(
12281250
dbgs() << "All preheader instructions could be sunk or hoisted!\n");
1251+
1252+
fixPHINodes(SafeToSink, FC0, FC1);
12291253
return true;
12301254
}
12311255

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=mem2reg,loop-rotate,loop-fusion < %s 2>&1 | FileCheck %s
3+
; Test input: two back-to-back counted loops, each running while its counter
; is less than 10. The first adds %i into %sum1; the second adds %i1 * %i1
; into %sum2. The CHECK lines below expect a single loop whose four phis all
; take their back-edge value from the same block ([[FOR_INC6]]).
define i32 @main() {
4+
; CHECK-LABEL: define i32 @main() {
5+
; CHECK-NEXT: [[ENTRY:.*]]:
6+
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
7+
; CHECK: [[FOR_BODY]]:
8+
; CHECK-NEXT: [[SUM1_02:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD:%.*]], %[[FOR_INC6:.*]] ]
9+
; CHECK-NEXT: [[I_01:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC6]] ]
10+
; CHECK-NEXT: [[I1_04:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_INC6]] ]
11+
; CHECK-NEXT: [[SUM2_03:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ADD5:%.*]], %[[FOR_INC6]] ]
12+
; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM1_02]], [[I_01]]
13+
; CHECK-NEXT: br label %[[FOR_INC:.*]]
14+
; CHECK: [[FOR_INC]]:
15+
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[I1_04]], [[I1_04]]
16+
; CHECK-NEXT: [[ADD5]] = add nsw i32 [[SUM2_03]], [[MUL]]
17+
; CHECK-NEXT: br label %[[FOR_INC6]]
18+
; CHECK: [[FOR_INC6]]:
19+
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_01]], 1
20+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 10
21+
; CHECK-NEXT: [[INC7]] = add nsw i32 [[I1_04]], 1
22+
; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[INC7]], 10
23+
; CHECK-NEXT: br i1 [[CMP3]], label %[[FOR_BODY]], label %[[FOR_END8:.*]]
24+
; CHECK: [[FOR_END8]]:
25+
; CHECK-NEXT: ret i32 0
26+
;
27+
entry:
28+
%retval = alloca i32, align 4
29+
%sum1 = alloca i32, align 4
30+
%sum2 = alloca i32, align 4
31+
%i = alloca i32, align 4
32+
%i1 = alloca i32, align 4
33+
store i32 0, ptr %retval, align 4
34+
store i32 0, ptr %sum1, align 4
35+
store i32 0, ptr %sum2, align 4
36+
store i32 0, ptr %i, align 4
37+
br label %for.cond
38+
39+
; First loop header: continue while %i < 10.
for.cond:
40+
%0 = load i32, ptr %i, align 4
41+
%cmp = icmp slt i32 %0, 10
42+
br i1 %cmp, label %for.body, label %for.end
43+
44+
; First loop body: sum1 += i.
for.body:
45+
%1 = load i32, ptr %i, align 4
46+
%2 = load i32, ptr %sum1, align 4
47+
%add = add nsw i32 %2, %1
48+
store i32 %add, ptr %sum1, align 4
49+
br label %for.inc
50+
51+
for.inc:
52+
%3 = load i32, ptr %i, align 4
53+
%inc = add nsw i32 %3, 1
54+
store i32 %inc, ptr %i, align 4
55+
br label %for.cond
56+
57+
; Block between the two loops: stores the initial value 0 to %i1 before
; branching to the second loop's header.
for.end:
58+
store i32 0, ptr %i1, align 4
59+
br label %for.cond2
60+
61+
; Second loop header: continue while %i1 < 10.
for.cond2:
62+
%4 = load i32, ptr %i1, align 4
63+
%cmp3 = icmp slt i32 %4, 10
64+
br i1 %cmp3, label %for.body4, label %for.end8
65+
66+
; Second loop body: sum2 += i1 * i1.
for.body4:
67+
%5 = load i32, ptr %i1, align 4
68+
%6 = load i32, ptr %i1, align 4
69+
%mul = mul nsw i32 %5, %6
70+
%7 = load i32, ptr %sum2, align 4
71+
%add5 = add nsw i32 %7, %mul
72+
store i32 %add5, ptr %sum2, align 4
73+
br label %for.inc6
74+
75+
for.inc6:
76+
%8 = load i32, ptr %i1, align 4
77+
%inc7 = add nsw i32 %8, 1
78+
store i32 %inc7, ptr %i1, align 4
79+
br label %for.cond2
80+
81+
; Both sums are loaded but not used; the function always returns 0.
for.end8:
82+
%9 = load i32, ptr %sum1, align 4
83+
%10 = load i32, ptr %sum2, align 4
84+
ret i32 0
85+
}
86+

0 commit comments

Comments
 (0)