Skip to content

Commit 9a43216

Browse files
committed
[LoopInterchange] Adjust assertions when updating successors.
Currently the assertion in updateSuccessor is overly strict in some cases and overly relaxed in other cases. For branches to the inner and outer loop preheader it is too strict, because they can either be unconditional branches or conditional branches with duplicate targets. Both cases are fine and we can allow updating multiple successors. On the other hand, we have to at least update one successor. This patch adds such an assertion.
1 parent 6078be6 commit 9a43216

File tree

2 files changed

+180
-19
lines changed

2 files changed

+180
-19
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,25 +1316,32 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
13161316
FromBB->getTerminator()->getIterator());
13171317
}
13181318

1319-
/// Update BI to jump to NewBB instead of OldBB. Records updates to
1320-
/// the dominator tree in DTUpdates, if DT should be preserved.
1319+
// Update BI to jump to NewBB instead of OldBB. Records updates to the
1320+
// dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1321+
// \p OldBB is exactly once in BI's successor list.
13211322
static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
13221323
BasicBlock *NewBB,
1323-
std::vector<DominatorTree::UpdateType> &DTUpdates) {
1324-
assert(llvm::count_if(successors(BI),
1325-
[OldBB](BasicBlock *BB) { return BB == OldBB; }) < 2 &&
1326-
"BI must jump to OldBB at most once.");
1327-
for (unsigned i = 0, e = BI->getNumSuccessors(); i < e; ++i) {
1328-
if (BI->getSuccessor(i) == OldBB) {
1329-
BI->setSuccessor(i, NewBB);
1330-
1331-
DTUpdates.push_back(
1332-
{DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1333-
DTUpdates.push_back(
1334-
{DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
1335-
break;
1324+
std::vector<DominatorTree::UpdateType> &DTUpdates,
1325+
bool MustUpdateOnce = true) {
1326+
assert((!MustUpdateOnce ||
1327+
llvm::count_if(successors(BI),
1328+
[OldBB](BasicBlock *BB) {
1329+
return BB == OldBB;
1330+
}) == 1) && "BI must jump to OldBB exactly once.");
1331+
bool Changed = false;
1332+
for (Use &Op : BI->operands())
1333+
if (Op == OldBB) {
1334+
Op.set(NewBB);
1335+
Changed = true;
13361336
}
1337+
1338+
if (Changed) {
1339+
DTUpdates.push_back(
1340+
{DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
1341+
DTUpdates.push_back(
1342+
{DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
13371343
}
1344+
assert(Changed && "Expected a successor to be updated");
13381345
}
13391346

13401347
// Move Lcssa PHIs to the right place.
@@ -1481,12 +1488,21 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
14811488
if (!InnerLoopHeaderSuccessor)
14821489
return false;
14831490

1484-
// Adjust Loop Preheader and headers
1491+
// Adjust Loop Preheader and headers.
1492+
// The branches in the outer loop predecessor and the outer loop header can
1493+
// be unconditional branches or conditional branches with duplicates. Consider
1494+
// this when updating the successors.
14851495
updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1486-
InnerLoopPreHeader, DTUpdates);
1487-
updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates);
1496+
InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1497+
// The outer loop header might or might not branch to the outer latch.
1498+
// We are guaranteed to branch to the inner loop preheader.
1499+
if (std::find(succ_begin(OuterLoopHeaderBI), succ_end(OuterLoopHeaderBI),
1500+
OuterLoopLatch) != succ_end(OuterLoopHeaderBI))
1501+
updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
1502+
/*MustUpdateOnce=*/false);
14881503
updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1489-
InnerLoopHeaderSuccessor, DTUpdates);
1504+
InnerLoopHeaderSuccessor, DTUpdates,
1505+
/*MustUpdateOnce=*/false);
14901506

14911507
// Adjust reduction PHI's now that the incoming block has changed.
14921508
InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -loop-interchange -S %s | FileCheck %s
3+
4+
5+
@global = external dso_local global [1000 x [1000 x i32]], align 16
6+
7+
; Test that we support updating conditional branches where both targets are the same
8+
; in the predecessor of the outer loop header.
9+
10+
define void @foo(i1 %cmp) {
11+
; CHECK-LABEL: @foo(
12+
; CHECK-NEXT: entry:
13+
; CHECK-NEXT: br i1 [[CMP:%.*]], label [[INNER_HEADER_PREHEADER:%.*]], label [[INNER_HEADER_PREHEADER]]
14+
; CHECK: bb1:
15+
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
16+
; CHECK: outer.header:
17+
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ 0, [[BB1:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ]
18+
; CHECK-NEXT: br label [[INNER_HEADER_SPLIT1:%.*]]
19+
; CHECK: inner.header.preheader:
20+
; CHECK-NEXT: br label [[INNER_HEADER:%.*]]
21+
; CHECK: inner.header:
22+
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[TMP0:%.*]], [[INNER_HEADER_SPLIT:%.*]] ], [ 5, [[INNER_HEADER_PREHEADER]] ]
23+
; CHECK-NEXT: br label [[BB1]]
24+
; CHECK: inner.header.split1:
25+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]]
26+
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]]
27+
; CHECK-NEXT: [[V:%.*]] = mul i32 [[LV]], 100
28+
; CHECK-NEXT: store i32 [[V]], i32* [[PTR]]
29+
; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1
30+
; CHECK-NEXT: [[COND1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 1000
31+
; CHECK-NEXT: br label [[OUTER_LATCH]]
32+
; CHECK: inner.header.split:
33+
; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1
34+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 1000
35+
; CHECK-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[INNER_HEADER]]
36+
; CHECK: outer.latch:
37+
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
38+
; CHECK-NEXT: [[COND2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 1000
39+
; CHECK-NEXT: br i1 [[COND2]], label [[INNER_HEADER_SPLIT]], label [[OUTER_HEADER]]
40+
; CHECK: bb9:
41+
; CHECK-NEXT: br label [[BB10:%.*]]
42+
; CHECK: bb10:
43+
; CHECK-NEXT: ret void
44+
;
45+
entry:
46+
br i1 %cmp, label %bb1, label %bb1
47+
48+
bb1: ; preds = %entry, %entry
49+
br label %outer.header
50+
51+
outer.header: ; preds = %outer.latch, %bb1
52+
%outer.iv = phi i64 [ 0, %bb1], [ %outer.iv.next, %outer.latch ]
53+
br label %inner.header
54+
55+
inner.header: ; preds = %inner.header, %outer.header
56+
%inner.iv = phi i64 [ %inner.iv.next, %inner.header ], [ 5, %outer.header ]
57+
%ptr = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 %inner.iv, i64 %outer.iv
58+
%lv = load i32, i32* %ptr
59+
%v = mul i32 %lv, 100
60+
store i32 %v, i32* %ptr
61+
%inner.iv.next = add nsw i64 %inner.iv, 1
62+
%cond1 = icmp eq i64 %inner.iv.next , 1000
63+
br i1 %cond1, label %outer.latch, label %inner.header
64+
65+
outer.latch: ; preds = %inner.header
66+
%outer.iv.next = add nuw nsw i64 %outer.iv, 1
67+
%cond2 = icmp eq i64 %outer.iv.next, 1000
68+
br i1 %cond2, label %bb9, label %outer.header
69+
70+
bb9: ; preds = %outer.latch
71+
br label %bb10
72+
73+
bb10: ; preds = %bb9
74+
ret void
75+
}
76+
77+
78+
define void @foo1(i1 %cmp) {
79+
; CHECK-LABEL: @foo1(
80+
; CHECK-NEXT: entry:
81+
; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB1]]
82+
; CHECK: bb1:
83+
; CHECK-NEXT: br i1 [[CMP]], label [[INNER_HEADER_PREHEADER:%.*]], label [[INNER_HEADER_PREHEADER]]
84+
; CHECK: outer.header.preheader:
85+
; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
86+
; CHECK: outer.header:
87+
; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ 0, [[OUTER_HEADER_PREHEADER:%.*]] ]
88+
; CHECK-NEXT: br i1 [[CMP]], label [[INNER_HEADER_SPLIT1:%.*]], label [[INNER_HEADER_SPLIT1]]
89+
; CHECK: inner.header.preheader:
90+
; CHECK-NEXT: br label [[INNER_HEADER:%.*]]
91+
; CHECK: inner.header:
92+
; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ [[TMP0:%.*]], [[INNER_HEADER_SPLIT:%.*]] ], [ 5, [[INNER_HEADER_PREHEADER]] ]
93+
; CHECK-NEXT: br label [[OUTER_HEADER_PREHEADER]]
94+
; CHECK: inner.header.split1:
95+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]]
96+
; CHECK-NEXT: [[LV:%.*]] = load i32, i32* [[PTR]]
97+
; CHECK-NEXT: [[V:%.*]] = mul i32 [[LV]], 100
98+
; CHECK-NEXT: store i32 [[V]], i32* [[PTR]]
99+
; CHECK-NEXT: [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1
100+
; CHECK-NEXT: [[COND1:%.*]] = icmp eq i64 [[INNER_IV_NEXT]], 1000
101+
; CHECK-NEXT: br label [[OUTER_LATCH]]
102+
; CHECK: inner.header.split:
103+
; CHECK-NEXT: [[TMP0]] = add nsw i64 [[INNER_IV]], 1
104+
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 1000
105+
; CHECK-NEXT: br i1 [[TMP1]], label [[BB9:%.*]], label [[INNER_HEADER]]
106+
; CHECK: outer.latch:
107+
; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
108+
; CHECK-NEXT: [[COND2:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], 1000
109+
; CHECK-NEXT: br i1 [[COND2]], label [[INNER_HEADER_SPLIT]], label [[OUTER_HEADER]]
110+
; CHECK: bb9:
111+
; CHECK-NEXT: br label [[BB10:%.*]]
112+
; CHECK: bb10:
113+
; CHECK-NEXT: ret void
114+
;
115+
entry:
116+
br i1 %cmp, label %bb1, label %bb1
117+
118+
bb1: ; preds = %entry, %entry
119+
br i1 %cmp, label %outer.header, label %outer.header
120+
121+
outer.header: ; preds = %outer.latch, %bb1
122+
%outer.iv = phi i64 [ 0, %bb1 ], [ 0, %bb1 ], [ %outer.iv.next, %outer.latch ]
123+
br i1 %cmp, label %inner.header, label %inner.header
124+
125+
inner.header: ; preds = %inner.header, %outer.header
126+
%inner.iv = phi i64 [ %inner.iv.next, %inner.header ], [ 5, %outer.header ], [ 5, %outer.header ]
127+
%ptr = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @global, i64 0, i64 %inner.iv, i64 %outer.iv
128+
%lv = load i32, i32* %ptr
129+
%v = mul i32 %lv, 100
130+
store i32 %v, i32* %ptr
131+
%inner.iv.next = add nsw i64 %inner.iv, 1
132+
%cond1 = icmp eq i64 %inner.iv.next , 1000
133+
br i1 %cond1, label %outer.latch, label %inner.header
134+
135+
outer.latch: ; preds = %inner.header
136+
%outer.iv.next = add nuw nsw i64 %outer.iv, 1
137+
%cond2 = icmp eq i64 %outer.iv.next, 1000
138+
br i1 %cond2, label %bb9, label %outer.header
139+
140+
bb9: ; preds = %outer.latch
141+
br label %bb10
142+
143+
bb10: ; preds = %bb9
144+
ret void
145+
}

0 commit comments

Comments
 (0)