Skip to content

Commit 460202b

Browse files
committed
[ARM] Add a low-overhead sibling loop test. NFC
1 parent 9b2ab41 commit 460202b

File tree

1 file changed

+95
-0
lines changed

1 file changed

+95
-0
lines changed
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+lob --verify-machineinstrs %s -o - | FileCheck %s
3+
4+
; Test function: an outer loop of %n iterations containing two sibling inner
; loops, each of which also runs %n iterations:
;   - the first adds off[j] + data[j] and stores the sum to data[j + i*n];
;   - the second adds off[j] + data[j] and stores the sum to dst[j + i*n].
; In the expected assembly recorded below, the first inner loop is emitted as a
; low-overhead loop (dls/le on lr), while the second inner loop uses a plain
; cmp/bne back-edge.
define arm_aapcs_vfpcc void @test(i16* noalias nocapture readonly %off, i16* noalias nocapture %data, i16* noalias nocapture %dst, i32 %n) {
5+
; CHECK-LABEL: test:
6+
; CHECK: @ %bb.0: @ %entry
7+
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
8+
; CHECK-NEXT: cmp r3, #1
9+
; CHECK-NEXT: blt .LBB0_7
10+
; CHECK-NEXT: @ %bb.1: @ %for.cond1.preheader.us.preheader
11+
; CHECK-NEXT: lsl.w r12, r3, #1
12+
; CHECK-NEXT: mov.w r8, #0
13+
; CHECK-NEXT: mov r4, r1
14+
; CHECK-NEXT: .LBB0_2: @ %for.cond1.preheader.us
15+
; CHECK-NEXT: @ =>This Loop Header: Depth=1
16+
; CHECK-NEXT: @ Child Loop BB0_3 Depth 2
17+
; CHECK-NEXT: @ Child Loop BB0_5 Depth 2
18+
; CHECK-NEXT: movs r5, #0
19+
; CHECK-NEXT: dls lr, r3
20+
; CHECK-NEXT: .LBB0_3: @ %for.body4.us
21+
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
22+
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
23+
; CHECK-NEXT: ldrh.w r6, [r0, r5, lsl #1]
24+
; CHECK-NEXT: ldrh.w r7, [r1, r5, lsl #1]
25+
; CHECK-NEXT: add r6, r7
26+
; CHECK-NEXT: strh.w r6, [r4, r5, lsl #1]
27+
; CHECK-NEXT: adds r5, #1
28+
; CHECK-NEXT: le lr, .LBB0_3
29+
; CHECK-NEXT: @ %bb.4: @ %for.body15.us.preheader
30+
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
31+
; CHECK-NEXT: movs r5, #0
32+
; CHECK-NEXT: .LBB0_5: @ %for.body15.us
33+
; CHECK-NEXT: @ Parent Loop BB0_2 Depth=1
34+
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
35+
; CHECK-NEXT: ldrh.w r6, [r0, r5, lsl #1]
36+
; CHECK-NEXT: ldrh.w r7, [r1, r5, lsl #1]
37+
; CHECK-NEXT: add r7, r6
38+
; CHECK-NEXT: strh.w r7, [r2, r5, lsl #1]
39+
; CHECK-NEXT: adds r5, #1
40+
; CHECK-NEXT: cmp r3, r5
41+
; CHECK-NEXT: bne .LBB0_5
42+
; CHECK-NEXT: @ %bb.6: @ %for.cond.cleanup14.us
43+
; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1
44+
; CHECK-NEXT: add.w r8, r8, #1
45+
; CHECK-NEXT: add r2, r12
46+
; CHECK-NEXT: add r4, r12
47+
; CHECK-NEXT: cmp r8, r3
48+
; CHECK-NEXT: bne .LBB0_2
49+
; CHECK-NEXT: .LBB0_7: @ %for.cond.cleanup
50+
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; Guard: only enter the loop nest when %n > 0 (signed); otherwise return.
51+
entry:
52+
%cmp252 = icmp sgt i32 %n, 0
53+
br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup
54+
; Outer loop header: %i counts up from 0; %mul.us = i * n is the row offset
; added to the store index in both inner loops.
55+
for.cond1.preheader.us: ; preds = %entry, %for.cond.cleanup14.us
56+
%i.057.us = phi i32 [ %inc29.us, %for.cond.cleanup14.us ], [ 0, %entry ]
57+
%mul.us = mul i32 %i.057.us, %n
58+
br label %for.body4.us
59+
; Outer loop latch: increment i and exit once it reaches %n.
60+
for.cond.cleanup14.us: ; preds = %for.body15.us
61+
%inc29.us = add nuw i32 %i.057.us, 1
62+
%exitcond94 = icmp eq i32 %inc29.us, %n
63+
br i1 %exitcond94, label %for.cond.cleanup, label %for.cond1.preheader.us
64+
; Second inner loop (entered when the first one finishes):
; dst[j + i*n] = data[j] + off[j] for j in [0, n).
65+
for.body15.us: ; preds = %for.body4.us, %for.body15.us
66+
%j10.055.us = phi i32 [ %inc26.us, %for.body15.us ], [ 0, %for.body4.us ]
67+
%arrayidx16.us = getelementptr inbounds i16, i16* %off, i32 %j10.055.us
68+
%0 = load i16, i16* %arrayidx16.us, align 2
69+
%arrayidx18.us = getelementptr inbounds i16, i16* %data, i32 %j10.055.us
70+
%1 = load i16, i16* %arrayidx18.us, align 2
71+
%add20.us = add i16 %1, %0
72+
%add23.us = add i32 %j10.055.us, %mul.us
73+
%arrayidx24.us = getelementptr inbounds i16, i16* %dst, i32 %add23.us
74+
store i16 %add20.us, i16* %arrayidx24.us, align 2
75+
%inc26.us = add nuw nsw i32 %j10.055.us, 1
76+
%exitcond93 = icmp eq i32 %inc26.us, %n
77+
br i1 %exitcond93, label %for.cond.cleanup14.us, label %for.body15.us
78+
; First inner loop: data[j + i*n] = data[j] + off[j] for j in [0, n).
; NOTE(review): the store goes to %data, the same array that is loaded from
; here, whereas the sibling loop stores to %dst. Presumably intentional for
; this test; confirm before changing.
79+
for.body4.us: ; preds = %for.body4.us, %for.cond1.preheader.us
80+
%j.053.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ]
81+
%arrayidx.us = getelementptr inbounds i16, i16* %off, i32 %j.053.us
82+
%2 = load i16, i16* %arrayidx.us, align 2
83+
%arrayidx5.us = getelementptr inbounds i16, i16* %data, i32 %j.053.us
84+
%3 = load i16, i16* %arrayidx5.us, align 2
85+
%add.us = add i16 %3, %2
86+
%add8.us = add i32 %j.053.us, %mul.us
87+
%arrayidx9.us = getelementptr inbounds i16, i16* %data, i32 %add8.us
88+
store i16 %add.us, i16* %arrayidx9.us, align 2
89+
%inc.us = add nuw nsw i32 %j.053.us, 1
90+
%exitcond = icmp eq i32 %inc.us, %n
91+
br i1 %exitcond, label %for.body15.us, label %for.body4.us
92+
; Common exit, reached from the entry guard or after the last outer iteration.
93+
for.cond.cleanup: ; preds = %for.cond.cleanup14.us, %entry
94+
ret void
95+
}

0 commit comments

Comments
 (0)