@@ -96,6 +96,17 @@ entry:
9696define void @_Z2x6v () local_unnamed_addr {
9797; CHECK-LABEL: _Z2x6v:
9898; CHECK: # %bb.0: # %entry
99+ ; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
100+ ; CHECK-NEXT: movl (%rax), %edx
101+ ; CHECK-NEXT: andl $511, %edx # imm = 0x1FF
102+ ; CHECK-NEXT: leaq 1(%rdx), %rax
103+ ; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
104+ ; CHECK-NEXT: movl %eax, (%rcx)
105+ ; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
106+ ; CHECK-NEXT: movl (%rcx), %ecx
107+ ; CHECK-NEXT: testl %ecx, %ecx
108+ ; CHECK-NEXT: je .LBB1_18
109+ ; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
99110; CHECK-NEXT: pushq %rbp
100111; CHECK-NEXT: .cfi_def_cfa_offset 16
101112; CHECK-NEXT: pushq %r15
@@ -114,58 +125,47 @@ define void @_Z2x6v() local_unnamed_addr {
114125; CHECK-NEXT: .cfi_offset %r14, -32
115126; CHECK-NEXT: .cfi_offset %r15, -24
116127; CHECK-NEXT: .cfi_offset %rbp, -16
117- ; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
118- ; CHECK-NEXT: movl (%rax), %ebx
119- ; CHECK-NEXT: andl $511, %ebx # imm = 0x1FF
120- ; CHECK-NEXT: leaq 1(%rbx), %rax
121- ; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
122- ; CHECK-NEXT: movl %eax, (%rcx)
123- ; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
124- ; CHECK-NEXT: movl (%rcx), %ecx
125- ; CHECK-NEXT: testl %ecx, %ecx
126- ; CHECK-NEXT: je .LBB1_18
127- ; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
128- ; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdx
129- ; CHECK-NEXT: movq (%rdx), %rsi
130- ; CHECK-NEXT: movl %ecx, %edx
131- ; CHECK-NEXT: notl %edx
132- ; CHECK-NEXT: leaq 8(,%rdx,8), %rdi
128+ ; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rsi
129+ ; CHECK-NEXT: movq (%rsi), %rsi
130+ ; CHECK-NEXT: movl %ecx, %edi
131+ ; CHECK-NEXT: notl %edi
132+ ; CHECK-NEXT: leaq 8(,%rdi,8), %rdi
133133; CHECK-NEXT: imulq %rax, %rdi
134134; CHECK-NEXT: addq %rsi, %rdi
135135; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r8
136- ; CHECK-NEXT: movl (%r8), %edx
137- ; CHECK-NEXT: leal 8(,%rbx ,8), %eax
136+ ; CHECK-NEXT: movl (%r8), %r9d
137+ ; CHECK-NEXT: leal 8(,%rdx ,8), %eax
138138; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
139- ; CHECK-NEXT: leaq 32(%rsi), %r11
140- ; CHECK-NEXT: leaq 8(,%rbx ,8), %rbx
141- ; CHECK-NEXT: xorl %r14d , %r14d
142- ; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r15
143- ; CHECK-NEXT: movq %rsi, %r12
139+ ; CHECK-NEXT: leaq 32(%rsi), %rbx
140+ ; CHECK-NEXT: leaq 8(,%rdx ,8), %r14
141+ ; CHECK-NEXT: xorl %r15d , %r15d
142+ ; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12
143+ ; CHECK-NEXT: movq %rsi, %r13
144144; CHECK-NEXT: jmp .LBB1_2
145145; CHECK-NEXT: .p2align 4
146146; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge
147147; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
148- ; CHECK-NEXT: movl %edx , (%r8)
148+ ; CHECK-NEXT: movl %r9d , (%r8)
149149; CHECK-NEXT: .LBB1_16: # %for.inc3
150150; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
151- ; CHECK-NEXT: addq %rbx , %r12
152- ; CHECK-NEXT: incq %r14
153- ; CHECK-NEXT: addq %rbx , %r11
151+ ; CHECK-NEXT: addq %r14 , %r13
152+ ; CHECK-NEXT: incq %r15
153+ ; CHECK-NEXT: addq %r14 , %rbx
154154; CHECK-NEXT: incl %ecx
155155; CHECK-NEXT: je .LBB1_17
156156; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split
157157; CHECK-NEXT: # =>This Loop Header: Depth=1
158158; CHECK-NEXT: # Child Loop BB1_12 Depth 2
159159; CHECK-NEXT: # Child Loop BB1_14 Depth 2
160- ; CHECK-NEXT: testl %edx , %edx
160+ ; CHECK-NEXT: testl %r9d , %r9d
161161; CHECK-NEXT: jns .LBB1_16
162162; CHECK-NEXT: # %bb.3: # %for.body2.preheader
163163; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
164- ; CHECK-NEXT: movslq %edx , %r13
165- ; CHECK-NEXT: testq %r13 , %r13
164+ ; CHECK-NEXT: movslq %r9d , %r9
165+ ; CHECK-NEXT: testq %r9 , %r9
166166; CHECK-NEXT: movq $-1, %rbp
167- ; CHECK-NEXT: cmovnsq %r13 , %rbp
168- ; CHECK-NEXT: subq %r13 , %rbp
167+ ; CHECK-NEXT: cmovnsq %r9 , %rbp
168+ ; CHECK-NEXT: subq %r9 , %rbp
169169; CHECK-NEXT: incq %rbp
170170; CHECK-NEXT: cmpq $4, %rbp
171171; CHECK-NEXT: jb .LBB1_14
@@ -177,20 +177,20 @@ define void @_Z2x6v() local_unnamed_addr {
177177; CHECK-NEXT: # %bb.5: # %vector.memcheck
178178; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
179179; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
180- ; CHECK-NEXT: imulq %r14 , %rax
181- ; CHECK-NEXT: leaq (%rsi,%rax), %r10
182- ; CHECK-NEXT: leaq (%r10,%r13 ,8), %r9
183- ; CHECK-NEXT: testq %r13 , %r13
184- ; CHECK-NEXT: movq $-1, %r10
185- ; CHECK-NEXT: cmovnsq %r13 , %r10
186- ; CHECK-NEXT: cmpq %r15 , %r9
180+ ; CHECK-NEXT: imulq %r15 , %rax
181+ ; CHECK-NEXT: leaq (%rsi,%rax), %r11
182+ ; CHECK-NEXT: leaq (%r11,%r9 ,8), %r10
183+ ; CHECK-NEXT: testq %r9 , %r9
184+ ; CHECK-NEXT: movq $-1, %r11
185+ ; CHECK-NEXT: cmovnsq %r9 , %r11
186+ ; CHECK-NEXT: cmpq %r12 , %r10
187187; CHECK-NEXT: jae .LBB1_7
188188; CHECK-NEXT: # %bb.6: # %vector.memcheck
189189; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
190- ; CHECK-NEXT: leaq 8(%rsi), %r9
191- ; CHECK-NEXT: addq %r9 , %rax
192- ; CHECK-NEXT: leaq (%rax,%r10 ,8), %rax
193- ; CHECK-NEXT: cmpq %r15 , %rax
190+ ; CHECK-NEXT: leaq 8(%rsi), %r10
191+ ; CHECK-NEXT: addq %r10 , %rax
192+ ; CHECK-NEXT: leaq (%rax,%r11 ,8), %rax
193+ ; CHECK-NEXT: cmpq %r12 , %rax
194194; CHECK-NEXT: ja .LBB1_14
195195; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
196196; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
@@ -201,58 +201,54 @@ define void @_Z2x6v() local_unnamed_addr {
201201; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
202202; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
203203; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
204- ; CHECK-NEXT: movdqu %xmm0, (%r12,%r13 ,8)
205- ; CHECK-NEXT: movdqu %xmm0, 16(%r12,%r13 ,8)
206- ; CHECK-NEXT: movl $4, %r10d
204+ ; CHECK-NEXT: movdqu %xmm0, (%r13,%r9 ,8)
205+ ; CHECK-NEXT: movdqu %xmm0, 16(%r13,%r9 ,8)
206+ ; CHECK-NEXT: movl $4, %r11d
207207; CHECK-NEXT: shrq $2, %rax
208208; CHECK-NEXT: jne .LBB1_11
209209; CHECK-NEXT: jmp .LBB1_13
210210; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
211- ; CHECK-NEXT: xorl %r10d , %r10d
211+ ; CHECK-NEXT: xorl %r11d , %r11d
212212; CHECK-NEXT: shrq $2, %rax
213213; CHECK-NEXT: je .LBB1_13
214214; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
215215; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
216216; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
217217; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
218- ; CHECK-NEXT: movq %r10 , %rax
218+ ; CHECK-NEXT: movq %r11 , %rax
219219; CHECK-NEXT: subq %rdx, %rax
220- ; CHECK-NEXT: addq %r13 , %r10
221- ; CHECK-NEXT: leaq (%r11,%r10 ,8), %r10
220+ ; CHECK-NEXT: addq %r9 , %r11
221+ ; CHECK-NEXT: leaq (%rbx,%r11 ,8), %r11
222222; CHECK-NEXT: .p2align 4
223223; CHECK-NEXT: .LBB1_12: # %vector.body
224224; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
225225; CHECK-NEXT: # => This Inner Loop Header: Depth=2
226- ; CHECK-NEXT: movdqu %xmm0, -32(%r10 )
227- ; CHECK-NEXT: movdqu %xmm0, -16(%r10 )
228- ; CHECK-NEXT: movdqu %xmm0, (%r10 )
229- ; CHECK-NEXT: movdqu %xmm0, 16(%r10 )
230- ; CHECK-NEXT: addq $64, %r10
226+ ; CHECK-NEXT: movdqu %xmm0, -32(%r11 )
227+ ; CHECK-NEXT: movdqu %xmm0, -16(%r11 )
228+ ; CHECK-NEXT: movdqu %xmm0, (%r11 )
229+ ; CHECK-NEXT: movdqu %xmm0, 16(%r11 )
230+ ; CHECK-NEXT: addq $64, %r11
231231; CHECK-NEXT: addq $8, %rax
232232; CHECK-NEXT: jne .LBB1_12
233233; CHECK-NEXT: .LBB1_13: # %middle.block
234234; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
235- ; CHECK-NEXT: addq %rdx, %r13
235+ ; CHECK-NEXT: addq %rdx, %r9
236236; CHECK-NEXT: cmpq %rdx, %rbp
237- ; CHECK-NEXT: movq %r13, %rdx
238237; CHECK-NEXT: je .LBB1_15
239238; CHECK-NEXT: .p2align 4
240239; CHECK-NEXT: .LBB1_14: # %for.body2
241240; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
242241; CHECK-NEXT: # => This Inner Loop Header: Depth=2
243- ; CHECK-NEXT: movq (%r15), %rax
244- ; CHECK-NEXT: movq %rax, (%r12,%r13,8)
245- ; CHECK-NEXT: leaq 1(%r13), %rdx
246- ; CHECK-NEXT: cmpq $-1, %r13
247- ; CHECK-NEXT: movq %rdx, %r13
242+ ; CHECK-NEXT: movq (%r12), %rax
243+ ; CHECK-NEXT: movq %rax, (%r13,%r9,8)
244+ ; CHECK-NEXT: incq %r9
248245; CHECK-NEXT: jl .LBB1_14
249246; CHECK-NEXT: jmp .LBB1_15
250247; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge
251248; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rax
252249; CHECK-NEXT: movq %rdi, (%rax)
253250; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
254251; CHECK-NEXT: movl $0, (%rax)
255- ; CHECK-NEXT: .LBB1_18: # %for.end5
256252; CHECK-NEXT: popq %rbx
257253; CHECK-NEXT: .cfi_def_cfa_offset 48
258254; CHECK-NEXT: popq %r12
@@ -265,6 +261,13 @@ define void @_Z2x6v() local_unnamed_addr {
265261; CHECK-NEXT: .cfi_def_cfa_offset 16
266262; CHECK-NEXT: popq %rbp
267263; CHECK-NEXT: .cfi_def_cfa_offset 8
264+ ; CHECK-NEXT: .cfi_restore %rbx
265+ ; CHECK-NEXT: .cfi_restore %r12
266+ ; CHECK-NEXT: .cfi_restore %r13
267+ ; CHECK-NEXT: .cfi_restore %r14
268+ ; CHECK-NEXT: .cfi_restore %r15
269+ ; CHECK-NEXT: .cfi_restore %rbp
270+ ; CHECK-NEXT: .LBB1_18: # %for.end5
268271; CHECK-NEXT: retq
269272entry:
270273 %0 = load i32 , ptr @x1 , align 4
0 commit comments