Skip to content

Commit ea2de9a

Browse files
authored
[X86] Fold generic ADD/SUB with constants to X86ISD::SUB/ADD (#164316)
Fixes #163125.

This PR enhances `combineX86AddSub` so that an `X86ISD::ADD`/`X86ISD::SUB` node with a constant operand can also absorb an equivalent generic ADD/SUB that uses the negated constant, e.g. `X86ISD::SUB(X, Constant)` will fold `add(X, -Constant)`. The handled cases are:

- `X86ISD::ADD(LHS, C)` will fold `sub(-C, LHS)`
- `X86ISD::SUB(LHS, C)` will fold `add(LHS, -C)`
- `X86ISD::SUB(C, RHS)` will fold `add(RHS, -C)`

`CodeGen/X86/dag-update-nodetomatch.ll` is updated because the following IR is now folded:

```llvm
for.body2:
  ; ......
  ; This generates `add t6, Constant:i64<1>`
  %indvars.iv.next = add nsw i64 %indvars.iv, 1;
  ; This generates `X86ISD::SUB t6, Constant:i64<-1>` and folds the previous `add`
  %cmp = icmp slt i64 %indvars.iv, -1;
  br i1 %cmp, label %for.body2, label %for.cond1.for.inc3_crit_edge.loopexit
```

```diff
- ; CHECK-NEXT: movq (%r15), %rax
- ; CHECK-NEXT: movq %rax, (%r12,%r13,8)
- ; CHECK-NEXT: leaq 1(%r13), %rdx
- ; CHECK-NEXT: cmpq $-1, %r13
- ; CHECK-NEXT: movq %rdx, %r13
+ ; CHECK-NEXT: movq (%r12), %rax
+ ; CHECK-NEXT: movq %rax, (%r13,%r9,8)
+ ; CHECK-NEXT: incq %r9
```
1 parent 6b30d21 commit ea2de9a

File tree

4 files changed

+216
-67
lines changed

4 files changed

+216
-67
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57617,10 +57617,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
5761757617
}
5761857618

5761957619
// Fold any similar generic ADD/SUB opcodes to reuse this node.
57620-
auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
57620+
auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) {
5762157621
SDValue Ops[] = {N0, N1};
5762257622
SDVTList VTs = DAG.getVTList(N->getValueType(0));
57623-
if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
57623+
if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) {
5762457624
SDValue Op(N, 0);
5762557625
if (Negate) {
5762657626
// Bail if this is only used by a user of the x86 add/sub.
@@ -57632,8 +57632,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
5763257632
DCI.CombineTo(GenericAddSub, Op);
5763357633
}
5763457634
};
57635-
MatchGeneric(LHS, RHS, false);
57636-
MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
57635+
MatchGeneric(GenericOpc, LHS, RHS, false);
57636+
MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode());
57637+
57638+
if (auto *Const = dyn_cast<ConstantSDNode>(RHS)) {
57639+
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
57640+
if (X86ISD::SUB == N->getOpcode()) {
57641+
// Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C).
57642+
MatchGeneric(ISD::ADD, LHS, NegC, false);
57643+
} else {
57644+
// Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS).
57645+
MatchGeneric(ISD::SUB, NegC, LHS, true);
57646+
}
57647+
} else if (auto *Const = dyn_cast<ConstantSDNode>(LHS)) {
57648+
if (X86ISD::SUB == N->getOpcode()) {
57649+
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
57650+
// Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).
57651+
MatchGeneric(ISD::ADD, RHS, NegC, true);
57652+
}
57653+
}
5763757654

5763857655
// TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
5763957656
// EFLAGS result doesn't change.

llvm/test/CodeGen/X86/combine-adc.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,4 +89,52 @@ define i32 @adc_merge_constants(i32 %a0) nounwind {
8989
ret i32 %sum
9090
}
9191

92+
define i32 @adc_merge_sub(i32 %a0) nounwind {
93+
; X86-LABEL: adc_merge_sub:
94+
; X86: # %bb.0:
95+
; X86-NEXT: pushl %edi
96+
; X86-NEXT: pushl %esi
97+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
98+
; X86-NEXT: xorl %eax, %eax
99+
; X86-NEXT: addl $42, %edi
100+
; X86-NEXT: setb %al
101+
; X86-NEXT: movl %edi, %esi
102+
; X86-NEXT: negl %esi
103+
; X86-NEXT: pushl %eax
104+
; X86-NEXT: calll use@PLT
105+
; X86-NEXT: addl $4, %esp
106+
; X86-NEXT: xorl %edi, %esi
107+
; X86-NEXT: movl %esi, %eax
108+
; X86-NEXT: popl %esi
109+
; X86-NEXT: popl %edi
110+
; X86-NEXT: retl
111+
;
112+
; X64-LABEL: adc_merge_sub:
113+
; X64: # %bb.0:
114+
; X64-NEXT: pushq %rbp
115+
; X64-NEXT: pushq %rbx
116+
; X64-NEXT: pushq %rax
117+
; X64-NEXT: movl %edi, %ebx
118+
; X64-NEXT: xorl %edi, %edi
119+
; X64-NEXT: addl $42, %ebx
120+
; X64-NEXT: setb %dil
121+
; X64-NEXT: movl %ebx, %ebp
122+
; X64-NEXT: negl %ebp
123+
; X64-NEXT: callq use@PLT
124+
; X64-NEXT: xorl %ebx, %ebp
125+
; X64-NEXT: movl %ebp, %eax
126+
; X64-NEXT: addq $8, %rsp
127+
; X64-NEXT: popq %rbx
128+
; X64-NEXT: popq %rbp
129+
; X64-NEXT: retq
130+
%adc = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %a0, i32 42)
131+
%carry = extractvalue { i8, i32 } %adc, 0
132+
call void @use(i8 %carry)
133+
%sum = extractvalue { i8, i32 } %adc, 1
134+
%sub = sub i32 -42, %a0
135+
%result = xor i32 %sum, %sub
136+
ret i32 %result
137+
}
138+
92139
declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
140+
declare void @use(i8)

llvm/test/CodeGen/X86/combine-sbb.ll

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,4 +333,85 @@ define i32 @PR40483_sub6(ptr, i32) nounwind {
333333
ret i32 %10
334334
}
335335

336+
define i32 @sbb_merge_add1(i32 %a0) nounwind {
337+
; X86-LABEL: sbb_merge_add1:
338+
; X86: # %bb.0:
339+
; X86-NEXT: xorl %eax, %eax
340+
; X86-NEXT: cmpl $42, {{[0-9]+}}(%esp)
341+
; X86-NEXT: setb %al
342+
; X86-NEXT: pushl %eax
343+
; X86-NEXT: calll use@PLT
344+
; X86-NEXT: addl $4, %esp
345+
; X86-NEXT: xorl %eax, %eax
346+
; X86-NEXT: retl
347+
;
348+
; X64-LABEL: sbb_merge_add1:
349+
; X64: # %bb.0:
350+
; X64-NEXT: pushq %rax
351+
; X64-NEXT: xorl %eax, %eax
352+
; X64-NEXT: cmpl $42, %edi
353+
; X64-NEXT: setb %al
354+
; X64-NEXT: movl %eax, %edi
355+
; X64-NEXT: callq use@PLT
356+
; X64-NEXT: xorl %eax, %eax
357+
; X64-NEXT: popq %rcx
358+
; X64-NEXT: retq
359+
%sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %a0, i32 42)
360+
%borrow = extractvalue { i8, i32 } %sbb, 0
361+
call void @use(i8 %borrow)
362+
%diff = extractvalue { i8, i32 } %sbb, 1
363+
%add = add i32 %a0, -42
364+
%result = xor i32 %diff, %add
365+
ret i32 %result
366+
}
367+
368+
define i32 @sbb_merge_add2(i32 %a0) nounwind {
369+
; X86-LABEL: sbb_merge_add2:
370+
; X86: # %bb.0:
371+
; X86-NEXT: pushl %edi
372+
; X86-NEXT: pushl %esi
373+
; X86-NEXT: movl $42, %edi
374+
; X86-NEXT: xorl %eax, %eax
375+
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
376+
; X86-NEXT: setb %al
377+
; X86-NEXT: movl %edi, %esi
378+
; X86-NEXT: negl %esi
379+
; X86-NEXT: pushl %eax
380+
; X86-NEXT: calll use@PLT
381+
; X86-NEXT: addl $4, %esp
382+
; X86-NEXT: xorl %edi, %esi
383+
; X86-NEXT: movl %esi, %eax
384+
; X86-NEXT: popl %esi
385+
; X86-NEXT: popl %edi
386+
; X86-NEXT: retl
387+
;
388+
; X64-LABEL: sbb_merge_add2:
389+
; X64: # %bb.0:
390+
; X64-NEXT: pushq %rbp
391+
; X64-NEXT: pushq %rbx
392+
; X64-NEXT: pushq %rax
393+
; X64-NEXT: movl $42, %ebp
394+
; X64-NEXT: xorl %eax, %eax
395+
; X64-NEXT: subl %edi, %ebp
396+
; X64-NEXT: setb %al
397+
; X64-NEXT: movl %ebp, %ebx
398+
; X64-NEXT: negl %ebx
399+
; X64-NEXT: movl %eax, %edi
400+
; X64-NEXT: callq use@PLT
401+
; X64-NEXT: xorl %ebp, %ebx
402+
; X64-NEXT: movl %ebx, %eax
403+
; X64-NEXT: addq $8, %rsp
404+
; X64-NEXT: popq %rbx
405+
; X64-NEXT: popq %rbp
406+
; X64-NEXT: retq
407+
%sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 42, i32 %a0)
408+
%borrow = extractvalue { i8, i32 } %sbb, 0
409+
call void @use(i8 %borrow)
410+
%diff = extractvalue { i8, i32 } %sbb, 1
411+
%add = add i32 %a0, -42
412+
%result = xor i32 %diff, %add
413+
ret i32 %result
414+
}
415+
336416
declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)
417+
declare void @use(i8)

llvm/test/CodeGen/X86/dag-update-nodetomatch.ll

Lines changed: 66 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,17 @@ entry:
9696
define void @_Z2x6v() local_unnamed_addr {
9797
; CHECK-LABEL: _Z2x6v:
9898
; CHECK: # %bb.0: # %entry
99+
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
100+
; CHECK-NEXT: movl (%rax), %edx
101+
; CHECK-NEXT: andl $511, %edx # imm = 0x1FF
102+
; CHECK-NEXT: leaq 1(%rdx), %rax
103+
; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
104+
; CHECK-NEXT: movl %eax, (%rcx)
105+
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
106+
; CHECK-NEXT: movl (%rcx), %ecx
107+
; CHECK-NEXT: testl %ecx, %ecx
108+
; CHECK-NEXT: je .LBB1_18
109+
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
99110
; CHECK-NEXT: pushq %rbp
100111
; CHECK-NEXT: .cfi_def_cfa_offset 16
101112
; CHECK-NEXT: pushq %r15
@@ -114,58 +125,47 @@ define void @_Z2x6v() local_unnamed_addr {
114125
; CHECK-NEXT: .cfi_offset %r14, -32
115126
; CHECK-NEXT: .cfi_offset %r15, -24
116127
; CHECK-NEXT: .cfi_offset %rbp, -16
117-
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
118-
; CHECK-NEXT: movl (%rax), %ebx
119-
; CHECK-NEXT: andl $511, %ebx # imm = 0x1FF
120-
; CHECK-NEXT: leaq 1(%rbx), %rax
121-
; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
122-
; CHECK-NEXT: movl %eax, (%rcx)
123-
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
124-
; CHECK-NEXT: movl (%rcx), %ecx
125-
; CHECK-NEXT: testl %ecx, %ecx
126-
; CHECK-NEXT: je .LBB1_18
127-
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
128-
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdx
129-
; CHECK-NEXT: movq (%rdx), %rsi
130-
; CHECK-NEXT: movl %ecx, %edx
131-
; CHECK-NEXT: notl %edx
132-
; CHECK-NEXT: leaq 8(,%rdx,8), %rdi
128+
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rsi
129+
; CHECK-NEXT: movq (%rsi), %rsi
130+
; CHECK-NEXT: movl %ecx, %edi
131+
; CHECK-NEXT: notl %edi
132+
; CHECK-NEXT: leaq 8(,%rdi,8), %rdi
133133
; CHECK-NEXT: imulq %rax, %rdi
134134
; CHECK-NEXT: addq %rsi, %rdi
135135
; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r8
136-
; CHECK-NEXT: movl (%r8), %edx
137-
; CHECK-NEXT: leal 8(,%rbx,8), %eax
136+
; CHECK-NEXT: movl (%r8), %r9d
137+
; CHECK-NEXT: leal 8(,%rdx,8), %eax
138138
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
139-
; CHECK-NEXT: leaq 32(%rsi), %r11
140-
; CHECK-NEXT: leaq 8(,%rbx,8), %rbx
141-
; CHECK-NEXT: xorl %r14d, %r14d
142-
; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r15
143-
; CHECK-NEXT: movq %rsi, %r12
139+
; CHECK-NEXT: leaq 32(%rsi), %rbx
140+
; CHECK-NEXT: leaq 8(,%rdx,8), %r14
141+
; CHECK-NEXT: xorl %r15d, %r15d
142+
; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12
143+
; CHECK-NEXT: movq %rsi, %r13
144144
; CHECK-NEXT: jmp .LBB1_2
145145
; CHECK-NEXT: .p2align 4
146146
; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge
147147
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
148-
; CHECK-NEXT: movl %edx, (%r8)
148+
; CHECK-NEXT: movl %r9d, (%r8)
149149
; CHECK-NEXT: .LBB1_16: # %for.inc3
150150
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
151-
; CHECK-NEXT: addq %rbx, %r12
152-
; CHECK-NEXT: incq %r14
153-
; CHECK-NEXT: addq %rbx, %r11
151+
; CHECK-NEXT: addq %r14, %r13
152+
; CHECK-NEXT: incq %r15
153+
; CHECK-NEXT: addq %r14, %rbx
154154
; CHECK-NEXT: incl %ecx
155155
; CHECK-NEXT: je .LBB1_17
156156
; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split
157157
; CHECK-NEXT: # =>This Loop Header: Depth=1
158158
; CHECK-NEXT: # Child Loop BB1_12 Depth 2
159159
; CHECK-NEXT: # Child Loop BB1_14 Depth 2
160-
; CHECK-NEXT: testl %edx, %edx
160+
; CHECK-NEXT: testl %r9d, %r9d
161161
; CHECK-NEXT: jns .LBB1_16
162162
; CHECK-NEXT: # %bb.3: # %for.body2.preheader
163163
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
164-
; CHECK-NEXT: movslq %edx, %r13
165-
; CHECK-NEXT: testq %r13, %r13
164+
; CHECK-NEXT: movslq %r9d, %r9
165+
; CHECK-NEXT: testq %r9, %r9
166166
; CHECK-NEXT: movq $-1, %rbp
167-
; CHECK-NEXT: cmovnsq %r13, %rbp
168-
; CHECK-NEXT: subq %r13, %rbp
167+
; CHECK-NEXT: cmovnsq %r9, %rbp
168+
; CHECK-NEXT: subq %r9, %rbp
169169
; CHECK-NEXT: incq %rbp
170170
; CHECK-NEXT: cmpq $4, %rbp
171171
; CHECK-NEXT: jb .LBB1_14
@@ -177,20 +177,20 @@ define void @_Z2x6v() local_unnamed_addr {
177177
; CHECK-NEXT: # %bb.5: # %vector.memcheck
178178
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
179179
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
180-
; CHECK-NEXT: imulq %r14, %rax
181-
; CHECK-NEXT: leaq (%rsi,%rax), %r10
182-
; CHECK-NEXT: leaq (%r10,%r13,8), %r9
183-
; CHECK-NEXT: testq %r13, %r13
184-
; CHECK-NEXT: movq $-1, %r10
185-
; CHECK-NEXT: cmovnsq %r13, %r10
186-
; CHECK-NEXT: cmpq %r15, %r9
180+
; CHECK-NEXT: imulq %r15, %rax
181+
; CHECK-NEXT: leaq (%rsi,%rax), %r11
182+
; CHECK-NEXT: leaq (%r11,%r9,8), %r10
183+
; CHECK-NEXT: testq %r9, %r9
184+
; CHECK-NEXT: movq $-1, %r11
185+
; CHECK-NEXT: cmovnsq %r9, %r11
186+
; CHECK-NEXT: cmpq %r12, %r10
187187
; CHECK-NEXT: jae .LBB1_7
188188
; CHECK-NEXT: # %bb.6: # %vector.memcheck
189189
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
190-
; CHECK-NEXT: leaq 8(%rsi), %r9
191-
; CHECK-NEXT: addq %r9, %rax
192-
; CHECK-NEXT: leaq (%rax,%r10,8), %rax
193-
; CHECK-NEXT: cmpq %r15, %rax
190+
; CHECK-NEXT: leaq 8(%rsi), %r10
191+
; CHECK-NEXT: addq %r10, %rax
192+
; CHECK-NEXT: leaq (%rax,%r11,8), %rax
193+
; CHECK-NEXT: cmpq %r12, %rax
194194
; CHECK-NEXT: ja .LBB1_14
195195
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
196196
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
@@ -201,58 +201,54 @@ define void @_Z2x6v() local_unnamed_addr {
201201
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
202202
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
203203
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
204-
; CHECK-NEXT: movdqu %xmm0, (%r12,%r13,8)
205-
; CHECK-NEXT: movdqu %xmm0, 16(%r12,%r13,8)
206-
; CHECK-NEXT: movl $4, %r10d
204+
; CHECK-NEXT: movdqu %xmm0, (%r13,%r9,8)
205+
; CHECK-NEXT: movdqu %xmm0, 16(%r13,%r9,8)
206+
; CHECK-NEXT: movl $4, %r11d
207207
; CHECK-NEXT: shrq $2, %rax
208208
; CHECK-NEXT: jne .LBB1_11
209209
; CHECK-NEXT: jmp .LBB1_13
210210
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
211-
; CHECK-NEXT: xorl %r10d, %r10d
211+
; CHECK-NEXT: xorl %r11d, %r11d
212212
; CHECK-NEXT: shrq $2, %rax
213213
; CHECK-NEXT: je .LBB1_13
214214
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
215215
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
216216
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
217217
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
218-
; CHECK-NEXT: movq %r10, %rax
218+
; CHECK-NEXT: movq %r11, %rax
219219
; CHECK-NEXT: subq %rdx, %rax
220-
; CHECK-NEXT: addq %r13, %r10
221-
; CHECK-NEXT: leaq (%r11,%r10,8), %r10
220+
; CHECK-NEXT: addq %r9, %r11
221+
; CHECK-NEXT: leaq (%rbx,%r11,8), %r11
222222
; CHECK-NEXT: .p2align 4
223223
; CHECK-NEXT: .LBB1_12: # %vector.body
224224
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
225225
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
226-
; CHECK-NEXT: movdqu %xmm0, -32(%r10)
227-
; CHECK-NEXT: movdqu %xmm0, -16(%r10)
228-
; CHECK-NEXT: movdqu %xmm0, (%r10)
229-
; CHECK-NEXT: movdqu %xmm0, 16(%r10)
230-
; CHECK-NEXT: addq $64, %r10
226+
; CHECK-NEXT: movdqu %xmm0, -32(%r11)
227+
; CHECK-NEXT: movdqu %xmm0, -16(%r11)
228+
; CHECK-NEXT: movdqu %xmm0, (%r11)
229+
; CHECK-NEXT: movdqu %xmm0, 16(%r11)
230+
; CHECK-NEXT: addq $64, %r11
231231
; CHECK-NEXT: addq $8, %rax
232232
; CHECK-NEXT: jne .LBB1_12
233233
; CHECK-NEXT: .LBB1_13: # %middle.block
234234
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
235-
; CHECK-NEXT: addq %rdx, %r13
235+
; CHECK-NEXT: addq %rdx, %r9
236236
; CHECK-NEXT: cmpq %rdx, %rbp
237-
; CHECK-NEXT: movq %r13, %rdx
238237
; CHECK-NEXT: je .LBB1_15
239238
; CHECK-NEXT: .p2align 4
240239
; CHECK-NEXT: .LBB1_14: # %for.body2
241240
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
242241
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
243-
; CHECK-NEXT: movq (%r15), %rax
244-
; CHECK-NEXT: movq %rax, (%r12,%r13,8)
245-
; CHECK-NEXT: leaq 1(%r13), %rdx
246-
; CHECK-NEXT: cmpq $-1, %r13
247-
; CHECK-NEXT: movq %rdx, %r13
242+
; CHECK-NEXT: movq (%r12), %rax
243+
; CHECK-NEXT: movq %rax, (%r13,%r9,8)
244+
; CHECK-NEXT: incq %r9
248245
; CHECK-NEXT: jl .LBB1_14
249246
; CHECK-NEXT: jmp .LBB1_15
250247
; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge
251248
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rax
252249
; CHECK-NEXT: movq %rdi, (%rax)
253250
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
254251
; CHECK-NEXT: movl $0, (%rax)
255-
; CHECK-NEXT: .LBB1_18: # %for.end5
256252
; CHECK-NEXT: popq %rbx
257253
; CHECK-NEXT: .cfi_def_cfa_offset 48
258254
; CHECK-NEXT: popq %r12
@@ -265,6 +261,13 @@ define void @_Z2x6v() local_unnamed_addr {
265261
; CHECK-NEXT: .cfi_def_cfa_offset 16
266262
; CHECK-NEXT: popq %rbp
267263
; CHECK-NEXT: .cfi_def_cfa_offset 8
264+
; CHECK-NEXT: .cfi_restore %rbx
265+
; CHECK-NEXT: .cfi_restore %r12
266+
; CHECK-NEXT: .cfi_restore %r13
267+
; CHECK-NEXT: .cfi_restore %r14
268+
; CHECK-NEXT: .cfi_restore %r15
269+
; CHECK-NEXT: .cfi_restore %rbp
270+
; CHECK-NEXT: .LBB1_18: # %for.end5
268271
; CHECK-NEXT: retq
269272
entry:
270273
%0 = load i32, ptr @x1, align 4

0 commit comments

Comments
 (0)