Skip to content

Commit 380fb33

Browse files
committed
Fixups
Change-Id: I44aaa861c82fa44fe4c762366572a7367e8bf5c0
1 parent 97c0833 commit 380fb33

File tree

4 files changed

+205
-71
lines changed

4 files changed

+205
-71
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,20 +56,20 @@
5656
// | async context if needed |
5757
// | (a.k.a. "frame record") |
5858
// |-----------------------------------| <- fp(=x29)
59-
// Default SVE stack layout Split SVE objects
60-
// (aarch64-split-sve-objects=false) (aarch64-split-sve-objects=true)
61-
// |-----------------------------------| |-----------------------------------|
62-
// | <hazard padding> | | callee-saved PPR registers |
63-
// |-----------------------------------| |-----------------------------------|
64-
// | | | PPR stack objects |
65-
// | callee-saved fp/simd/SVE regs | |-----------------------------------|
66-
// | | | <hazard padding> |
67-
// |-----------------------------------| |-----------------------------------|
68-
// | | | callee-saved ZPR registers |
69-
// | SVE stack objects | |-----------------------------------|
70-
// | | | ZPR stack objects |
71-
// |-----------------------------------| |-----------------------------------|
72-
// |.empty.space.to.make.part.below....|
59+
// Default SVE stack layout Split SVE objects
60+
// (aarch64-split-sve-objects=false) (aarch64-split-sve-objects=true)
61+
// |-----------------------------------| |-----------------------------------|
62+
// | <hazard padding> | | callee-saved PPR registers |
63+
// |-----------------------------------| |-----------------------------------|
64+
// | | | PPR stack objects |
65+
// | callee-saved fp/simd/SVE regs | |-----------------------------------|
66+
// | | | <hazard padding> |
67+
// |-----------------------------------| |-----------------------------------|
68+
// | | | callee-saved ZPR/FPR registers |
69+
// | SVE stack objects | |-----------------------------------|
70+
// | | | ZPR stack objects |
71+
// |-----------------------------------| |-----------------------------------|
72+
// |.empty.space.to.make.part.below....| ^ NB: FPR CSRs are promoted to ZPRs
7373
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
7474
// |.the.standard.16-byte.alignment....| compile time; if present)
7575
// |-----------------------------------|
@@ -2434,6 +2434,7 @@ void AArch64FrameLowering::determineStackHazardSlot(
24342434
SavedRegs |= FPRZRegs;
24352435

24362436
AFI->setSplitSVEObjects(true);
2437+
LLVM_DEBUG(dbgs() << "SplitSVEObjects enabled!\n");
24372438
}
24382439
}
24392440

llvm/test/CodeGen/AArch64/framelayout-split-sve.mir

Lines changed: 116 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -97,20 +97,29 @@
9797
# CHECK-NEXT: RET_ReallyLR
9898

9999
# ASM-LABEL: test_allocate_split_sve:
100-
# ASM: .cfi_def_cfa_offset 16
101-
# ASM-NEXT: .cfi_offset w29, -16
102-
# ASM: sub sp, sp, #1024
103-
# ASM: .cfi_def_cfa_offset 1040
104-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
105-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
106-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG
100+
# ASM: str x29, [sp, #-16]!
101+
# ASM-NEXT: .cfi_def_cfa_offset 16
102+
# ASM-NEXT: .cfi_offset w29, -16
103+
# ASM-NEXT: sub sp, sp, #1024
104+
# ASM-NEXT: .cfi_def_cfa_offset 1040
105+
# ASM-NEXT: addvl sp, sp, #-1
106+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
107+
# ASM-NEXT: sub sp, sp, #1040
108+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
109+
# ASM-NEXT: addvl sp, sp, #-2
110+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG
107111
#
108-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
109-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG
110-
# ASM: .cfi_def_cfa wsp, 1056
111-
# ASM: .cfi_def_cfa_offset 16
112-
# ASM: .cfi_def_cfa_offset 0
113-
# ASM: .cfi_restore w29
112+
# ASM: addvl sp, sp, #2
113+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
114+
# ASM-NEXT: add sp, sp, #1024
115+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG
116+
# ASM-NEXT: addvl sp, sp, #1
117+
# ASM-NEXT: .cfi_def_cfa wsp, 1056
118+
# ASM-NEXT: add sp, sp, #1040
119+
# ASM-NEXT: .cfi_def_cfa_offset 16
120+
# ASM-NEXT: ldr x29, [sp], #16
121+
# ASM-NEXT: .cfi_def_cfa_offset 0
122+
# ASM-NEXT: .cfi_restore w29
114123

115124
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
116125
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
@@ -206,15 +215,23 @@ body: |
206215
# CHECK-NEXT: RET_ReallyLR
207216

208217
# ASM-LABEL: test_allocate_split_sve_realigned
209-
# ASM: .cfi_def_cfa_offset 1040
210-
# ASM: .cfi_def_cfa w29, 16
211-
# ASM-NEXT: .cfi_offset w30, -8
212-
# ASM-NEXT: .cfi_offset w29, -16
218+
# ASM: sub sp, sp, #1040
219+
# ASM-NEXT: .cfi_def_cfa_offset 1040
220+
# ASM-NEXT: str x29, [sp, #1024]
221+
# ASM-NEXT: str x30, [sp, #1032]
222+
# ASM-NEXT: add x29, sp, #1024
223+
# ASM-NEXT: .cfi_def_cfa w29, 16
224+
# ASM-NEXT: .cfi_offset w30, -8
225+
# ASM-NEXT: .cfi_offset w29, -16
213226
#
214-
# ASM: .cfi_def_cfa wsp, 1040
215-
# ASM: .cfi_def_cfa_offset 0
216-
# ASM-NEXT: .cfi_restore w30
217-
# ASM-NEXT: .cfi_restore w29
227+
# ASM: sub sp, x29, #1024
228+
# ASM-NEXT: .cfi_def_cfa wsp, 1040
229+
# ASM-NEXT: ldr x30, [sp, #1032]
230+
# ASM-NEXT: ldr x29, [sp, #1024]
231+
# ASM-NEXT: add sp, sp, #1040
232+
# ASM-NEXT: .cfi_def_cfa_offset 0
233+
# ASM-NEXT: .cfi_restore w30
234+
# ASM-NEXT: .cfi_restore w29
218235

219236
# UNWINDINFO: DW_CFA_def_cfa_offset: +1040
220237
# UNWINDINFO: DW_CFA_def_cfa: reg29 +16
@@ -283,19 +300,29 @@ body: |
283300
# CHECK-NEXT: RET_ReallyLR
284301

285302
# ASM-LABEL: test_address_split_sve
286-
# ASM: .cfi_def_cfa_offset 16
287-
# ASM-NEXT: .cfi_offset w29, -16
288-
# ASM: .cfi_def_cfa_offset 1040
289-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
290-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
291-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG
303+
# ASM: str x29, [sp, #-16]!
304+
# ASM-NEXT: .cfi_def_cfa_offset 16
305+
# ASM-NEXT: .cfi_offset w29, -16
306+
# ASM-NEXT: sub sp, sp, #1024
307+
# ASM-NEXT: .cfi_def_cfa_offset 1040
308+
# ASM-NEXT: addvl sp, sp, #-1
309+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
310+
# ASM-NEXT: sub sp, sp, #1040
311+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
312+
# ASM-NEXT: addvl sp, sp, #-2
313+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG
292314
#
293-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
294-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG
295-
# ASM: .cfi_def_cfa wsp, 1056
296-
# ASM: .cfi_def_cfa_offset 16
297-
# ASM: .cfi_def_cfa_offset 0
298-
# ASM-NEXT: .cfi_restore w29
315+
# ASM: addvl sp, sp, #2
316+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG
317+
# ASM-NEXT: add sp, sp, #1024
318+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG
319+
# ASM-NEXT: addvl sp, sp, #1
320+
# ASM-NEXT: .cfi_def_cfa wsp, 1056
321+
# ASM-NEXT: add sp, sp, #1040
322+
# ASM-NEXT: .cfi_def_cfa_offset 16
323+
# ASM-NEXT: ldr x29, [sp], #16
324+
# ASM-NEXT: .cfi_def_cfa_offset 0
325+
# ASM-NEXT: .cfi_restore w29
299326

300327
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
301328
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16
@@ -381,15 +408,26 @@ body: |
381408
# CHECK-NEXT: RET_ReallyLR
382409

383410
# ASM-LABEL: test_address_split_sve_fp
384-
# ASM: .cfi_def_cfa_offset 16
385-
# ASM: .cfi_def_cfa w29, 16
386-
# ASM-NEXT: .cfi_offset w30, -8
387-
# ASM-NEXT: .cfi_offset w29, -16
411+
# ASM: stp x29, x30, [sp, #-16]!
412+
# ASM-NEXT: .cfi_def_cfa_offset 16
413+
# ASM-NEXT: mov x29, sp
414+
# ASM-NEXT: .cfi_def_cfa w29, 16
415+
# ASM-NEXT: .cfi_offset w30, -8
416+
# ASM-NEXT: .cfi_offset w29, -16
417+
# ASM-NEXT: sub sp, sp, #1024
418+
# ASM-NEXT: addvl sp, sp, #-1
419+
# ASM-NEXT: sub sp, sp, #1040
420+
# ASM-NEXT: addvl sp, sp, #-2
388421
#
389-
# ASM: .cfi_def_cfa wsp, 16
390-
# ASM: .cfi_def_cfa_offset 0
391-
# ASM-NEXT: .cfi_restore w30
392-
# ASM-NEXT: .cfi_restore w29
422+
# ASM: addvl sp, sp, #2
423+
# ASM-NEXT: add sp, sp, #1024
424+
# ASM-NEXT: addvl sp, sp, #1
425+
# ASM-NEXT: add sp, sp, #1040
426+
# ASM-NEXT: .cfi_def_cfa wsp, 16
427+
# ASM-NEXT: ldp x29, x30, [sp], #16
428+
# ASM-NEXT: .cfi_def_cfa_offset 0
429+
# ASM-NEXT: .cfi_restore w30
430+
# ASM-NEXT: .cfi_restore w29
393431

394432
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
395433
# UNWINDINFO: DW_CFA_def_cfa: reg29 +16
@@ -471,25 +509,47 @@ body: |
471509
# CHECK-NEXT: RET_ReallyLR
472510

473511
# ASM-LABEL: save_restore_ppr_zpr:
474-
# ASM: .cfi_def_cfa_offset 16
475-
# ASM-NEXT: .cfi_offset w29, -16
476-
# ASM: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
477-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
478-
# ASM: .cfi_escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 1040 + 32 * VG
479-
# ASM: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040
480-
# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1040
481-
# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1040
482-
# ASM: .cfi_escape 0x0f, 0x0a, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 2096 + 32 * VG
512+
# ASM: str x29, [sp, #-16]!
513+
# ASM-NEXT: .cfi_def_cfa_offset 16
514+
# ASM-NEXT: .cfi_offset w29, -16
515+
# ASM-NEXT: addvl sp, sp, #-1
516+
# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
517+
# ASM-NEXT: str p6, [sp, #5, mul vl]
518+
# ASM-NEXT: str p5, [sp, #6, mul vl]
519+
# ASM-NEXT: str p4, [sp, #7, mul vl]
520+
# ASM-NEXT: sub sp, sp, #1024
521+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG
522+
# ASM-NEXT: addvl sp, sp, #-3
523+
# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 1040 + 32 * VG
524+
# ASM-NEXT: str z10, [sp]
525+
# ASM-NEXT: str z9, [sp, #1, mul vl]
526+
# ASM-NEXT: str z8, [sp, #2, mul vl]
527+
# ASM-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1040
528+
# ASM-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1040
529+
# ASM-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1040
530+
# ASM-NEXT: sub sp, sp, #1056
531+
# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0xb0, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 2096 + 32 * VG
483532
#
484-
# ASM: .cfi_escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 1040 + 32 * VG
485-
# ASM: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 16 + 32 * VG
486-
# ASM: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
533+
# ASM: add sp, sp, #1056
534+
# ASM-NEXT: .cfi_escape 0x0f, 0x0a, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 1040 + 32 * VG
535+
# ASM-NEXT: ldr z10, [sp]
536+
# ASM-NEXT: ldr z9, [sp, #1, mul vl]
537+
# ASM-NEXT: ldr z8, [sp, #2, mul vl]
538+
# ASM-NEXT: add sp, sp, #1024
539+
# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x20, 0x1e, 0x22 // sp + 16 + 32 * VG
540+
# ASM-NEXT: addvl sp, sp, #3
541+
# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 16 + 8 * VG
487542
# ASM-NEXT: .cfi_restore z8
488543
# ASM-NEXT: .cfi_restore z9
489544
# ASM-NEXT: .cfi_restore z10
490-
# ASM: .cfi_def_cfa wsp, 16
491-
# ASM: .cfi_def_cfa_offset 0
492-
# ASM-NEXT: .cfi_restore w29
545+
# ASM-NEXT: ldr p6, [sp, #5, mul vl]
546+
# ASM-NEXT: ldr p5, [sp, #6, mul vl]
547+
# ASM-NEXT: ldr p4, [sp, #7, mul vl]
548+
# ASM-NEXT: addvl sp, sp, #1
549+
# ASM-NEXT: .cfi_def_cfa wsp, 16
550+
# ASM-NEXT: ldr x29, [sp], #16
551+
# ASM-NEXT: .cfi_def_cfa_offset 0
552+
# ASM-NEXT: .cfi_restore w29
493553

494554
# UNWINDINFO: DW_CFA_def_cfa_offset: +16
495555
# UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16

llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
22
# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -run-pass=greedy %s -o - | FileCheck %s
33
# RUN: llc -mtriple=aarch64-linux-gnu -aarch64-enable-zpr-predicate-spills -start-before=greedy -stop-after=aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=EXPAND
4-
54
--- |
65
source_filename = "<stdin>"
76
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,3 +748,77 @@ entry:
748748
ret i32 -396142473
749749
}
750750
declare ptr @memset(ptr, i32, i32)
751+
752+
; FIXME: aarch64-split-sve-objects is currently not supported in this function
753+
; as it requires stack realignment (for the 32-byte aligned alloca).
754+
; GPR CSRs
755+
; <hazard padding>
756+
; FPR CSRs
757+
; <hazard padding>
758+
; <SVE locals (PPRs and ZPRs)> <--- hazard between PPRs and ZPRs here!
759+
; <realignment padding>
760+
; -> sp
761+
define void @zpr_and_ppr_local_realignment(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr) "aarch64_pstate_sm_compatible" {
762+
; CHECK-LABEL: zpr_and_ppr_local_realignment:
763+
; CHECK: // %bb.0:
764+
; CHECK-NEXT: sub sp, sp, #1040
765+
; CHECK-NEXT: sub x9, sp, #1040
766+
; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
767+
; CHECK-NEXT: add x29, sp, #1024
768+
; CHECK-NEXT: addvl x9, x9, #-2
769+
; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
770+
; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0
771+
; CHECK-NEXT: .cfi_def_cfa w29, 16
772+
; CHECK-NEXT: .cfi_offset w30, -8
773+
; CHECK-NEXT: .cfi_offset w29, -16
774+
; CHECK-NEXT: sub x8, x29, #1024
775+
; CHECK-NEXT: str p0, [x8, #-1, mul vl]
776+
; CHECK-NEXT: str z0, [x8, #-2, mul vl]
777+
; CHECK-NEXT: str x0, [sp]
778+
; CHECK-NEXT: sub sp, x29, #1024
779+
; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
780+
; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
781+
; CHECK-NEXT: add sp, sp, #1040
782+
; CHECK-NEXT: ret
783+
%ppr_local = alloca <vscale x 16 x i1>
784+
%zpr_local = alloca <vscale x 16 x i8>
785+
%gpr_local = alloca i64, align 32
786+
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
787+
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
788+
store volatile i64 %gpr, ptr %gpr_local
789+
ret void
790+
}
791+
792+
define void @zpr_and_ppr_local_stack_probing(<vscale x 16 x i1> %pred, <vscale x 16 x i8> %vector, i64 %gpr)
793+
; CHECK-LABEL: zpr_and_ppr_local_stack_probing:
794+
; CHECK: // %bb.0:
795+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
796+
; CHECK-NEXT: sub sp, sp, #1024
797+
; CHECK-NEXT: addvl sp, sp, #-1
798+
; CHECK-NEXT: str xzr, [sp]
799+
; CHECK-NEXT: sub sp, sp, #1824
800+
; CHECK-NEXT: addvl sp, sp, #-1
801+
; CHECK-NEXT: str xzr, [sp]
802+
; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xb0, 0x16, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2864 + 16 * VG
803+
; CHECK-NEXT: .cfi_offset w29, -16
804+
; CHECK-NEXT: add x8, sp, #2848
805+
; CHECK-NEXT: str p0, [x8, #15, mul vl]
806+
; CHECK-NEXT: add x8, sp, #1824
807+
; CHECK-NEXT: str z0, [x8]
808+
; CHECK-NEXT: str x0, [sp]
809+
; CHECK-NEXT: addvl sp, sp, #1
810+
; CHECK-NEXT: add sp, sp, #1024
811+
; CHECK-NEXT: addvl sp, sp, #1
812+
; CHECK-NEXT: add sp, sp, #1824
813+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
814+
; CHECK-NEXT: ret
815+
"probe-stack"="inline-asm" "stack-probe-size"="4096" "frame-pointer"="none" "aarch64_pstate_sm_compatible"
816+
{
817+
%ppr_local = alloca <vscale x 16 x i1>
818+
%zpr_local = alloca <vscale x 16 x i8>
819+
%gpr_local = alloca i64, i64 100, align 8
820+
store volatile <vscale x 16 x i1> %pred, ptr %ppr_local
821+
store volatile <vscale x 16 x i8> %vector, ptr %zpr_local
822+
store volatile i64 %gpr, ptr %gpr_local
823+
ret void
824+
}

0 commit comments

Comments
 (0)