Skip to content

Commit 5ded065

Browse files
committed
Improve moving to the PPRs
1 parent 8b1dc2c commit 5ded065

File tree

3 files changed

+20
-29
lines changed

3 files changed

+20
-29
lines changed

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,23 +1521,22 @@ void AArch64EpilogueEmitter::emitEpilogue() {
15211521
if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
15221522
// The offset from the frame-pointer to the start of the ZPR saves.
15231523
StackOffset FPOffsetZPR =
1524-
-SVECalleeSavesSize -
1524+
-SVECalleeSavesSize - PPR.LocalsSize -
15251525
StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
1526+
// The offset from the frame-pointer to the start of the PPR saves.
1527+
StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
15261528

1527-
// With split SVE, the PPR locals are above the ZPR callee-saves.
1528-
if (ZPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split)
1529-
FPOffsetZPR -= PPR.LocalsSize;
1530-
1531-
// Deallocate the stack space by moving the SP to the start of the
1532-
// ZPR/PPR callee-save area.
1533-
moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1529+
if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1530+
// Deallocate the stack space by moving the SP to the start of the
1531+
// ZPR/PPR callee-save area.
1532+
moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1533+
}
15341534

15351535
if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1536-
// Move to the start of the PPR area (this offset may be zero).
1537-
StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1538-
emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::SP,
1539-
FPOffsetPPR - FPOffsetZPR, TII,
1540-
MachineInstr::FrameDestroy);
1536+
// Move to the start of the PPR area.
1537+
assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1538+
emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
1539+
FPOffsetPPR, TII, MachineInstr::FrameDestroy);
15411540
}
15421541
} else if (BaseForSVEDealloc == AArch64::SP) {
15431542
auto CFAOffset =

llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -935,8 +935,7 @@ define aarch64_sve_vector_pcs void @zpr_ppr_csr_vla(i64 %n) {
935935
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
936936
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
937937
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
938-
; CHECK-NEXT: add sp, sp, #1024
939-
; CHECK-NEXT: addvl sp, sp, #3
938+
; CHECK-NEXT: addvl sp, x29, #-1
940939
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
941940
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
942941
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -1091,8 +1090,7 @@ define void @sve_locals_zpr_ppr_csr_vla(i64 %n, <vscale x 16 x i1> %pred, <vscal
10911090
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
10921091
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
10931092
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
1094-
; CHECK-NEXT: add sp, sp, #1024
1095-
; CHECK-NEXT: addvl sp, sp, #4
1093+
; CHECK-NEXT: addvl sp, x29, #-1
10961094
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
10971095
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
10981096
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload

llvm/test/CodeGen/AArch64/stack-hazard.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3612,8 +3612,7 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x
36123612
; CHECK64-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
36133613
; CHECK64-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
36143614
; CHECK64-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
3615-
; CHECK64-NEXT: add sp, sp, #64
3616-
; CHECK64-NEXT: addvl sp, sp, #16
3615+
; CHECK64-NEXT: addvl sp, x29, #-2
36173616
; CHECK64-NEXT: .cfi_restore z8
36183617
; CHECK64-NEXT: .cfi_restore z9
36193618
; CHECK64-NEXT: .cfi_restore z10
@@ -3899,8 +3898,7 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x
38993898
; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
39003899
; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
39013900
; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
3902-
; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
3903-
; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16
3901+
; CHECK1024-SPLITSVE-NEXT: addvl sp, x29, #-2
39043902
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8
39053903
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9
39063904
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10
@@ -4166,8 +4164,7 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
41664164
; CHECK64-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
41674165
; CHECK64-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
41684166
; CHECK64-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
4169-
; CHECK64-NEXT: add sp, sp, #64
4170-
; CHECK64-NEXT: addvl sp, sp, #16
4167+
; CHECK64-NEXT: addvl sp, x29, #-2
41714168
; CHECK64-NEXT: .cfi_restore z8
41724169
; CHECK64-NEXT: .cfi_restore z9
41734170
; CHECK64-NEXT: .cfi_restore z10
@@ -4432,8 +4429,7 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
44324429
; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
44334430
; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
44344431
; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
4435-
; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
4436-
; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16
4432+
; CHECK1024-SPLITSVE-NEXT: addvl sp, x29, #-2
44374433
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8
44384434
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9
44394435
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10
@@ -4678,8 +4674,7 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %
46784674
; CHECK64-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
46794675
; CHECK64-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
46804676
; CHECK64-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
4681-
; CHECK64-NEXT: add sp, sp, #64
4682-
; CHECK64-NEXT: addvl sp, sp, #16
4677+
; CHECK64-NEXT: addvl sp, x29, #-2
46834678
; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
46844679
; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
46854680
; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
@@ -4910,8 +4905,7 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %
49104905
; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
49114906
; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
49124907
; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
4913-
; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
4914-
; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16
4908+
; CHECK1024-SPLITSVE-NEXT: addvl sp, x29, #-2
49154909
; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
49164910
; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
49174911
; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload

0 commit comments

Comments
 (0)