Skip to content

Commit c6cb8e4

Browse files
committed
Improve moving to the PPRs
1 parent d9d7d0f commit c6cb8e4

File tree

3 files changed

+20
-29
lines changed

3 files changed

+20
-29
lines changed

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1532,23 +1532,22 @@ void AArch64EpilogueEmitter::emitEpilogue() {
15321532
if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) {
15331533
// The offset from the frame-pointer to the start of the ZPR saves.
15341534
StackOffset FPOffsetZPR =
1535-
-SVECalleeSavesSize -
1535+
-SVECalleeSavesSize - PPR.LocalsSize -
15361536
StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset());
1537+
// The offset from the frame-pointer to the start of the PPR saves.
1538+
StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
15371539

1538-
// With split SVE, the PPR locals are above the ZPR callee-saves.
1539-
if (ZPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split)
1540-
FPOffsetZPR -= PPR.LocalsSize;
1541-
1542-
// Deallocate the stack space by moving the SP to the start of the
1543-
// ZPR/PPR callee-save area.
1544-
moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1540+
if (ZPR.CalleeSavesSize || SVELayout != SVEStackLayout::Split) {
1541+
// Deallocate the stack space by moving the SP to the start of the
1542+
// ZPR/PPR callee-save area.
1543+
moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR);
1544+
}
15451545

15461546
if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) {
1547-
// Move to the start of the PPR area (this offset may be zero).
1548-
StackOffset FPOffsetPPR = -PPR.CalleeSavesSize;
1549-
emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::SP,
1550-
FPOffsetPPR - FPOffsetZPR, TII,
1551-
MachineInstr::FrameDestroy);
1547+
// Move to the start of the PPR area.
1548+
assert(!FPOffsetPPR.getFixed() && "expected only scalable offset");
1549+
emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::FP,
1550+
FPOffsetPPR, TII, MachineInstr::FrameDestroy);
15521551
}
15531552
} else if (BaseForSVEDealloc == AArch64::SP) {
15541553
auto NonSVELocals = StackOffset::getFixed(NumBytes);

llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -918,8 +918,7 @@ define aarch64_sve_vector_pcs void @zpr_ppr_csr_vla(i64 %n) {
918918
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
919919
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
920920
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
921-
; CHECK-NEXT: add sp, sp, #1024
922-
; CHECK-NEXT: addvl sp, sp, #3
921+
; CHECK-NEXT: addvl sp, x29, #-1
923922
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
924923
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
925924
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -1074,8 +1073,7 @@ define void @sve_locals_zpr_ppr_csr_vla(i64 %n, <vscale x 16 x i1> %pred, <vscal
10741073
; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload
10751074
; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload
10761075
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
1077-
; CHECK-NEXT: add sp, sp, #1024
1078-
; CHECK-NEXT: addvl sp, sp, #4
1076+
; CHECK-NEXT: addvl sp, x29, #-1
10791077
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
10801078
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
10811079
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload

llvm/test/CodeGen/AArch64/stack-hazard.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3612,8 +3612,7 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x
36123612
; CHECK64-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
36133613
; CHECK64-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
36143614
; CHECK64-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
3615-
; CHECK64-NEXT: add sp, sp, #64
3616-
; CHECK64-NEXT: addvl sp, sp, #16
3615+
; CHECK64-NEXT: addvl sp, x29, #-2
36173616
; CHECK64-NEXT: .cfi_restore z8
36183617
; CHECK64-NEXT: .cfi_restore z9
36193618
; CHECK64-NEXT: .cfi_restore z10
@@ -3899,8 +3898,7 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x
38993898
; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
39003899
; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
39013900
; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
3902-
; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
3903-
; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16
3901+
; CHECK1024-SPLITSVE-NEXT: addvl sp, x29, #-2
39043902
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8
39053903
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9
39063904
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10
@@ -4166,8 +4164,7 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
41664164
; CHECK64-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
41674165
; CHECK64-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
41684166
; CHECK64-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
4169-
; CHECK64-NEXT: add sp, sp, #64
4170-
; CHECK64-NEXT: addvl sp, sp, #16
4167+
; CHECK64-NEXT: addvl sp, x29, #-2
41714168
; CHECK64-NEXT: .cfi_restore z8
41724169
; CHECK64-NEXT: .cfi_restore z9
41734170
; CHECK64-NEXT: .cfi_restore z10
@@ -4432,8 +4429,7 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, <vscale x 16 x i
44324429
; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
44334430
; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
44344431
; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
4435-
; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
4436-
; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16
4432+
; CHECK1024-SPLITSVE-NEXT: addvl sp, x29, #-2
44374433
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8
44384434
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9
44394435
; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10
@@ -4678,8 +4674,7 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %
46784674
; CHECK64-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
46794675
; CHECK64-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
46804676
; CHECK64-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
4681-
; CHECK64-NEXT: add sp, sp, #64
4682-
; CHECK64-NEXT: addvl sp, sp, #16
4677+
; CHECK64-NEXT: addvl sp, x29, #-2
46834678
; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
46844679
; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
46854680
; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
@@ -4910,8 +4905,7 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 %
49104905
; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload
49114906
; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload
49124907
; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload
4913-
; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024
4914-
; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16
4908+
; CHECK1024-SPLITSVE-NEXT: addvl sp, x29, #-2
49154909
; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
49164910
; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
49174911
; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload

0 commit comments

Comments
 (0)