diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 8caa49de0af43..1afe23e637e8d 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -18,6 +18,7 @@ #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64InstPrinter.h" +#include "Utils/AArch64SMEAttributes.h" #include "llvm/ADT/BitVector.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LiveRegMatrix.h" @@ -632,14 +633,27 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return true; auto &ST = MF.getSubtarget(); + const AArch64FunctionInfo *AFI = MF.getInfo(); if (ST.hasSVE() || ST.isStreaming()) { - const AArch64FunctionInfo *AFI = MF.getInfo(); // Frames that have variable sized objects and scalable SVE objects, // should always use a basepointer. if (!AFI->hasCalculatedStackSizeSVE() || AFI->getStackSizeSVE()) return true; } + // Frames with hazard padding can have a large offset between the frame + // pointer and GPR locals, which includes the emergency spill slot. If the + // emergency spill slot is not within range of the load/store instructions + // (which have a signed 9-bit range), we will fail to compile if it is used. + // Since hasBasePointer() is called before we know if we have hazard padding + // or an emergency spill slot we need to enable the basepointer + // conservatively. + if (AFI->hasStackHazardSlotIndex() || + (ST.getStreamingHazardSize() && + !SMEAttrs(MF.getFunction()).hasNonStreamingInterfaceAndBody())) { + return true; + } + // Conservatively estimate whether the negative offset from the frame // pointer will be sufficient to reach. If a function has a smallish // frame, it's less likely to have lots of spills and callee saved @@ -764,7 +778,8 @@ AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { assert((!MF.getSubtarget().hasSVE() || AFI->hasCalculatedStackSizeSVE()) && "Expected SVE area to be calculated by this point"); - return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE(); + return TFI.hasFP(MF) && !hasStackRealignment(MF) && !AFI->getStackSizeSVE() && + !AFI->hasStackHazardSlotIndex(); } bool AArch64RegisterInfo::requiresFrameIndexScavenging( diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir new file mode 100644 index 0000000000000..52ac36f801854 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir @@ -0,0 +1,99 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-stack-hazard-size=1024 -run-pass=prologepilog %s -o - | FileCheck %s +--- | + + define void @stack_hazard_streaming_compat() "aarch64_pstate_sm_compatible" { entry: unreachable } + define void @stack_hazard_streaming_compat_emergency_spill_slot() "aarch64_pstate_sm_compatible" { entry: unreachable } + +... + +# +------------------+ +# | GPR callee-saves | +# +------------------+ <- FP +# | | +# +------------------+ +# | FPR locals | +# | %stack.1 | +# +------------------+ +# | | +# +------------------+ +# | GPR locals | +# | %stack.2 | +# | | +# +------------------+ <- BP +# | | +# +------------------+ <- SP (can't be used due to VLA) + +# In this case without the base pointer we'd need the emergency spill slot to +# access both %stack.1 and %stack.2. With the base pointer we can reach both +# without spilling. + +name: stack_hazard_streaming_compat +# CHECK-LABEL: name: stack_hazard_streaming_compat +# CHECK: bb.0: +# CHECK: STRDui $d0, $x19, 131 +# CHECK-NEXT: STRXui $x0, $x19, 1 +# CHECK: bb.1: +tracksRegLiveness: true +frameInfo: + isFrameAddressTaken: true +stack: + - { id: 0, type: variable-sized, alignment: 1 } + - { id: 1, size: 8, alignment: 8 } + - { id: 2, size: 8, alignment: 8 } +body: | + bb.0: + liveins: $x0, $x8, $d0 + $x9 = LDRXui $x0, 0 :: (load (s64)) + STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1) + STRXui $x0, %stack.2, 0 :: (store (s64) into %stack.2) + B %bb.1 + bb.1: + liveins: $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr + RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr +... +--- +# +------------------+ +# | GPR callee-saves | +# +------------------+ <- FP +# | | +# +------------------+ +# | FPR locals | +# | %stack.1 | +# +------------------+ +# | | +# +------------------+ +# | GPR locals | +# | %stack.2 | (very large) +# | | +# +------------------+ <- BP +# | | +# +------------------+ <- SP (can't be used due to VLA) + +# In this case we need to use the emergency spill slot to access %stack.1 as it +# is too far from the frame pointer and the base pointer to directly address. +# Note: This also tests that the located near the SP/BP. + +name: stack_hazard_streaming_compat_emergency_spill_slot +# CHECK-LABEL: name: stack_hazard_streaming_compat_emergency_spill_slot +# CHECK: bb.0: +# CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $x19, 0 +# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 1056, 0 +# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 4095 +# CHECK-NEXT: $[[SCRATCH]] = LDRXui $x19, 0 +# CHECK: bb.1: +tracksRegLiveness: true +frameInfo: + isFrameAddressTaken: true +stack: + - { id: 0, type: variable-sized, alignment: 1 } + - { id: 1, size: 8, alignment: 8 } + - { id: 2, size: 32761, alignment: 8 } +body: | + bb.0: + liveins: $x0, $x8, $d0 + $x9 = LDRXui $x0, 0 :: (load (s64)) + STRDui $d0, %stack.1, 0 :: (store (s64) into %stack.1) + B %bb.1 + bb.1: + liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $lr + RET_ReallyLR implicit $x19, implicit $x20, implicit $x21, implicit $x22, implicit $x23, implicit $x24, implicit $x25, implicit $x26, implicit $x27, implicit $x28, implicit $lr diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll index 791d7580c327d..e169b199733bd 100644 --- a/llvm/test/CodeGen/AArch64/stack-hazard.ll +++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll @@ -2905,12 +2905,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK64-NEXT: mov x9, sp ; CHECK64-NEXT: mov w20, w0 ; CHECK64-NEXT: msub x9, x8, x8, x9 +; CHECK64-NEXT: mov x19, sp ; CHECK64-NEXT: mov sp, x9 -; CHECK64-NEXT: stur x9, [x29, #-208] -; CHECK64-NEXT: sub x9, x29, #208 -; CHECK64-NEXT: sturh wzr, [x29, #-198] -; CHECK64-NEXT: stur wzr, [x29, #-196] -; CHECK64-NEXT: sturh w8, [x29, #-200] +; CHECK64-NEXT: str x9, [x19] +; CHECK64-NEXT: add x9, x19, #0 +; CHECK64-NEXT: strh wzr, [x19, #10] +; CHECK64-NEXT: str wzr, [x19, #12] +; CHECK64-NEXT: strh w8, [x19, #8] ; CHECK64-NEXT: msr TPIDR2_EL0, x9 ; CHECK64-NEXT: .cfi_offset vg, -32 ; CHECK64-NEXT: smstop sm @@ -2919,7 +2920,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK64-NEXT: .cfi_restore vg ; CHECK64-NEXT: smstart za ; CHECK64-NEXT: mrs x8, TPIDR2_EL0 -; CHECK64-NEXT: sub x0, x29, #208 +; CHECK64-NEXT: add x0, x19, #0 ; CHECK64-NEXT: cbnz x8, .LBB33_2 ; CHECK64-NEXT: // %bb.1: // %entry ; CHECK64-NEXT: bl __arm_tpidr2_restore @@ -2985,16 +2986,13 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK1024-NEXT: mov x9, sp ; CHECK1024-NEXT: mov w20, w0 ; CHECK1024-NEXT: msub x9, x8, x8, x9 +; CHECK1024-NEXT: mov x19, sp ; CHECK1024-NEXT: mov sp, x9 -; CHECK1024-NEXT: sub x10, x29, #1872 -; CHECK1024-NEXT: stur x9, [x10, #-256] -; CHECK1024-NEXT: sub x9, x29, #1862 -; CHECK1024-NEXT: sub x10, x29, #1860 -; CHECK1024-NEXT: sturh wzr, [x9, #-256] -; CHECK1024-NEXT: sub x9, x29, #2128 -; CHECK1024-NEXT: stur wzr, [x10, #-256] -; CHECK1024-NEXT: sub x10, x29, #1864 -; CHECK1024-NEXT: sturh w8, [x10, #-256] +; CHECK1024-NEXT: str x9, [x19] +; CHECK1024-NEXT: add x9, x19, #0 +; CHECK1024-NEXT: strh wzr, [x19, #10] +; CHECK1024-NEXT: str wzr, [x19, #12] +; CHECK1024-NEXT: strh w8, [x19, #8] ; CHECK1024-NEXT: msr TPIDR2_EL0, x9 ; CHECK1024-NEXT: .cfi_offset vg, -32 ; CHECK1024-NEXT: smstop sm @@ -3003,7 +3001,7 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ ; CHECK1024-NEXT: .cfi_restore vg ; CHECK1024-NEXT: smstart za ; CHECK1024-NEXT: mrs x8, TPIDR2_EL0 -; CHECK1024-NEXT: sub x0, x29, #2128 +; CHECK1024-NEXT: add x0, x19, #0 ; CHECK1024-NEXT: cbnz x8, .LBB33_2 ; CHECK1024-NEXT: // %bb.1: // %entry ; CHECK1024-NEXT: bl __arm_tpidr2_restore