Skip to content

Commit 8b93f27

Browse files
authored
[AArch64][SME] Fixup ABI routine insertion points to avoid clobbering NZCV (#161353)
This updates the `MachineSMEABIPass` to find insertion points for state changes (i.e., calls to ABI routines), where the NZCV register (status flags) is not live. It works by stepping backwards from where the state change is needed until we find an instruction where NZCV is not live, a previous state change, or a call sequence. We conservatively don't move into/over calls, as they may require a different state before the start of the call sequence.
1 parent bcf9e91 commit 8b93f27

File tree

4 files changed

+318
-33
lines changed

4 files changed

+318
-33
lines changed

llvm/lib/Target/AArch64/MachineSMEABIPass.cpp

Lines changed: 86 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,12 @@ struct MachineSMEABI : public MachineFunctionPass {
294294
MachineBasicBlock::iterator MBBI,
295295
LiveRegs PhysLiveRegs);
296296

297+
/// Attempts to find an insertion point before \p Inst where the status flags
298+
/// are not live. If \p Inst is `Block.Insts.end()` a point before the end of
299+
/// the block is found.
300+
std::pair<MachineBasicBlock::iterator, LiveRegs>
301+
findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block,
302+
SmallVectorImpl<InstInfo>::const_iterator Inst);
297303
void emitStateChange(EmitContext &, MachineBasicBlock &MBB,
298304
MachineBasicBlock::iterator MBBI, ZAState From,
299305
ZAState To, LiveRegs PhysLiveRegs);
@@ -337,6 +343,28 @@ struct MachineSMEABI : public MachineFunctionPass {
337343
MachineRegisterInfo *MRI = nullptr;
338344
};
339345

346+
/// Summarises \p LiveUnits as a `LiveRegs` bitmask.
///
/// Starts from `LiveRegs::None` and sets the NZCV, W0 and W0_HI bits for each
/// of those registers that `LiveUnits.available()` reports as not available.
/// No other registers are inspected.
///
/// \param LiveUnits the register-unit liveness to query (not modified).
/// \returns the mask of NZCV/W0/W0_HI bits that are live in \p LiveUnits.
static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) {
347+
LiveRegs PhysLiveRegs = LiveRegs::None;
348+
if (!LiveUnits.available(AArch64::NZCV))
349+
PhysLiveRegs |= LiveRegs::NZCV;
350+
// We have to track W0 and X0 separately as otherwise things can get
351+
// confused if we attempt to preserve X0 but only W0 was defined.
352+
if (!LiveUnits.available(AArch64::W0))
353+
PhysLiveRegs |= LiveRegs::W0;
354+
if (!LiveUnits.available(AArch64::W0_HI))
355+
PhysLiveRegs |= LiveRegs::W0_HI;
356+
return PhysLiveRegs;
357+
}
358+
359+
/// Marks in \p LiveUnits the registers whose bits are set in \p PhysLiveRegs.
///
/// For each of the NZCV, W0 and W0_HI bits set in the mask, the matching
/// register is added to \p LiveUnits via `addReg()`. Nothing is removed from
/// \p LiveUnits, so any units already present are kept.
///
/// \param LiveUnits the liveness set to add registers to.
/// \param PhysLiveRegs the mask selecting which of NZCV/W0/W0_HI to add.
static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) {
360+
if (PhysLiveRegs & LiveRegs::NZCV)
361+
LiveUnits.addReg(AArch64::NZCV);
362+
if (PhysLiveRegs & LiveRegs::W0)
363+
LiveUnits.addReg(AArch64::W0);
364+
if (PhysLiveRegs & LiveRegs::W0_HI)
365+
LiveUnits.addReg(AArch64::W0_HI);
366+
}
367+
340368
FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
341369
assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() ||
342370
SMEFnAttrs.hasZAState()) &&
@@ -362,26 +390,13 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) {
362390
LiveRegUnits LiveUnits(*TRI);
363391
LiveUnits.addLiveOuts(MBB);
364392

365-
auto GetPhysLiveRegs = [&] {
366-
LiveRegs PhysLiveRegs = LiveRegs::None;
367-
if (!LiveUnits.available(AArch64::NZCV))
368-
PhysLiveRegs |= LiveRegs::NZCV;
369-
// We have to track W0 and X0 separately as otherwise things can get
370-
// confused if we attempt to preserve X0 but only W0 was defined.
371-
if (!LiveUnits.available(AArch64::W0))
372-
PhysLiveRegs |= LiveRegs::W0;
373-
if (!LiveUnits.available(AArch64::W0_HI))
374-
PhysLiveRegs |= LiveRegs::W0_HI;
375-
return PhysLiveRegs;
376-
};
377-
378-
Block.PhysLiveRegsAtExit = GetPhysLiveRegs();
393+
Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits);
379394
auto FirstTerminatorInsertPt = MBB.getFirstTerminator();
380395
auto FirstNonPhiInsertPt = MBB.getFirstNonPHI();
381396
for (MachineInstr &MI : reverse(MBB)) {
382397
MachineBasicBlock::iterator MBBI(MI);
383398
LiveUnits.stepBackward(MI);
384-
LiveRegs PhysLiveRegs = GetPhysLiveRegs();
399+
LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits);
385400
// The SMEStateAllocPseudo marker is added to a function if the save
386401
// buffer was allocated in SelectionDAG. It marks the end of the
387402
// allocation -- which is a safe point for this pass to insert any TPIDR2
@@ -476,6 +491,49 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles,
476491
return BundleStates;
477492
}
478493

494+
/// Picks where to insert a state change needed at \p Inst, preferring a point
/// at which NZCV is not live.
///
/// The starting candidate is `Inst->InsertPt` with `Inst->PhysLiveRegs`, or,
/// when \p Inst is `Block.Insts.end()`, `MBB.getFirstTerminator()` with
/// `Block.PhysLiveRegsAtExit`. If NZCV is not set in the starting live
/// registers the starting candidate is returned unchanged. Otherwise the
/// function walks backwards from the candidate towards the previous state
/// change (or the start of the block) and returns the first instruction at
/// which NZCV becomes available, together with the live registers recomputed
/// at that point. The walk stops early at a call or call-frame-destroy
/// instruction. If no such point is found the starting candidate is returned.
///
/// \param MBB the block containing the state change.
/// \param Block the per-block info holding `Insts` and `PhysLiveRegsAtExit`.
/// \param Inst the InstInfo requiring the state change, or `Block.Insts.end()`
///        for a state change at the end of the block.
/// \returns the chosen insertion iterator and the live registers at it.
std::pair<MachineBasicBlock::iterator, LiveRegs>
495+
MachineSMEABI::findStateChangeInsertionPoint(
496+
MachineBasicBlock &MBB, const BlockInfo &Block,
497+
SmallVectorImpl<InstInfo>::const_iterator Inst) {
498+
LiveRegs PhysLiveRegs;
499+
MachineBasicBlock::iterator InsertPt;
500+
if (Inst != Block.Insts.end()) {
501+
InsertPt = Inst->InsertPt;
502+
PhysLiveRegs = Inst->PhysLiveRegs;
503+
} else {
504+
InsertPt = MBB.getFirstTerminator();
505+
PhysLiveRegs = Block.PhysLiveRegsAtExit;
506+
}
507+
508+
if (!(PhysLiveRegs & LiveRegs::NZCV))
509+
return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).
510+
511+
// Find the previous state change. We can not move before this point.
512+
MachineBasicBlock::iterator PrevStateChangeI;
513+
if (Inst == Block.Insts.begin()) {
514+
PrevStateChangeI = MBB.begin();
515+
} else {
516+
// Note: `std::prev(Inst)` is the previous InstInfo. We only create an
517+
// InstInfo object for instructions that require a specific ZA state, so the
518+
// InstInfo is the site of the previous state change in the block (which can
519+
// be several MIs earlier).
520+
PrevStateChangeI = std::prev(Inst)->InsertPt;
521+
}
522+
523+
// Note: LiveUnits will only accurately track X0 and NZCV.
// The set is seeded from the PhysLiveRegs mask only, so it starts with at
// most NZCV, W0 and W0_HI.
524+
LiveRegUnits LiveUnits(*TRI);
525+
setPhysLiveRegs(LiveUnits, PhysLiveRegs);
// Walk backwards from the starting candidate. The loop ends when I reaches
// PrevStateChangeI, so the instruction at PrevStateChangeI is never examined.
// NOTE(review): `I` is dereferenced on the first iteration; if InsertPt can be
// `MBB.end()` here (e.g. getFirstTerminator() on a block without terminators)
// that would dereference the end iterator -- confirm this cannot happen.
526+
for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
527+
// Don't move before/into a call (which may have a state change before it).
528+
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
529+
break;
530+
LiveUnits.stepBackward(*I);
531+
if (LiveUnits.available(AArch64::NZCV))
532+
return {I, getPhysLiveRegs(LiveUnits)};
533+
}
// No flag-free point was found: fall back to the starting candidate.
534+
return {InsertPt, PhysLiveRegs};
535+
}
536+
479537
void MachineSMEABI::insertStateChanges(EmitContext &Context,
480538
const FunctionInfo &FnInfo,
481539
const EdgeBundles &Bundles,
@@ -490,20 +548,26 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context,
490548
CurrentState = InState;
491549

492550
for (auto &Inst : Block.Insts) {
493-
if (CurrentState != Inst.NeededState)
494-
emitStateChange(Context, MBB, Inst.InsertPt, CurrentState,
495-
Inst.NeededState, Inst.PhysLiveRegs);
496-
CurrentState = Inst.NeededState;
551+
if (CurrentState != Inst.NeededState) {
552+
auto [InsertPt, PhysLiveRegs] =
553+
findStateChangeInsertionPoint(MBB, Block, &Inst);
554+
emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState,
555+
PhysLiveRegs);
556+
CurrentState = Inst.NeededState;
557+
}
497558
}
498559

499560
if (MBB.succ_empty())
500561
continue;
501562

502563
ZAState OutState =
503564
BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)];
504-
if (CurrentState != OutState)
505-
emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState,
506-
OutState, Block.PhysLiveRegsAtExit);
565+
if (CurrentState != OutState) {
566+
auto [InsertPt, PhysLiveRegs] =
567+
findStateChangeInsertionPoint(MBB, Block, Block.Insts.end());
568+
emitStateChange(Context, MBB, InsertPt, CurrentState, OutState,
569+
PhysLiveRegs);
570+
}
507571
}
508572
}
509573

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
2+
# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme -run-pass=aarch64-machine-sme-abi -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
--- |
5+
; Test moving a state change to be before a $nzcv def
6+
define void @move_before_nzcv_def() "aarch64_inout_za" { ret void }
7+
8+
; Test moving a state change to a point where $x0 is live
9+
define void @move_to_x0_live() "aarch64_inout_za" { ret void }
10+
11+
; Test we don't move before a previous state change.
12+
define void @do_not_move_before_prior_state_change() "aarch64_za_state_agnostic" { ret void }
13+
14+
; Test we don't move into a call sequence.
15+
define void @do_not_move_into_call() "aarch64_inout_za" { ret void }
16+
17+
declare void @clobber()
18+
declare void @inout_call() "aarch64_inout_za"
19+
...
20+
---
21+
name: move_before_nzcv_def
22+
tracksRegLiveness: true
23+
isSSA: true
24+
noVRegs: false
25+
body: |
26+
bb.0:
27+
28+
; CHECK-LABEL: name: move_before_nzcv_def
29+
; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
30+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp
31+
; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]]
32+
; CHECK-NEXT: $sp = COPY [[MSUBXrrr]]
33+
; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0
34+
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
35+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]]
36+
; CHECK-NEXT: MSR 56965, [[COPY1]]
37+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
38+
; CHECK-NEXT: RequiresZASavePseudo
39+
; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
40+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
41+
; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
42+
; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
43+
; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0
44+
; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
45+
; CHECK-NEXT: MSR 56965, $xzr
46+
; CHECK-NEXT: $nzcv = IMPLICIT_DEF
47+
; CHECK-NEXT: $zab0 = IMPLICIT_DEF
48+
; CHECK-NEXT: FAKE_USE $nzcv
49+
; CHECK-NEXT: RET_ReallyLR
50+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
51+
RequiresZASavePseudo
52+
BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
53+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
54+
55+
$nzcv = IMPLICIT_DEF
56+
$zab0 = IMPLICIT_DEF
57+
FAKE_USE $nzcv
58+
59+
RET_ReallyLR
60+
...
61+
---
62+
name: move_to_x0_live
63+
tracksRegLiveness: true
64+
isSSA: true
65+
noVRegs: false
66+
body: |
67+
bb.0:
68+
69+
; CHECK-LABEL: name: move_to_x0_live
70+
; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
71+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp
72+
; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]]
73+
; CHECK-NEXT: $sp = COPY [[MSUBXrrr]]
74+
; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0
75+
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
76+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]]
77+
; CHECK-NEXT: MSR 56965, [[COPY1]]
78+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
79+
; CHECK-NEXT: RequiresZASavePseudo
80+
; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
81+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
82+
; CHECK-NEXT: $x0 = IMPLICIT_DEF
83+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
84+
; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
85+
; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
86+
; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0
87+
; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
88+
; CHECK-NEXT: MSR 56965, $xzr
89+
; CHECK-NEXT: $x0 = COPY [[COPY2]]
90+
; CHECK-NEXT: $nzcv = IMPLICIT_DEF
91+
; CHECK-NEXT: FAKE_USE $x0
92+
; CHECK-NEXT: $zab0 = IMPLICIT_DEF
93+
; CHECK-NEXT: FAKE_USE $nzcv
94+
; CHECK-NEXT: RET_ReallyLR
95+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
96+
RequiresZASavePseudo
97+
BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
98+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
99+
100+
$x0 = IMPLICIT_DEF
101+
102+
$nzcv = IMPLICIT_DEF
103+
FAKE_USE $x0
104+
105+
$zab0 = IMPLICIT_DEF
106+
FAKE_USE $nzcv
107+
108+
RET_ReallyLR
109+
...
110+
---
111+
name: do_not_move_before_prior_state_change
112+
tracksRegLiveness: true
113+
isSSA: true
114+
noVRegs: false
115+
body: |
116+
; CHECK-LABEL: name: do_not_move_before_prior_state_change
117+
; CHECK: bb.0:
118+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
119+
; CHECK-NEXT: {{ $}}
120+
; CHECK-NEXT: BL &__arm_sme_state_size, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit-def $x0
121+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
122+
; CHECK-NEXT: $sp = SUBXrx64 $sp, [[COPY]], 24
123+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $sp
124+
; CHECK-NEXT: $nzcv = IMPLICIT_DEF
125+
; CHECK-NEXT: $zab0 = IMPLICIT_DEF
126+
; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv
127+
; CHECK-NEXT: $x0 = COPY [[COPY1]]
128+
; CHECK-NEXT: BL &__arm_sme_save, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0
129+
; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv
130+
; CHECK-NEXT: Bcc 2, %bb.1, implicit $nzcv
131+
; CHECK-NEXT: B %bb.2
132+
; CHECK-NEXT: {{ $}}
133+
; CHECK-NEXT: bb.1:
134+
; CHECK-NEXT: liveins: $nzcv
135+
; CHECK-NEXT: {{ $}}
136+
; CHECK-NEXT: FAKE_USE $nzcv
137+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
138+
; CHECK-NEXT: RequiresZASavePseudo
139+
; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
140+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
141+
; CHECK-NEXT: $x0 = COPY [[COPY1]]
142+
; CHECK-NEXT: BL &__arm_sme_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0
143+
; CHECK-NEXT: RET_ReallyLR
144+
; CHECK-NEXT: {{ $}}
145+
; CHECK-NEXT: bb.2:
146+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
147+
; CHECK-NEXT: RequiresZASavePseudo
148+
; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
149+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
150+
; CHECK-NEXT: $x0 = COPY [[COPY1]]
151+
; CHECK-NEXT: BL &__arm_sme_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0
152+
; CHECK-NEXT: RET_ReallyLR
153+
bb.0:
154+
successors: %bb.1, %bb.2
155+
156+
; The insertion point cannot move before the $nzcv def (as that would require
157+
; moving before a $zab0 def -- that requires the ACTIVE state).
158+
$nzcv = IMPLICIT_DEF
159+
$zab0 = IMPLICIT_DEF
160+
Bcc 2, %bb.1, implicit $nzcv
161+
B %bb.2
162+
; bb.1 and bb.2 both require ZA saved on entry (to force bb.0's exit bundle to
163+
; pick the LOCAL_SAVED state).
164+
bb.1:
165+
liveins: $nzcv
166+
FAKE_USE $nzcv
167+
168+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
169+
RequiresZASavePseudo
170+
BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
171+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
172+
173+
RET_ReallyLR
174+
bb.2:
175+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
176+
RequiresZASavePseudo
177+
BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
178+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
179+
180+
RET_ReallyLR
181+
...
182+
---
183+
name: do_not_move_into_call
184+
tracksRegLiveness: true
185+
isSSA: true
186+
noVRegs: false
187+
body: |
188+
bb.0:
189+
190+
; CHECK-LABEL: name: do_not_move_into_call
191+
; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg
192+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp
193+
; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]]
194+
; CHECK-NEXT: $sp = COPY [[MSUBXrrr]]
195+
; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0
196+
; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0
197+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]]
198+
; CHECK-NEXT: MSR 56965, [[COPY1]]
199+
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
200+
; CHECK-NEXT: RequiresZASavePseudo
201+
; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
202+
; CHECK-NEXT: $nzcv = IMPLICIT_DEF
203+
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
204+
; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv
205+
; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
206+
; CHECK-NEXT: [[MRS1:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
207+
; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0
208+
; CHECK-NEXT: RestoreZAPseudo [[MRS1]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
209+
; CHECK-NEXT: MSR 56965, $xzr
210+
; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv
211+
; CHECK-NEXT: $zab0 = IMPLICIT_DEF
212+
; CHECK-NEXT: FAKE_USE $nzcv
213+
; CHECK-NEXT: RET_ReallyLR
214+
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
215+
RequiresZASavePseudo
216+
BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
217+
218+
; This is an artificial test where NZCV is def'd inside a call, so we can't
219+
; move the insert point before its definition.
220+
$nzcv = IMPLICIT_DEF
221+
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
222+
223+
$zab0 = IMPLICIT_DEF
224+
FAKE_USE $nzcv
225+
226+
RET_ReallyLR
227+
...

llvm/test/CodeGen/AArch64/sme-agnostic-za.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -391,11 +391,9 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s
391391
; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0
392392
; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1
393393
; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536
394-
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
395394
; CHECK-NEWLOWERING-NEXT: mov x0, x19
396-
; CHECK-NEWLOWERING-NEXT: mrs x8, NZCV
397395
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save
398-
; CHECK-NEWLOWERING-NEXT: msr NZCV, x8
396+
; CHECK-NEWLOWERING-NEXT: cmp sp, x19
399397
; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3
400398
; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1
401399
; CHECK-NEWLOWERING-NEXT: mov x0, x19

0 commit comments

Comments
 (0)