diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index ac26f4d4fbe66..be8aed5967eb2 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -857,6 +857,8 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
   Register CtxReg = MBBI->getOperand(0).getReg();
   Register BaseReg = MBBI->getOperand(1).getReg();
   int Offset = MBBI->getOperand(2).getImm();
+  Register ScratchReg1 = MBBI->getOperand(3).getReg();
+  Register ScratchReg2 = MBBI->getOperand(4).getReg();
   DebugLoc DL(MBBI->getDebugLoc());
   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
 
@@ -872,35 +874,35 @@ bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
   // We need to sign the context in an address-discriminated way. 0xc31a is a
   // fixed random value, chosen as part of the ABI.
-  //     add x16, xBase, #Offset
-  //     movk x16, #0xc31a, lsl #48
-  //     mov x17, x22/xzr
-  //     pacdb x17, x16
-  //     str x17, [xBase, #Offset]
+  //     add ScratchReg1, xBase, #Offset
+  //     movk ScratchReg1, #0xc31a, lsl #48
+  //     mov ScratchReg2, x22/xzr
+  //     pacdb ScratchReg2, ScratchReg1
+  //     str ScratchReg2, [xBase, #Offset]
   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
-  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(Opc), ScratchReg1)
       .addUse(BaseReg)
       .addImm(abs(Offset))
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
-      .addUse(AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), ScratchReg1)
+      .addUse(ScratchReg1)
       .addImm(0xc31a)
       .addImm(48)
       .setMIFlag(MachineInstr::FrameSetup);
   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
   // move it somewhere before signing.
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), ScratchReg2)
       .addUse(AArch64::XZR)
       .addUse(CtxReg)
       .addImm(0)
       .setMIFlag(MachineInstr::FrameSetup);
-  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
-      .addUse(AArch64::X17)
-      .addUse(AArch64::X16)
+  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), ScratchReg2)
+      .addUse(ScratchReg2)
+      .addUse(ScratchReg1)
       .setMIFlag(MachineInstr::FrameSetup);
   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
-      .addUse(AArch64::X17)
+      .addUse(ScratchReg2)
       .addUse(BaseReg)
       .addImm(Offset / 8)
       .setMIFlag(MachineInstr::FrameSetup);
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd0505..a171581773e75 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -296,7 +296,8 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 unsigned FirstScratchReg = 0);
 
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
@@ -870,17 +871,24 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
 // but we would then have to make sure that we were in fact saving at least one
 // callee-save register in the prologue, which is additional complexity that
 // doesn't seem worth the benefit.
-static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
+//
+// If \p FirstScratchReg is not 0, it specifies the register that was already
+// chosen as the first scratch register, and this function should return a
+// different scratch register, if possible.
+static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 unsigned FirstScratchReg) {
   MachineFunction *MF = MBB->getParent();
 
   // If MBB is an entry block, use X9 as the scratch register
-  if (&MF->front() == MBB)
+  if (&MF->front() == MBB && !FirstScratchReg)
     return AArch64::X9;
 
   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
   LivePhysRegs LiveRegs(TRI);
   LiveRegs.addLiveIns(*MBB);
+  if (FirstScratchReg)
+    LiveRegs.addReg(FirstScratchReg);
 
   // Mark callee saved registers as used so we will not choose them.
   const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
@@ -905,6 +913,17 @@ bool AArch64FrameLowering::canUseAsPrologue(
   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
   const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
+
+  if (AFI->hasSwiftAsyncContext()) {
+    // Expanding StoreSwiftAsyncContext requires 2 scratch registers.
+    unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(TmpMBB);
+    unsigned SecondScratchReg =
+        findScratchNonCalleeSaveRegister(TmpMBB, FirstScratchReg);
+    if (FirstScratchReg == AArch64::NoRegister ||
+        SecondScratchReg == AArch64::NoRegister)
+      return false;
+  }
 
   // Don't need a scratch register if we're not going to re-align the stack.
   if (!RegInfo->hasStackRealignment(*MF))
@@ -1681,11 +1700,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
       bool HaveInitialContext = Attrs.hasAttrSomewhere(Attribute::SwiftAsync);
       if (HaveInitialContext)
         MBB.addLiveIn(AArch64::X22);
+      unsigned FirstScratchReg = findScratchNonCalleeSaveRegister(&MBB);
+      unsigned SecondScratchReg =
+          findScratchNonCalleeSaveRegister(&MBB, FirstScratchReg);
       Register Reg = HaveInitialContext ? AArch64::X22 : AArch64::XZR;
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::StoreSwiftAsyncContext))
           .addUse(Reg)
           .addUse(AArch64::SP)
           .addImm(FPOffset - 8)
+          .addDef(FirstScratchReg, RegState::Implicit)
+          .addDef(SecondScratchReg, RegState::Implicit)
           .setMIFlags(MachineInstr::FrameSetup);
       if (NeedsWinCFI) {
         // WinCFI and arm64e, where StoreSwiftAsyncContext is expanded
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 0a8abfae5051d..75ab0ae6a41ad 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9163,9 +9163,10 @@ def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
 //-----------------------------------------------------------------------------
 
 // This gets lowered into an instruction sequence of 20 bytes
-let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
+let mayStore = 1, isCodeGenOnly = 1, Size = 20 in
 def StoreSwiftAsyncContext
-      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
+      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset,
+                        GPR64:$scratch1, GPR64sp:$scratch2),
                []>, Sched<[]>;
 
 def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
diff --git a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
index 217fb9bbfbeb9..a740b903e384d 100644
--- a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
+++ b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
@@ -1,10 +1,64 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: not --crash llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s
-; REQUIRES: asserts
+; RUN: llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
 define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x16_clobbered:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    orr x29, x29, #0x1000000000000000
+; CHECK-NEXT:    str x19, [sp, #-32]!
; 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: movk x9, #49946, lsl #48 +; CHECK-NEXT: mov x1, x22 +; CHECK-NEXT: pacdb x1, x9 +; CHECK-NEXT: str x1, [sp, #8] +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w19, -32 +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x9, LJTI0_0@PAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: add x9, x9, LJTI0_0@PAGEOFF +; CHECK-NEXT: Ltmp0: +; CHECK-NEXT: adr x10, Ltmp0 +; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2] +; CHECK-NEXT: add x10, x10, x11 +; CHECK-NEXT: mov x19, x20 +; CHECK-NEXT: br x10 +; CHECK-NEXT: LBB0_1: ; %then.2 +; CHECK-NEXT: mov x19, #0 ; =0x0 +; CHECK-NEXT: b LBB0_3 +; CHECK-NEXT: LBB0_2: ; %then.3 +; CHECK-NEXT: mov x19, x22 +; CHECK-NEXT: LBB0_3: ; %exit +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _foo +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload +; CHECK-NEXT: and x29, x29, #0xefffffffffffffff +; CHECK-NEXT: br x2 +; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1 +; CHECK-NEXT: .cfi_endproc +; CHECK-NEXT: .section __TEXT,__const +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: LJTI0_0: +; CHECK-NEXT: .long LBB0_3-Ltmp0 +; CHECK-NEXT: .long LBB0_1-Ltmp0 +; CHECK-NEXT: .long LBB0_1-Ltmp0 +; CHECK-NEXT: .long LBB0_2-Ltmp0 entry: %x16 = tail call i64 asm "", "={x16}"() %l = load i64, ptr %src, align 8 @@ -37,6 +91,61 @@ exit: } define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr swiftasync %as) #0 { +; CHECK-LABEL: test_async_with_jumptable_x17_clobbered: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr x29, x29, #0x1000000000000000 +; CHECK-NEXT: str x19, [sp, #-32]! 
; 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: movk x9, #49946, lsl #48 +; CHECK-NEXT: mov x1, x22 +; CHECK-NEXT: pacdb x1, x9 +; CHECK-NEXT: str x1, [sp, #8] +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w19, -32 +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: adrp x9, LJTI1_0@PAGE +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: add x9, x9, LJTI1_0@PAGEOFF +; CHECK-NEXT: Ltmp1: +; CHECK-NEXT: adr x10, Ltmp1 +; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2] +; CHECK-NEXT: add x10, x10, x11 +; CHECK-NEXT: mov x19, x20 +; CHECK-NEXT: br x10 +; CHECK-NEXT: LBB1_1: ; %then.2 +; CHECK-NEXT: mov x19, #0 ; =0x0 +; CHECK-NEXT: b LBB1_3 +; CHECK-NEXT: LBB1_2: ; %then.3 +; CHECK-NEXT: mov x19, x22 +; CHECK-NEXT: LBB1_3: ; %exit +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _foo +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload +; CHECK-NEXT: and x29, x29, #0xefffffffffffffff +; CHECK-NEXT: br x2 +; CHECK-NEXT: .loh AdrpAdd Lloh2, Lloh3 +; CHECK-NEXT: .cfi_endproc +; CHECK-NEXT: .section __TEXT,__const +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: LJTI1_0: +; CHECK-NEXT: .long LBB1_3-Ltmp1 +; CHECK-NEXT: .long LBB1_1-Ltmp1 +; CHECK-NEXT: .long LBB1_1-Ltmp1 +; CHECK-NEXT: .long LBB1_2-Ltmp1 entry: %x17 = tail call i64 asm "", "={x17}"() %l = load i64, ptr %src, align 8 @@ -69,6 +178,61 @@ exit: } define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr swiftasync %as) #0 { +; CHECK-LABEL: test_async_with_jumptable_x1_clobbered: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr x29, x29, #0x1000000000000000 +; CHECK-NEXT: str x19, [sp, #-32]! 
; 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: movk x9, #49946, lsl #48 +; CHECK-NEXT: mov x2, x22 +; CHECK-NEXT: pacdb x2, x9 +; CHECK-NEXT: str x2, [sp, #8] +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w19, -32 +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 +; CHECK-NEXT: Lloh4: +; CHECK-NEXT: adrp x9, LJTI2_0@PAGE +; CHECK-NEXT: Lloh5: +; CHECK-NEXT: add x9, x9, LJTI2_0@PAGEOFF +; CHECK-NEXT: Ltmp2: +; CHECK-NEXT: adr x10, Ltmp2 +; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2] +; CHECK-NEXT: add x10, x10, x11 +; CHECK-NEXT: mov x19, x20 +; CHECK-NEXT: br x10 +; CHECK-NEXT: LBB2_1: ; %then.2 +; CHECK-NEXT: mov x19, #0 ; =0x0 +; CHECK-NEXT: b LBB2_3 +; CHECK-NEXT: LBB2_2: ; %then.3 +; CHECK-NEXT: mov x19, x22 +; CHECK-NEXT: LBB2_3: ; %exit +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _foo +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload +; CHECK-NEXT: and x29, x29, #0xefffffffffffffff +; CHECK-NEXT: br x2 +; CHECK-NEXT: .loh AdrpAdd Lloh4, Lloh5 +; CHECK-NEXT: .cfi_endproc +; CHECK-NEXT: .section __TEXT,__const +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: LJTI2_0: +; CHECK-NEXT: .long LBB2_3-Ltmp2 +; CHECK-NEXT: .long LBB2_1-Ltmp2 +; CHECK-NEXT: .long LBB2_1-Ltmp2 +; CHECK-NEXT: .long LBB2_2-Ltmp2 entry: %x1 = tail call i64 asm "", "={x1}"() %l = load i64, ptr %src, align 8 @@ -101,6 +265,65 @@ exit: } define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr swiftasync %as) #0 { +; CHECK-LABEL: test_async_with_jumptable_x1_x9_clobbered: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: orr x29, x29, #0x1000000000000000 +; CHECK-NEXT: str x19, [sp, #-32]! 
; 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x2, sp, #8 +; CHECK-NEXT: movk x2, #49946, lsl #48 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: pacdb x3, x2 +; CHECK-NEXT: str x3, [sp, #8] +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w19, -32 +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 +; CHECK-NEXT: Lloh6: +; CHECK-NEXT: adrp x10, LJTI3_0@PAGE +; CHECK-NEXT: Lloh7: +; CHECK-NEXT: add x10, x10, LJTI3_0@PAGEOFF +; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: adr x11, Ltmp3 +; CHECK-NEXT: ldrsw x12, [x10, x8, lsl #2] +; CHECK-NEXT: add x11, x11, x12 +; CHECK-NEXT: mov x19, x20 +; CHECK-NEXT: br x11 +; CHECK-NEXT: LBB3_1: ; %then.2 +; CHECK-NEXT: mov x19, #0 ; =0x0 +; CHECK-NEXT: b LBB3_3 +; CHECK-NEXT: LBB3_2: ; %then.3 +; CHECK-NEXT: mov x19, x22 +; CHECK-NEXT: LBB3_3: ; %exit +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _foo +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload +; CHECK-NEXT: and x29, x29, #0xefffffffffffffff +; CHECK-NEXT: br x2 +; CHECK-NEXT: .loh AdrpAdd Lloh6, Lloh7 +; CHECK-NEXT: .cfi_endproc +; CHECK-NEXT: .section __TEXT,__const +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: LJTI3_0: +; CHECK-NEXT: .long LBB3_3-Ltmp3 +; CHECK-NEXT: .long LBB3_1-Ltmp3 +; CHECK-NEXT: .long LBB3_1-Ltmp3 +; CHECK-NEXT: .long LBB3_2-Ltmp3 entry: %x1 = tail call i64 asm "", "={x1}"() %x9 = tail call i64 asm "", "={x9}"() @@ -136,6 +359,117 @@ exit: ; There are 2 available scratch registers left, shrink-wrapping can happen. define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %src, ptr swiftasync %as) #0 { +; CHECK-LABEL: test_async_with_jumptable_2_available_regs_left: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldr x10, [x0] +; CHECK-NEXT: orr x29, x29, #0x1000000000000000 +; CHECK-NEXT: str x19, [sp, #-32]! 
; 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x17, sp, #8 +; CHECK-NEXT: movk x17, #49946, lsl #48 +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: pacdb x20, x17 +; CHECK-NEXT: str x20, [sp, #8] +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w19, -32 +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 +; CHECK-NEXT: Lloh8: +; CHECK-NEXT: adrp x17, LJTI4_0@PAGE +; CHECK-NEXT: Lloh9: +; CHECK-NEXT: add x17, x17, LJTI4_0@PAGEOFF +; CHECK-NEXT: Ltmp4: +; CHECK-NEXT: adr x0, Ltmp4 +; CHECK-NEXT: ldrsw x19, [x17, x10, lsl #2] +; CHECK-NEXT: add x0, x0, x19 +; CHECK-NEXT: mov x19, x20 +; CHECK-NEXT: br x0 +; CHECK-NEXT: LBB4_1: ; %then.2 +; CHECK-NEXT: mov x19, #0 ; =0x0 +; CHECK-NEXT: b LBB4_3 +; CHECK-NEXT: LBB4_2: ; %then.3 +; CHECK-NEXT: mov x19, x22 +; CHECK-NEXT: LBB4_3: ; %exit +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _foo +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload +; CHECK-NEXT: and x29, x29, #0xefffffffffffffff +; CHECK-NEXT: br x2 +; CHECK-NEXT: .loh AdrpAdd Lloh8, Lloh9 +; CHECK-NEXT: .cfi_endproc +; CHECK-NEXT: .section __TEXT,__const +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: LJTI4_0: +; CHECK-NEXT: .long LBB4_3-Ltmp4 +; CHECK-NEXT: .long LBB4_1-Ltmp4 +; CHECK-NEXT: .long LBB4_1-Ltmp4 +; CHECK-NEXT: .long LBB4_2-Ltmp4 entry: %x1 = tail call i64 asm "", "={x1}"() %x2 = tail call i64 asm "", "={x2}"() @@ -198,6 +532,124 @@ exit: ; There is only 1 available scratch registers left, shrink-wrapping cannot ; happen because StoreSwiftAsyncContext needs 2 free scratch registers. 
define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src, ptr swiftasync %as) #0 { +; CHECK-LABEL: test_async_with_jumptable_1_available_reg_left: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: orr x29, x29, #0x1000000000000000 +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: stp x21, x19, [sp, #8] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: movk x9, #49946, lsl #48 +; CHECK-NEXT: mov x1, x22 +; CHECK-NEXT: pacdb x1, x9 +; CHECK-NEXT: str x1, [sp, #24] +; CHECK-NEXT: add x29, sp, #32 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w19, -32 +; CHECK-NEXT: .cfi_offset w21, -40 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldr x10, [x0] +; CHECK-NEXT: mov x20, x22 +; CHECK-NEXT: mov x22, x0 +; CHECK-NEXT: Lloh10: +; CHECK-NEXT: adrp x0, LJTI5_0@PAGE +; CHECK-NEXT: Lloh11: +; CHECK-NEXT: add x0, x0, LJTI5_0@PAGEOFF +; CHECK-NEXT: Ltmp5: +; CHECK-NEXT: adr x21, Ltmp5 +; CHECK-NEXT: ldrsw x19, [x0, x10, lsl #2] +; CHECK-NEXT: add x21, x21, x19 +; CHECK-NEXT: mov x19, x20 +; CHECK-NEXT: br x21 +; CHECK-NEXT: LBB5_1: ; %then.2 +; CHECK-NEXT: mov x19, #0 ; =0x0 +; CHECK-NEXT: b LBB5_3 +; CHECK-NEXT: LBB5_2: ; %then.3 +; CHECK-NEXT: mov x19, x22 +; CHECK-NEXT: LBB5_3: ; %exit +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: bl _foo +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: mov x1, x20 +; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded 
Reload +; CHECK-NEXT: ldp x21, x19, [sp, #8] ; 16-byte Folded Reload +; CHECK-NEXT: and x29, x29, #0xefffffffffffffff +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: br x2 +; CHECK-NEXT: .loh AdrpAdd Lloh10, Lloh11 +; CHECK-NEXT: .cfi_endproc +; CHECK-NEXT: .section __TEXT,__const +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: LJTI5_0: +; CHECK-NEXT: .long LBB5_3-Ltmp5 +; CHECK-NEXT: .long LBB5_1-Ltmp5 +; CHECK-NEXT: .long LBB5_1-Ltmp5 +; CHECK-NEXT: .long LBB5_2-Ltmp5 entry: %x1 = tail call i64 asm "", "={x1}"() %x2 = tail call i64 asm "", "={x2}"() diff --git a/llvm/test/CodeGen/AArch64/swift-async.ll b/llvm/test/CodeGen/AArch64/swift-async.ll index 4a3bf15b666b3..28d32ebf1b991 100644 --- a/llvm/test/CodeGen/AArch64/swift-async.ll +++ b/llvm/test/CodeGen/AArch64/swift-async.ll @@ -12,11 +12,11 @@ define swifttailcc void @simple(ptr swiftasync %ctx) "frame-pointer"="all" { ; CHECK: stp x29, x30, [sp, #16] ; CHECK-NOAUTH-DAG: str x22, [sp, #8] -; CHECK-AUTH: add x16, sp, #8 -; CHECK-AUTH: movk x16, #49946, lsl #48 -; CHECK-AUTH: mov x17, x22 -; CHECK-AUTH: pacdb x17, x16 -; CHECK-AUTH: str x17, [sp, #8] +; CHECK-AUTH: add x9, sp, #8 +; CHECK-AUTH: movk x9, #49946, lsl #48 +; CHECK-AUTH: mov x0, x22 +; CHECK-AUTH: pacdb x0, x9 +; CHECK-AUTH: str x0, [sp, #8] ; CHECK-DAG: add x29, sp, #16 ; CHECK: .cfi_def_cfa w29, 16 @@ -39,11 +39,11 @@ define swifttailcc void @more_csrs(ptr swiftasync %ctx) "frame-pointer"="all" { ; CHECK: stp x29, x30, [sp, #16] ; CHECK-NOAUTH-DAG: str x22, [sp, #8] -; CHECK-AUTH: add x16, sp, #8 -; CHECK-AUTH: movk x16, #49946, lsl #48 -; CHECK-AUTH: mov x17, x22 -; CHECK-AUTH: pacdb x17, x16 -; CHECK-AUTH: str x17, [sp, #8] +; CHECK-AUTH: add x9, sp, #8 +; CHECK-AUTH: movk x9, #49946, lsl #48 +; CHECK-AUTH: mov x0, x22 +; CHECK-AUTH: pacdb x0, x9 +; CHECK-AUTH: str x0, [sp, #8] ; CHECK-DAG: add x29, sp, #16 ; CHECK: .cfi_def_cfa w29, 16 @@ -67,11 +67,11 @@ define swifttailcc void @locals(ptr swiftasync %ctx) "frame-pointer"="all" { ; CHECK: stp x29, x30, [sp, #48] ; CHECK-NOAUTH-DAG: str x22, [sp, #40] -; CHECK-AUTH: add x16, sp, #40 -; CHECK-AUTH: movk x16, #49946, lsl #48 -; CHECK-AUTH: mov x17, x22 -; CHECK-AUTH: pacdb x17, x16 -; CHECK-AUTH: str x17, [sp, #40] +; CHECK-AUTH: add x9, sp, #40 +; CHECK-AUTH: movk x9, #49946, lsl #48 +; CHECK-AUTH: mov x0, x22 +; CHECK-AUTH: pacdb x0, x9 +; CHECK-AUTH: str x0, [sp, #40] ; CHECK-DAG: add x29, sp, #48 ; CHECK: .cfi_def_cfa w29, 16 @@ -95,7 +95,7 @@ define swifttailcc void @use_input_context(ptr swiftasync %ctx, ptr %ptr) "frame ; CHECK-LABEL: use_input_context: ; CHECK-NOAUTH: str x22, [sp -; CHECK-AUTH: mov x17, x22 +; CHECK-AUTH: mov x1, x22 ; CHECK-NOT: x22 ; CHECK: str x22, [x0] @@ -108,11 +108,11 @@ define swifttailcc ptr @context_in_func() "frame-pointer"="non-leaf" { ; CHECK-LABEL: context_in_func: ; CHECK-NOAUTH: str xzr, [sp, #8] -; CHECK-AUTH: add x16, sp, #8 -; CHECK-AUTH: movk x16, #49946, lsl #48 -; CHECK-AUTH: mov x17, xzr -; CHECK-AUTH: pacdb x17, x16 -; CHECK-AUTH: str x17, [sp, #8] +; CHECK-AUTH: add x9, sp, #8 +; CHECK-AUTH: movk x9, #49946, lsl #48 +; CHECK-AUTH: mov x0, xzr +; CHECK-AUTH: pacdb x0, x9 +; CHECK-AUTH: str x0, [sp, #8] %ptr = call ptr @llvm.swift.async.context.addr() ret ptr %ptr diff --git a/llvm/unittests/Target/AArch64/InstSizes.cpp b/llvm/unittests/Target/AArch64/InstSizes.cpp index d7e4b4a91cdbd..e5bc6a67b6b73 100644 --- a/llvm/unittests/Target/AArch64/InstSizes.cpp +++ b/llvm/unittests/Target/AArch64/InstSizes.cpp @@ -196,7 +196,7 @@ TEST(InstSizes, StoreSwiftAsyncContext) { runChecks( 
      TM.get(), II.get(), "",
-      "  StoreSwiftAsyncContext $x0, $x1, 12, implicit-def $x16, "
+      "  StoreSwiftAsyncContext $x0, $x1, 12, $x2, $x3, implicit-def $x16, "
       "implicit-def $x17\n",
       [](AArch64InstrInfo &II, MachineFunction &MF) {
         auto I = MF.begin()->begin();
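
Note (illustrative sketch, not part of the patch): StoreSwiftAsyncContext now carries its two scratch registers as explicit operands (e.g. "StoreSwiftAsyncContext $x0, $x1, 12, $x2, $x3, ..." in the unit test above) and still expands to the same five-instruction signing sequence, just with the chosen registers substituted for x16/x17. Assuming the prologue picked x9 and x1 and the context slot sits at [sp, #8] (register and offset choices are examples taken from the first CHECK block above), the expansion is:

    add   x9, sp, #8              ; ScratchReg1 = address of the context slot
    movk  x9, #0xc31a, lsl #48    ; mix in the fixed ABI discriminator (0xc31a == 49946)
    mov   x1, x22                 ; copy the context; x22/xzr must not be clobbered
    pacdb x1, x9                  ; sign with the address-discriminated data key B
    str   x1, [sp, #8]            ; store the signed context

Because canUseAsPrologue now rejects blocks in which two such non-callee-saved scratch registers cannot be found, shrink-wrapping only moves the prologue to a block where this sequence can still be materialized (see the test above where only one register is left available).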