diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 901e63c47fac1..3a8ec3c6105bc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2509,7 +2509,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall( // Find users of the node that store the results (and share input chains). The // destination pointers can be used instead of creating stack allocations. - SDValue StoresInChain{}; + // FIXME: This should allow stores with the same chains (not just the entry + // chain), but there's a risk the store is within a (CALLSEQ_START, + // CALLSEQ_END) pair, which after this expansion will lead to nested call + // sequences. + SDValue InChain = getEntryNode(); SmallVector ResultStores(NumResults); for (SDNode *User : Node->uses()) { if (!ISD::isNormalStore(User)) @@ -2522,11 +2526,9 @@ bool SelectionDAG::expandMultipleResultFPLibCall( ST->getAddressSpace() != 0 || ST->getAlign() < getDataLayout().getABITypeAlign(StoreType->getScalarType()) || - (StoresInChain && ST->getChain() != StoresInChain) || - Node->isPredecessorOf(ST->getChain().getNode())) + ST->getChain() != InChain) continue; ResultStores[ResNo] = ST; - StoresInChain = ST->getChain(); } TargetLowering::ArgListTy Args; @@ -2568,7 +2570,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall( Type *RetType = CallRetResNo.has_value() ? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx) : Type::getVoidTy(Ctx); - SDValue InChain = StoresInChain ? StoresInChain : getEntryNode(); SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName, TLI->getPointerTy(getDataLayout())); TargetLowering::CallLoweringInfo CLI(*this); diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll index decc4a38f7ccd..35e5d61947ead 100644 --- a/llvm/test/CodeGen/PowerPC/f128-arith.ll +++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll @@ -1365,33 +1365,45 @@ define dso_local fp128 @qpFREXP(ptr %a, ptr %b) { ; CHECK-LABEL: qpFREXP: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 -; CHECK-NEXT: stdu r1, -32(r1) -; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r30, -16 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: addi r5, r1, 44 +; CHECK-NEXT: mr r30, r4 ; CHECK-NEXT: lxv v2, 0(r3) -; CHECK-NEXT: mr r5, r4 ; CHECK-NEXT: bl frexpf128 ; CHECK-NEXT: nop -; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: lwz r3, 44(r1) +; CHECK-NEXT: stw r3, 0(r30) +; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr ; ; CHECK-P8-LABEL: qpFREXP: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mflr r0 -; CHECK-P8-NEXT: stdu r1, -32(r1) -; CHECK-P8-NEXT: std r0, 48(r1) -; CHECK-P8-NEXT: .cfi_def_cfa_offset 32 +; CHECK-P8-NEXT: .cfi_def_cfa_offset 64 ; CHECK-P8-NEXT: .cfi_offset lr, 16 +; CHECK-P8-NEXT: .cfi_offset r30, -16 +; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stdu r1, -64(r1) +; CHECK-P8-NEXT: std r0, 80(r1) +; CHECK-P8-NEXT: addi r5, r1, 44 +; CHECK-P8-NEXT: mr r30, r4 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 -; CHECK-P8-NEXT: mr r5, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: bl frexpf128 ; CHECK-P8-NEXT: nop -; CHECK-P8-NEXT: addi r1, r1, 32 +; CHECK-P8-NEXT: lwz r3, 44(r1) +; CHECK-P8-NEXT: stw r3, 0(r30) +; CHECK-P8-NEXT: addi r1, r1, 64 ; CHECK-P8-NEXT: ld r0, 16(r1) +; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-P8-NEXT: mtlr r0 ; CHECK-P8-NEXT: blr entry: diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll index 3f615d23d3eaf..e85a7118f5ff8 100644 --- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll +++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll @@ -543,42 +543,50 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind { define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwind { ; RV32IFD-LABEL: test_frexp_v4f32_v4i32: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -48 -; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fsd fs1, 24(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fsd fs2, 16(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fsd fs3, 8(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: addi sp, sp, -64 +; RV32IFD-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: fmv.s fs0, fa3 ; RV32IFD-NEXT: fmv.s fs1, fa2 ; RV32IFD-NEXT: fmv.s fs2, fa1 ; RV32IFD-NEXT: mv s0, a0 -; RV32IFD-NEXT: addi a0, a0, 16 +; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call frexpf ; RV32IFD-NEXT: fmv.s fs3, fa0 -; RV32IFD-NEXT: addi a0, s0, 20 +; RV32IFD-NEXT: addi a0, sp, 12 ; RV32IFD-NEXT: fmv.s fa0, fs2 ; RV32IFD-NEXT: call frexpf ; RV32IFD-NEXT: fmv.s fs2, fa0 -; RV32IFD-NEXT: addi a0, s0, 24 +; RV32IFD-NEXT: addi a0, sp, 16 ; RV32IFD-NEXT: fmv.s fa0, fs1 ; RV32IFD-NEXT: call frexpf ; RV32IFD-NEXT: fmv.s fs1, fa0 -; RV32IFD-NEXT: addi a0, s0, 28 +; RV32IFD-NEXT: addi a0, sp, 20 ; RV32IFD-NEXT: fmv.s fa0, fs0 ; RV32IFD-NEXT: call frexpf +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a3, 20(sp) +; RV32IFD-NEXT: sw a0, 16(s0) +; RV32IFD-NEXT: sw a1, 20(s0) +; RV32IFD-NEXT: sw a2, 24(s0) +; RV32IFD-NEXT: sw a3, 28(s0) ; RV32IFD-NEXT: fsw fs3, 0(s0) ; RV32IFD-NEXT: fsw fs2, 4(s0) ; RV32IFD-NEXT: fsw fs1, 8(s0) ; RV32IFD-NEXT: fsw fa0, 12(s0) -; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: fld fs1, 24(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: fld fs2, 16(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: fld fs3, 8(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 48 +; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 64 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_frexp_v4f32_v4i32: @@ -631,44 +639,52 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; ; RV32IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 -; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s4, 8(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: addi sp, sp, -48 +; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s4, 24(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: mv s0, a4 ; RV32IZFINXZDINX-NEXT: mv s1, a3 ; RV32IZFINXZDINX-NEXT: mv s2, a2 ; RV32IZFINXZDINX-NEXT: mv a2, a1 ; RV32IZFINXZDINX-NEXT: mv s3, a0 -; RV32IZFINXZDINX-NEXT: addi a1, a0, 16 +; RV32IZFINXZDINX-NEXT: addi a1, sp, 8 ; RV32IZFINXZDINX-NEXT: mv a0, a2 ; RV32IZFINXZDINX-NEXT: call frexpf ; RV32IZFINXZDINX-NEXT: mv s4, a0 -; RV32IZFINXZDINX-NEXT: addi a1, s3, 20 +; RV32IZFINXZDINX-NEXT: addi a1, sp, 12 ; RV32IZFINXZDINX-NEXT: mv a0, s2 ; RV32IZFINXZDINX-NEXT: call frexpf ; RV32IZFINXZDINX-NEXT: mv s2, a0 -; RV32IZFINXZDINX-NEXT: addi a1, s3, 24 +; RV32IZFINXZDINX-NEXT: addi a1, sp, 16 ; RV32IZFINXZDINX-NEXT: mv a0, s1 ; RV32IZFINXZDINX-NEXT: call frexpf ; RV32IZFINXZDINX-NEXT: mv s1, a0 -; RV32IZFINXZDINX-NEXT: addi a1, s3, 28 +; RV32IZFINXZDINX-NEXT: addi a1, sp, 20 ; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call frexpf +; RV32IZFINXZDINX-NEXT: lw a1, 8(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 16(sp) +; RV32IZFINXZDINX-NEXT: lw a4, 20(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 16(s3) +; RV32IZFINXZDINX-NEXT: sw a2, 20(s3) +; RV32IZFINXZDINX-NEXT: sw a3, 24(s3) +; RV32IZFINXZDINX-NEXT: sw a4, 28(s3) ; RV32IZFINXZDINX-NEXT: sw s4, 0(s3) ; RV32IZFINXZDINX-NEXT: sw s2, 4(s3) ; RV32IZFINXZDINX-NEXT: sw s1, 8(s3) ; RV32IZFINXZDINX-NEXT: sw a0, 12(s3) -; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s4, 8(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 +; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 48 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32: @@ -1080,34 +1096,41 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV32IFD-LABEL: test_frexp_v4f32_v4i32_only_use_exp: ; RV32IFD: # %bb.0: -; RV32IFD-NEXT: addi sp, sp, -32 -; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill -; RV32IFD-NEXT: fmv.s fs0, fa2 -; RV32IFD-NEXT: fmv.s fs1, fa1 -; RV32IFD-NEXT: fmv.s fs2, fa0 +; RV32IFD-NEXT: addi sp, sp, -48 +; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fsd fs1, 24(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fsd fs2, 16(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fmv.s fs0, fa3 +; RV32IFD-NEXT: fmv.s fs1, fa2 +; RV32IFD-NEXT: fmv.s fs2, fa1 ; RV32IFD-NEXT: mv s0, a0 -; RV32IFD-NEXT: addi a0, a0, 12 -; RV32IFD-NEXT: fmv.s fa0, fa3 +; RV32IFD-NEXT: mv a0, sp ; RV32IFD-NEXT: call frexpf -; RV32IFD-NEXT: addi a0, s0, 8 -; RV32IFD-NEXT: fmv.s fa0, fs0 +; RV32IFD-NEXT: addi a0, sp, 4 +; RV32IFD-NEXT: fmv.s fa0, fs2 ; RV32IFD-NEXT: call frexpf -; RV32IFD-NEXT: addi a0, s0, 4 +; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: fmv.s fa0, fs1 ; RV32IFD-NEXT: call frexpf -; RV32IFD-NEXT: fmv.s fa0, fs2 -; RV32IFD-NEXT: mv a0, s0 +; RV32IFD-NEXT: addi a0, sp, 12 +; RV32IFD-NEXT: fmv.s fa0, fs0 ; RV32IFD-NEXT: call frexpf -; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload -; RV32IFD-NEXT: addi sp, sp, 32 +; RV32IFD-NEXT: lw a0, 0(sp) +; RV32IFD-NEXT: lw a1, 4(sp) +; RV32IFD-NEXT: lw a2, 8(sp) +; RV32IFD-NEXT: lw a3, 12(sp) +; RV32IFD-NEXT: sw a0, 0(s0) +; RV32IFD-NEXT: sw a1, 4(s0) +; RV32IFD-NEXT: sw a2, 8(s0) +; RV32IFD-NEXT: sw a3, 12(s0) +; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: fld fs1, 24(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: fld fs2, 16(sp) # 8-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 48 ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: test_frexp_v4f32_v4i32_only_use_exp: @@ -1151,34 +1174,43 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; ; RV32IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32_only_use_exp: ; RV32IZFINXZDINX: # %bb.0: -; RV32IZFINXZDINX-NEXT: addi sp, sp, -32 -; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: mv s0, a3 -; RV32IZFINXZDINX-NEXT: mv s1, a2 -; RV32IZFINXZDINX-NEXT: mv s2, a1 +; RV32IZFINXZDINX-NEXT: addi sp, sp, -48 +; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32IZFINXZDINX-NEXT: mv s0, a4 +; RV32IZFINXZDINX-NEXT: mv s1, a3 +; RV32IZFINXZDINX-NEXT: mv s2, a2 +; RV32IZFINXZDINX-NEXT: mv a2, a1 ; RV32IZFINXZDINX-NEXT: mv s3, a0 -; RV32IZFINXZDINX-NEXT: addi a1, a0, 12 -; RV32IZFINXZDINX-NEXT: mv a0, a4 +; RV32IZFINXZDINX-NEXT: addi a1, sp, 12 +; RV32IZFINXZDINX-NEXT: mv a0, a2 ; RV32IZFINXZDINX-NEXT: call frexpf -; RV32IZFINXZDINX-NEXT: addi a1, s3, 8 -; RV32IZFINXZDINX-NEXT: mv a0, s0 +; RV32IZFINXZDINX-NEXT: addi a1, sp, 16 +; RV32IZFINXZDINX-NEXT: mv a0, s2 ; RV32IZFINXZDINX-NEXT: call frexpf -; RV32IZFINXZDINX-NEXT: addi a1, s3, 4 +; RV32IZFINXZDINX-NEXT: addi a1, sp, 20 ; RV32IZFINXZDINX-NEXT: mv a0, s1 ; RV32IZFINXZDINX-NEXT: call frexpf -; RV32IZFINXZDINX-NEXT: mv a0, s2 -; RV32IZFINXZDINX-NEXT: mv a1, s3 +; RV32IZFINXZDINX-NEXT: addi a1, sp, 24 +; RV32IZFINXZDINX-NEXT: mv a0, s0 ; RV32IZFINXZDINX-NEXT: call frexpf -; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32IZFINXZDINX-NEXT: addi sp, sp, 32 +; RV32IZFINXZDINX-NEXT: lw a0, 12(sp) +; RV32IZFINXZDINX-NEXT: lw a1, 16(sp) +; RV32IZFINXZDINX-NEXT: lw a2, 20(sp) +; RV32IZFINXZDINX-NEXT: lw a3, 24(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 0(s3) +; RV32IZFINXZDINX-NEXT: sw a1, 4(s3) +; RV32IZFINXZDINX-NEXT: sw a2, 8(s3) +; RV32IZFINXZDINX-NEXT: sw a3, 12(s3) +; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32IZFINXZDINX-NEXT: addi sp, sp, 48 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32_only_use_exp: diff --git a/llvm/test/CodeGen/X86/llvm.frexp.ll b/llvm/test/CodeGen/X86/llvm.frexp.ll index 96de34519556d..cd560ad627de4 100644 --- a/llvm/test/CodeGen/X86/llvm.frexp.ll +++ b/llvm/test/CodeGen/X86/llvm.frexp.ll @@ -325,27 +325,28 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) { ; ; WIN32-LABEL: test_frexp_v4f32_v4i32: ; WIN32: # %bb.0: +; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: subl $44, %esp +; WIN32-NEXT: subl $60, %esp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi -; WIN32-NEXT: leal 24(%esi), %eax +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: fstpl (%esp) ; WIN32-NEXT: calll _frexp ; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; WIN32-NEXT: leal 20(%esi), %eax +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: fstpl (%esp) ; WIN32-NEXT: calll _frexp ; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill -; WIN32-NEXT: leal 16(%esi), %eax +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: fstpl (%esp) ; WIN32-NEXT: calll _frexp -; WIN32-NEXT: leal 28(%esi), %eax +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: fstpl (%esp) @@ -360,13 +361,22 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) { ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: movl %edi, 28(%esi) +; WIN32-NEXT: movl %edx, 24(%esi) +; WIN32-NEXT: movl %ecx, 20(%esi) +; WIN32-NEXT: movl %eax, 16(%esi) ; WIN32-NEXT: fstps 12(%esi) ; WIN32-NEXT: fstps 8(%esi) ; WIN32-NEXT: fstps 4(%esi) ; WIN32-NEXT: fstps (%esi) ; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: addl $44, %esp +; WIN32-NEXT: addl $60, %esp ; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi ; WIN32-NEXT: retl %result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a) ret { <4 x float>, <4 x i32> } %result @@ -489,35 +499,46 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) { ; ; WIN32-LABEL: test_frexp_v4f32_v4i32_only_use_exp: ; WIN32: # %bb.0: +; WIN32-NEXT: pushl %edi ; WIN32-NEXT: pushl %esi -; WIN32-NEXT: subl $12, %esp +; WIN32-NEXT: subl $28, %esp ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi -; WIN32-NEXT: leal 8(%esi), %eax +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: fstpl (%esp) ; WIN32-NEXT: calll _frexp ; WIN32-NEXT: fstp %st(0) -; WIN32-NEXT: leal 4(%esi), %eax +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: fstpl (%esp) ; WIN32-NEXT: calll _frexp ; WIN32-NEXT: fstp %st(0) -; WIN32-NEXT: leal 12(%esi), %eax +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax ; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: fstpl (%esp) ; WIN32-NEXT: calll _frexp ; WIN32-NEXT: fstp %st(0) -; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; WIN32-NEXT: flds {{[0-9]+}}(%esp) ; WIN32-NEXT: fstpl (%esp) ; WIN32-NEXT: calll _frexp ; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: movl %edi, 12(%esi) +; WIN32-NEXT: movl %edx, 8(%esi) +; WIN32-NEXT: movl %ecx, 4(%esi) +; WIN32-NEXT: movl %eax, (%esi) ; WIN32-NEXT: movl %esi, %eax -; WIN32-NEXT: addl $12, %esp +; WIN32-NEXT: addl $28, %esp ; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi ; WIN32-NEXT: retl %result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a) %result.1 = extractvalue { <4 x float>, <4 x i32> } %result, 1 diff --git a/llvm/test/CodeGen/X86/sincos-stack-args.ll b/llvm/test/CodeGen/X86/sincos-stack-args.ll new file mode 100644 index 0000000000000..9fb3a6769fda1 --- /dev/null +++ b/llvm/test/CodeGen/X86/sincos-stack-args.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp --version 5 +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s +; Test for issue https://github.com/llvm/llvm-project/issues/115323 + +declare double @g(double, double) + +define double @f(double %a) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subl $44, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: fldl 48(%esp) +; CHECK-NEXT: leal 24(%esp), %eax +; CHECK-NEXT: movl %eax, 12(%esp) +; CHECK-NEXT: leal 32(%esp), %eax +; CHECK-NEXT: movl %eax, 8(%esp) +; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: calll sincos +; CHECK-NEXT: fldl 32(%esp) +; CHECK-NEXT: fldl 24(%esp) +; CHECK-NEXT: faddl {{\.?LCPI[0-9]+_[0-9]+}} +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstpl 8(%esp) +; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: calll g@PLT +; CHECK-NEXT: addl $44, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: retl +entry: + %0 = tail call double @llvm.sin.f64(double %a) + %1 = tail call double @llvm.cos.f64(double %a) + %add = fadd double %1, 3.140000e+00 + %call = tail call double @g(double %add, double %0) + ret double %call +}