-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[SDAG] Limit sincos/frexp stack slot folding to stores chained to entry #115906
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
When the chain is not the entry node there is a risk the stores are within a (CALLSEQ_START, CALLSEQ_END), which when the node is expanded will lead to nested call sequences. It should be possible to check for this and allow more cases, but for now, let's limit this to cases where it's definitely safe. Fixes llvm#115323
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: Benjamin Maxwell (MacDue) ChangesWhen the chain is not the entry node there is a risk the stores are within a (CALLSEQ_START, CALLSEQ_END), which when the node is expanded will lead to nested call sequences. It should be possible to check for this and allow more cases, but for now, let's limit this to cases where it's definitely safe. Fixes #115323 Patch is 22.39 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/115906.diff 5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 901e63c47fac17..3a8ec3c6105bc0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2509,7 +2509,11 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
// Find users of the node that store the results (and share input chains). The
// destination pointers can be used instead of creating stack allocations.
- SDValue StoresInChain{};
+ // FIXME: This should allow stores with the same chains (not just the entry
+ // chain), but there's a risk the store is within a (CALLSEQ_START,
+ // CALLSEQ_END) pair, which after this expansion will lead to nested call
+ // sequences.
+ SDValue InChain = getEntryNode();
SmallVector<StoreSDNode *, 2> ResultStores(NumResults);
for (SDNode *User : Node->uses()) {
if (!ISD::isNormalStore(User))
@@ -2522,11 +2526,9 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
ST->getAddressSpace() != 0 ||
ST->getAlign() <
getDataLayout().getABITypeAlign(StoreType->getScalarType()) ||
- (StoresInChain && ST->getChain() != StoresInChain) ||
- Node->isPredecessorOf(ST->getChain().getNode()))
+ ST->getChain() != InChain)
continue;
ResultStores[ResNo] = ST;
- StoresInChain = ST->getChain();
}
TargetLowering::ArgListTy Args;
@@ -2568,7 +2570,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
Type *RetType = CallRetResNo.has_value()
? Node->getValueType(*CallRetResNo).getTypeForEVT(Ctx)
: Type::getVoidTy(Ctx);
- SDValue InChain = StoresInChain ? StoresInChain : getEntryNode();
SDValue Callee = getExternalSymbol(VD ? VD->getVectorFnName().data() : LCName,
TLI->getPointerTy(getDataLayout()));
TargetLowering::CallLoweringInfo CLI(*this);
diff --git a/llvm/test/CodeGen/PowerPC/f128-arith.ll b/llvm/test/CodeGen/PowerPC/f128-arith.ll
index decc4a38f7ccd4..35e5d61947ead7 100644
--- a/llvm/test/CodeGen/PowerPC/f128-arith.ll
+++ b/llvm/test/CodeGen/PowerPC/f128-arith.ll
@@ -1365,33 +1365,45 @@ define dso_local fp128 @qpFREXP(ptr %a, ptr %b) {
; CHECK-LABEL: qpFREXP:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
-; CHECK-NEXT: stdu r1, -32(r1)
-; CHECK-NEXT: std r0, 48(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: stdu r1, -64(r1)
+; CHECK-NEXT: std r0, 80(r1)
+; CHECK-NEXT: addi r5, r1, 44
+; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: lxv v2, 0(r3)
-; CHECK-NEXT: mr r5, r4
; CHECK-NEXT: bl frexpf128
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r1, r1, 32
+; CHECK-NEXT: lwz r3, 44(r1)
+; CHECK-NEXT: stw r3, 0(r30)
+; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFREXP:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
-; CHECK-P8-NEXT: stdu r1, -32(r1)
-; CHECK-P8-NEXT: std r0, 48(r1)
-; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
+; CHECK-P8-NEXT: .cfi_def_cfa_offset 64
; CHECK-P8-NEXT: .cfi_offset lr, 16
+; CHECK-P8-NEXT: .cfi_offset r30, -16
+; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-P8-NEXT: stdu r1, -64(r1)
+; CHECK-P8-NEXT: std r0, 80(r1)
+; CHECK-P8-NEXT: addi r5, r1, 44
+; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
-; CHECK-P8-NEXT: mr r5, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl frexpf128
; CHECK-P8-NEXT: nop
-; CHECK-P8-NEXT: addi r1, r1, 32
+; CHECK-P8-NEXT: lwz r3, 44(r1)
+; CHECK-P8-NEXT: stw r3, 0(r30)
+; CHECK-P8-NEXT: addi r1, r1, 64
; CHECK-P8-NEXT: ld r0, 16(r1)
+; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
index 3f615d23d3eaf6..e85a7118f5ff83 100644
--- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll
+++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll
@@ -543,42 +543,50 @@ define i32 @test_frexp_f32_i32_only_use_exp(float %a) nounwind {
define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwind {
; RV32IFD-LABEL: test_frexp_v4f32_v4i32:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -48
-; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs1, 24(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs2, 16(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs3, 8(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: addi sp, sp, -64
+; RV32IFD-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: fsd fs0, 48(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs1, 40(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs2, 32(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs3, 24(sp) # 8-byte Folded Spill
; RV32IFD-NEXT: fmv.s fs0, fa3
; RV32IFD-NEXT: fmv.s fs1, fa2
; RV32IFD-NEXT: fmv.s fs2, fa1
; RV32IFD-NEXT: mv s0, a0
-; RV32IFD-NEXT: addi a0, a0, 16
+; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs3, fa0
-; RV32IFD-NEXT: addi a0, s0, 20
+; RV32IFD-NEXT: addi a0, sp, 12
; RV32IFD-NEXT: fmv.s fa0, fs2
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs2, fa0
-; RV32IFD-NEXT: addi a0, s0, 24
+; RV32IFD-NEXT: addi a0, sp, 16
; RV32IFD-NEXT: fmv.s fa0, fs1
; RV32IFD-NEXT: call frexpf
; RV32IFD-NEXT: fmv.s fs1, fa0
-; RV32IFD-NEXT: addi a0, s0, 28
+; RV32IFD-NEXT: addi a0, sp, 20
; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
+; RV32IFD-NEXT: lw a0, 8(sp)
+; RV32IFD-NEXT: lw a1, 12(sp)
+; RV32IFD-NEXT: lw a2, 16(sp)
+; RV32IFD-NEXT: lw a3, 20(sp)
+; RV32IFD-NEXT: sw a0, 16(s0)
+; RV32IFD-NEXT: sw a1, 20(s0)
+; RV32IFD-NEXT: sw a2, 24(s0)
+; RV32IFD-NEXT: sw a3, 28(s0)
; RV32IFD-NEXT: fsw fs3, 0(s0)
; RV32IFD-NEXT: fsw fs2, 4(s0)
; RV32IFD-NEXT: fsw fs1, 8(s0)
; RV32IFD-NEXT: fsw fa0, 12(s0)
-; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs1, 24(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs2, 16(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs3, 8(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 48
+; RV32IFD-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: fld fs0, 48(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs1, 40(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs2, 32(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs3, 24(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 64
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_frexp_v4f32_v4i32:
@@ -631,44 +639,52 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi
;
; RV32IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
-; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s4, 8(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: addi sp, sp, -48
+; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s4, 24(sp) # 4-byte Folded Spill
; RV32IZFINXZDINX-NEXT: mv s0, a4
; RV32IZFINXZDINX-NEXT: mv s1, a3
; RV32IZFINXZDINX-NEXT: mv s2, a2
; RV32IZFINXZDINX-NEXT: mv a2, a1
; RV32IZFINXZDINX-NEXT: mv s3, a0
-; RV32IZFINXZDINX-NEXT: addi a1, a0, 16
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 8
; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s4, a0
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 20
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 12
; RV32IZFINXZDINX-NEXT: mv a0, s2
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s2, a0
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 24
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 16
; RV32IZFINXZDINX-NEXT: mv a0, s1
; RV32IZFINXZDINX-NEXT: call frexpf
; RV32IZFINXZDINX-NEXT: mv s1, a0
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 28
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 20
; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
+; RV32IZFINXZDINX-NEXT: lw a1, 8(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a3, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a4, 20(sp)
+; RV32IZFINXZDINX-NEXT: sw a1, 16(s3)
+; RV32IZFINXZDINX-NEXT: sw a2, 20(s3)
+; RV32IZFINXZDINX-NEXT: sw a3, 24(s3)
+; RV32IZFINXZDINX-NEXT: sw a4, 28(s3)
; RV32IZFINXZDINX-NEXT: sw s4, 0(s3)
; RV32IZFINXZDINX-NEXT: sw s2, 4(s3)
; RV32IZFINXZDINX-NEXT: sw s1, 8(s3)
; RV32IZFINXZDINX-NEXT: sw a0, 12(s3)
-; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s4, 8(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
+; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s4, 24(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32:
@@ -1080,34 +1096,41 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi
define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
; RV32IFD-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; RV32IFD: # %bb.0:
-; RV32IFD-NEXT: addi sp, sp, -32
-; RV32IFD-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IFD-NEXT: fsd fs0, 16(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs1, 8(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fsd fs2, 0(sp) # 8-byte Folded Spill
-; RV32IFD-NEXT: fmv.s fs0, fa2
-; RV32IFD-NEXT: fmv.s fs1, fa1
-; RV32IFD-NEXT: fmv.s fs2, fa0
+; RV32IFD-NEXT: addi sp, sp, -48
+; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT: fsd fs0, 32(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs1, 24(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fsd fs2, 16(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT: fmv.s fs0, fa3
+; RV32IFD-NEXT: fmv.s fs1, fa2
+; RV32IFD-NEXT: fmv.s fs2, fa1
; RV32IFD-NEXT: mv s0, a0
-; RV32IFD-NEXT: addi a0, a0, 12
-; RV32IFD-NEXT: fmv.s fa0, fa3
+; RV32IFD-NEXT: mv a0, sp
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 8
-; RV32IFD-NEXT: fmv.s fa0, fs0
+; RV32IFD-NEXT: addi a0, sp, 4
+; RV32IFD-NEXT: fmv.s fa0, fs2
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: addi a0, s0, 4
+; RV32IFD-NEXT: addi a0, sp, 8
; RV32IFD-NEXT: fmv.s fa0, fs1
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: fmv.s fa0, fs2
-; RV32IFD-NEXT: mv a0, s0
+; RV32IFD-NEXT: addi a0, sp, 12
+; RV32IFD-NEXT: fmv.s fa0, fs0
; RV32IFD-NEXT: call frexpf
-; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT: fld fs0, 16(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs1, 8(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: fld fs2, 0(sp) # 8-byte Folded Reload
-; RV32IFD-NEXT: addi sp, sp, 32
+; RV32IFD-NEXT: lw a0, 0(sp)
+; RV32IFD-NEXT: lw a1, 4(sp)
+; RV32IFD-NEXT: lw a2, 8(sp)
+; RV32IFD-NEXT: lw a3, 12(sp)
+; RV32IFD-NEXT: sw a0, 0(s0)
+; RV32IFD-NEXT: sw a1, 4(s0)
+; RV32IFD-NEXT: sw a2, 8(s0)
+; RV32IFD-NEXT: sw a3, 12(s0)
+; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT: fld fs0, 32(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs1, 24(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: fld fs2, 16(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT: addi sp, sp, 48
; RV32IFD-NEXT: ret
;
; RV64IFD-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
@@ -1151,34 +1174,43 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind {
;
; RV32IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; RV32IZFINXZDINX: # %bb.0:
-; RV32IZFINXZDINX-NEXT: addi sp, sp, -32
-; RV32IZFINXZDINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s1, 20(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s2, 16(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: sw s3, 12(sp) # 4-byte Folded Spill
-; RV32IZFINXZDINX-NEXT: mv s0, a3
-; RV32IZFINXZDINX-NEXT: mv s1, a2
-; RV32IZFINXZDINX-NEXT: mv s2, a1
+; RV32IZFINXZDINX-NEXT: addi sp, sp, -48
+; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s1, 36(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s2, 32(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: sw s3, 28(sp) # 4-byte Folded Spill
+; RV32IZFINXZDINX-NEXT: mv s0, a4
+; RV32IZFINXZDINX-NEXT: mv s1, a3
+; RV32IZFINXZDINX-NEXT: mv s2, a2
+; RV32IZFINXZDINX-NEXT: mv a2, a1
; RV32IZFINXZDINX-NEXT: mv s3, a0
-; RV32IZFINXZDINX-NEXT: addi a1, a0, 12
-; RV32IZFINXZDINX-NEXT: mv a0, a4
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 12
+; RV32IZFINXZDINX-NEXT: mv a0, a2
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 8
-; RV32IZFINXZDINX-NEXT: mv a0, s0
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 16
+; RV32IZFINXZDINX-NEXT: mv a0, s2
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: addi a1, s3, 4
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 20
; RV32IZFINXZDINX-NEXT: mv a0, s1
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: mv a0, s2
-; RV32IZFINXZDINX-NEXT: mv a1, s3
+; RV32IZFINXZDINX-NEXT: addi a1, sp, 24
+; RV32IZFINXZDINX-NEXT: mv a0, s0
; RV32IZFINXZDINX-NEXT: call frexpf
-; RV32IZFINXZDINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: lw s3, 12(sp) # 4-byte Folded Reload
-; RV32IZFINXZDINX-NEXT: addi sp, sp, 32
+; RV32IZFINXZDINX-NEXT: lw a0, 12(sp)
+; RV32IZFINXZDINX-NEXT: lw a1, 16(sp)
+; RV32IZFINXZDINX-NEXT: lw a2, 20(sp)
+; RV32IZFINXZDINX-NEXT: lw a3, 24(sp)
+; RV32IZFINXZDINX-NEXT: sw a0, 0(s3)
+; RV32IZFINXZDINX-NEXT: sw a1, 4(s3)
+; RV32IZFINXZDINX-NEXT: sw a2, 8(s3)
+; RV32IZFINXZDINX-NEXT: sw a3, 12(s3)
+; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: lw s3, 28(sp) # 4-byte Folded Reload
+; RV32IZFINXZDINX-NEXT: addi sp, sp, 48
; RV32IZFINXZDINX-NEXT: ret
;
; RV64IZFINXZDINX-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
diff --git a/llvm/test/CodeGen/X86/llvm.frexp.ll b/llvm/test/CodeGen/X86/llvm.frexp.ll
index 96de34519556d0..cd560ad627de4c 100644
--- a/llvm/test/CodeGen/X86/llvm.frexp.ll
+++ b/llvm/test/CodeGen/X86/llvm.frexp.ll
@@ -325,27 +325,28 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
;
; WIN32-LABEL: test_frexp_v4f32_v4i32:
; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $44, %esp
+; WIN32-NEXT: subl $60, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: leal 24(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
-; WIN32-NEXT: leal 20(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
-; WIN32-NEXT: leal 16(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
-; WIN32-NEXT: leal 28(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
@@ -360,13 +361,22 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) {
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; WIN32-NEXT: movl %edi, 28(%esi)
+; WIN32-NEXT: movl %edx, 24(%esi)
+; WIN32-NEXT: movl %ecx, 20(%esi)
+; WIN32-NEXT: movl %eax, 16(%esi)
; WIN32-NEXT: fstps 12(%esi)
; WIN32-NEXT: fstps 8(%esi)
; WIN32-NEXT: fstps 4(%esi)
; WIN32-NEXT: fstps (%esi)
; WIN32-NEXT: movl %esi, %eax
-; WIN32-NEXT: addl $44, %esp
+; WIN32-NEXT: addl $60, %esp
; WIN32-NEXT: popl %esi
+; WIN32-NEXT: popl %edi
; WIN32-NEXT: retl
%result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a)
ret { <4 x float>, <4 x i32> } %result
@@ -489,35 +499,46 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) {
;
; WIN32-LABEL: test_frexp_v4f32_v4i32_only_use_exp:
; WIN32: # %bb.0:
+; WIN32-NEXT: pushl %edi
; WIN32-NEXT: pushl %esi
-; WIN32-NEXT: subl $12, %esp
+; WIN32-NEXT: subl $28, %esp
; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; WIN32-NEXT: leal 8(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: leal 4(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WIN32-NEXT: fstp %st(0)
-; WIN32-NEXT: leal 12(%esi), %eax
+; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax
; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; WIN32-NEXT: flds {{[0-9]+}}(%esp)
; WIN32-NEXT: fstpl (%esp)
; WIN32-NEXT: calll _frexp
; WI...
[truncated]
|
efriedma-quic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please check the buildbot failure; LGTM if it's not related.
Thanks 👍 The failing test is |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/8047 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/196/builds/927 Here is the relevant piece of the build log for the reference |
When the chain is not the entry node there is a risk the stores are within a (CALLSEQ_START, CALLSEQ_END), which when the node is expanded will lead to nested call sequences.
It should be possible to check for this and allow more cases, but for now, let's limit this to cases where it's definitely safe.
Fixes #115323