From 7745181604091de4895dad4dfa6c1ecdba7917cb Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 31 Oct 2024 17:32:24 -0700 Subject: [PATCH 1/6] [PowerPC] regenerate ctrloop-cpsgn.ll test --- llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll | 80 +++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll index a114438a87476..1d1612928dbc2 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll @@ -1,9 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs < %s -mcpu=ppc | FileCheck %s target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" target triple = "powerpc-unknown-linux-gnu" define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly { +; CHECK-LABEL: foo: +; CHECK-NOT: mtctr +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stwu 1, -176(1) +; CHECK-NEXT: stw 0, 180(1) +; CHECK-NEXT: stfd 2, 128(1) +; CHECK-NEXT: lwz 3, 132(1) +; CHECK-NEXT: stfd 1, 136(1) +; CHECK-NEXT: stw 3, 148(1) +; CHECK-NEXT: lwz 3, 128(1) +; CHECK-NEXT: stfd 31, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: stw 3, 144(1) +; CHECK-NEXT: lwz 3, 140(1) +; CHECK-NEXT: lfd 0, 144(1) +; CHECK-NEXT: stw 3, 156(1) +; CHECK-NEXT: lwz 3, 136(1) +; CHECK-NEXT: stw 30, 160(1) # 4-byte Folded Spill +; CHECK-NEXT: li 30, 2048 +; CHECK-NEXT: stw 3, 152(1) +; CHECK-NEXT: lfd 31, 152(1) +; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: .LBB0_1: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: stfd 1, 64(1) +; CHECK-NEXT: lwz 3, 68(1) +; CHECK-NEXT: stfd 31, 88(1) +; CHECK-NEXT: stw 3, 84(1) +; CHECK-NEXT: lwz 3, 64(1) +; CHECK-NEXT: stfd 0, 56(1) +; CHECK-NEXT: stw 3, 80(1) +; CHECK-NEXT: lwz 3, 92(1) +; CHECK-NEXT: lfd 4, 96(1) +; CHECK-NEXT: stw 3, 108(1) +; CHECK-NEXT: lwz 3, 88(1) +; CHECK-NEXT: lfd 1, 80(1) +; CHECK-NEXT: stw 3, 104(1) +; CHECK-NEXT: lwz 3, 60(1) +; CHECK-NEXT: lfd 3, 104(1) +; CHECK-NEXT: stw 3, 76(1) +; CHECK-NEXT: lwz 3, 56(1) +; CHECK-NEXT: stw 3, 72(1) +; CHECK-NEXT: lfd 2, 72(1) +; CHECK-NEXT: bl copysignl +; CHECK-NEXT: stfd 2, 48(1) +; CHECK-NEXT: addi 30, 30, -1 +; CHECK-NEXT: lwz 3, 52(1) +; CHECK-NEXT: cmplwi 30, 0 +; CHECK-NEXT: stfd 1, 40(1) +; CHECK-NEXT: stw 3, 116(1) +; CHECK-NEXT: lwz 3, 48(1) +; CHECK-NEXT: stw 3, 112(1) +; CHECK-NEXT: lwz 3, 44(1) +; CHECK-NEXT: lfd 0, 112(1) +; CHECK-NEXT: stw 3, 124(1) +; CHECK-NEXT: lwz 3, 40(1) +; CHECK-NEXT: stw 3, 120(1) +; CHECK-NEXT: lfd 1, 120(1) +; CHECK-NEXT: bc 12, 1, .LBB0_1 +; CHECK-NEXT: # %bb.2: # %for.end +; CHECK-NEXT: stfd 1, 16(1) +; CHECK-NEXT: lwz 3, 20(1) +; CHECK-NEXT: stfd 0, 8(1) +; CHECK-NEXT: stw 3, 36(1) +; CHECK-NEXT: lwz 3, 16(1) +; CHECK-NEXT: lfd 31, 168(1) # 8-byte Folded Reload +; CHECK-NEXT: stw 3, 32(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: lfd 1, 32(1) +; CHECK-NEXT: stw 3, 28(1) +; CHECK-NEXT: lwz 3, 8(1) +; CHECK-NEXT: lwz 30, 160(1) # 4-byte Folded Reload +; CHECK-NEXT: stw 3, 24(1) +; CHECK-NEXT: lfd 2, 24(1) +; CHECK-NEXT: lwz 0, 180(1) +; CHECK-NEXT: addi 1, 1, 176 +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr entry: br label %for.body @@ -23,6 +102,5 @@ for.end: ; preds = %for.body declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0 -; CHECK: @foo ; CHECK-NOT: mtctr From 5aaacd72260f93d053253a1353ea90ca46208a8c Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 31 Oct 2024 17:32:35 -0700 Subject: [PATCH 2/6] [CodeGen] Demand llvm.copysign.f{16,32,64,80,128} lowers without libcalls This makes real what is already true: Copysign does not ever need to lower to runtime libcalls! Its operation should be possible to always implement via bitops. --- llvm/lib/CodeGen/IntrinsicLowering.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp index f799a8cfc1ba7..0c3cb8ccc124b 100644 --- a/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -438,7 +438,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::copysign: { - ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl"); + switch (CI->getArgOperand(0)->getType()->getTypeID()) { + default: + report_fatal_error("copysign intrinsic without arch-specific floats " + "reached intrinsic-to-libcall lowering"); + break; + case Type::PPC_FP128TyID: + ReplaceCallWith("copysignl", CI, CI->arg_begin(), CI->arg_end(), + Type::getFloatTy(CI->getContext())); + } break; } case Intrinsic::get_rounding: From e277a8ed89c642cdb4838c1c73073eccfae4b77e Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sat, 5 Oct 2024 15:47:58 -0700 Subject: [PATCH 3/6] [SelectionDAG] Only lower COPYSIGN_PPC_F128 to copysignl This reduces the burden on frontends that wish to support float ops without needing a C compiler to build LLVM's compiler-rt for that target, e.g. so that they can be a fully self-contained toolchain for bare-metal. All other floats are expanded for all current architectures just fine. PowerPC, however, does not efficiently legalize its very own float. --- llvm/include/llvm/IR/RuntimeLibcalls.def | 4 ---- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 12 ++++++------ llvm/lib/IR/RuntimeLibcalls.cpp | 1 - llvm/lib/Target/SystemZ/ZOSLibcallNames.def | 3 --- .../WebAssemblyRuntimeLibcallSignatures.cpp | 3 --- 5 files changed, 6 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 4aab658a86690..62ee43cfc54ae 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -289,10 +289,6 @@ HANDLE_LIBCALL(FLOOR_F64, "floor") HANDLE_LIBCALL(FLOOR_F80, "floorl") HANDLE_LIBCALL(FLOOR_F128, "floorl") HANDLE_LIBCALL(FLOOR_PPCF128, "floorl") -HANDLE_LIBCALL(COPYSIGN_F32, "copysignf") -HANDLE_LIBCALL(COPYSIGN_F64, "copysign") -HANDLE_LIBCALL(COPYSIGN_F80, "copysignl") -HANDLE_LIBCALL(COPYSIGN_F128, "copysignl") HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl") HANDLE_LIBCALL(FMIN_F32, "fminf") HANDLE_LIBCALL(FMIN_F64, "fmin") diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index fa2731ff7dbda..146b5d720437d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1709,12 +1709,12 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi) { - ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), - RTLIB::COPYSIGN_F32, - RTLIB::COPYSIGN_F64, - RTLIB::COPYSIGN_F80, - RTLIB::COPYSIGN_F128, - RTLIB::COPYSIGN_PPCF128), Lo, Hi); + + EVT VT = N->getValueType(0); + ExpandFloatRes_Binary( + N, + (VT == MVT::ppcf128 ? RTLIB::COPYSIGN_PPCF128 : RTLIB::UNKNOWN_LIBCALL), + Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp index 06167559a7769..fba997a88279d 100644 --- a/llvm/lib/IR/RuntimeLibcalls.cpp +++ b/llvm/lib/IR/RuntimeLibcalls.cpp @@ -62,7 +62,6 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) { setLibcallName(RTLIB::ROUND_F128, "roundf128"); setLibcallName(RTLIB::ROUNDEVEN_F128, "roundevenf128"); setLibcallName(RTLIB::FLOOR_F128, "floorf128"); - setLibcallName(RTLIB::COPYSIGN_F128, "copysignf128"); setLibcallName(RTLIB::FMIN_F128, "fminf128"); setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); setLibcallName(RTLIB::LROUND_F128, "lroundf128"); diff --git a/llvm/lib/Target/SystemZ/ZOSLibcallNames.def b/llvm/lib/Target/SystemZ/ZOSLibcallNames.def index 12a01522a7e64..a53c9618696fc 100644 --- a/llvm/lib/Target/SystemZ/ZOSLibcallNames.def +++ b/llvm/lib/Target/SystemZ/ZOSLibcallNames.def @@ -87,9 +87,6 @@ HANDLE_LIBCALL(EXP2_F128, "@@LXP2@B") HANDLE_LIBCALL(COS_F64, "@@SCOS@B") HANDLE_LIBCALL(COS_F32, "@@FCOS@B") HANDLE_LIBCALL(COS_F128, "@@LCOS@B") -HANDLE_LIBCALL(COPYSIGN_F64, "@@DCPY@B") -HANDLE_LIBCALL(COPYSIGN_F32, "@@FCPY@B") -HANDLE_LIBCALL(COPYSIGN_F128, "@@LCPY@B") HANDLE_LIBCALL(CEIL_F64, "@@SCEL@B") HANDLE_LIBCALL(CEIL_F32, "@@FCEL@B") HANDLE_LIBCALL(CEIL_F128, "@@LCEL@B") diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index aaa5225670721..9a8d73bee1c0b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -264,9 +264,6 @@ struct RuntimeLibcallSignatureTable { Table[RTLIB::FLOOR_F32] = f32_func_f32; Table[RTLIB::FLOOR_F64] = f64_func_f64; Table[RTLIB::FLOOR_F128] = i64_i64_func_i64_i64; - Table[RTLIB::COPYSIGN_F32] = f32_func_f32_f32; - Table[RTLIB::COPYSIGN_F64] = f64_func_f64_f64; - Table[RTLIB::COPYSIGN_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::FMIN_F32] = f32_func_f32_f32; Table[RTLIB::FMIN_F64] = f64_func_f64_f64; Table[RTLIB::FMIN_F128] = i64_i64_func_i64_i64_i64_i64; From 5fd0d86737fa7b0a97c6182d323291b558ec8fe9 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Sat, 12 Oct 2024 01:31:45 -0700 Subject: [PATCH 4/6] [PowerPC][SelectionDAG] Expand `@llvm.copysign.ppc_fp128` without copysignl This allows ripping out the remaining copysignl infra. --- llvm/include/llvm/IR/RuntimeLibcalls.def | 1 - llvm/lib/CodeGen/IntrinsicLowering.cpp | 12 -- .../SelectionDAG/LegalizeFloatTypes.cpp | 17 ++- llvm/test/CodeGen/PowerPC/copysignl.ll | 105 ++++++------- llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll | 110 +++++++------- .../PowerPC/fp128-bitcast-after-operation.ll | 138 +++++++----------- 6 files changed, 161 insertions(+), 222 deletions(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 62ee43cfc54ae..4f986d3c7450b 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -289,7 +289,6 @@ HANDLE_LIBCALL(FLOOR_F64, "floor") HANDLE_LIBCALL(FLOOR_F80, "floorl") HANDLE_LIBCALL(FLOOR_F128, "floorl") HANDLE_LIBCALL(FLOOR_PPCF128, "floorl") -HANDLE_LIBCALL(COPYSIGN_PPCF128, "copysignl") HANDLE_LIBCALL(FMIN_F32, "fminf") HANDLE_LIBCALL(FMIN_F64, "fmin") HANDLE_LIBCALL(FMIN_F80, "fminl") diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp index 0c3cb8ccc124b..fda30c65292b2 100644 --- a/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -437,18 +437,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { ReplaceFPIntrinsicWithCall(CI, "roundevenf", "roundeven", "roundevenl"); break; } - case Intrinsic::copysign: { - switch (CI->getArgOperand(0)->getType()->getTypeID()) { - default: - report_fatal_error("copysign intrinsic without arch-specific floats " - "reached intrinsic-to-libcall lowering"); - break; - case Type::PPC_FP128TyID: - ReplaceCallWith("copysignl", CI, CI->arg_begin(), CI->arg_end(), - Type::getFloatTy(CI->getContext())); - } - break; - } case Intrinsic::get_rounding: // Lower to "round to the nearest" if (!CI->getType()->isVoidTy()) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 146b5d720437d..e30d9b1a0f31f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1710,11 +1710,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi) { - EVT VT = N->getValueType(0); - ExpandFloatRes_Binary( - N, - (VT == MVT::ppcf128 ? RTLIB::COPYSIGN_PPCF128 : RTLIB::UNKNOWN_LIBCALL), - Lo, Hi); + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDLoc DL = SDLoc(N); + SDValue Tmp = SDValue(); + GetExpandedFloat(N->getOperand(0), Lo, Tmp); + + Hi = DAG.getNode(ISD::FCOPYSIGN, DL, Tmp.getValueType(), Tmp, + N->getOperand(1)); + // A double-double is Hi + Lo, so if Hi flips sign, so must Lo + Lo = DAG.getSelectCC(DL, Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, DL, Lo.getValueType(), Lo), + ISD::SETEQ); } void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, diff --git a/llvm/test/CodeGen/PowerPC/copysignl.ll b/llvm/test/CodeGen/PowerPC/copysignl.ll index 40ed3d803094f..3b865a083dca5 100644 --- a/llvm/test/CodeGen/PowerPC/copysignl.ll +++ b/llvm/test/CodeGen/PowerPC/copysignl.ll @@ -43,31 +43,26 @@ declare double @copysign(double, double) #0 define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 { ; CHECK-LABEL: foo_ll: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stdu 1, -112(1) -; CHECK-NEXT: fmr 3, 2 -; CHECK-NEXT: addis 3, 2, .LCPI2_0@toc@ha -; CHECK-NEXT: std 0, 128(1) -; CHECK-NEXT: lfs 2, .LCPI2_0@toc@l(3) -; CHECK-NEXT: bl copysignl -; CHECK-NEXT: nop -; CHECK-NEXT: addi 1, 1, 112 -; CHECK-NEXT: ld 0, 16(1) -; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: fcpsgn 0, 2, 1 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: li 4, 8 +; CHECK-NEXT: fcmpu 0, 1, 0 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: addis 4, 2, .LCPI2_0@toc@ha +; CHECK-NEXT: addi 4, 4, .LCPI2_0@toc@l +; CHECK-NEXT: lfdx 2, 4, 3 ; CHECK-NEXT: blr ; ; CHECK-VSX-LABEL: foo_ll: ; CHECK-VSX: # %bb.0: # %entry -; CHECK-VSX-NEXT: mflr 0 -; CHECK-VSX-NEXT: stdu 1, -112(1) -; CHECK-VSX-NEXT: fmr 3, 2 +; CHECK-VSX-NEXT: fmr 0, 1 +; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1 ; CHECK-VSX-NEXT: xxlxor 2, 2, 2 -; CHECK-VSX-NEXT: std 0, 128(1) -; CHECK-VSX-NEXT: bl copysignl -; CHECK-VSX-NEXT: nop -; CHECK-VSX-NEXT: addi 1, 1, 112 -; CHECK-VSX-NEXT: ld 0, 16(1) -; CHECK-VSX-NEXT: mtlr 0 +; CHECK-VSX-NEXT: xscmpudp 0, 0, 1 +; CHECK-VSX-NEXT: beqlr 0 +; CHECK-VSX-NEXT: # %bb.1: # %entry +; CHECK-VSX-NEXT: xsnegdp 2, 2 ; CHECK-VSX-NEXT: blr entry: %conv = fpext double %a to ppc_fp128 @@ -78,31 +73,26 @@ entry: define ppc_fp128 @foo_ld(double %a, double %b) #0 { ; CHECK-LABEL: foo_ld: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stdu 1, -112(1) -; CHECK-NEXT: fmr 3, 2 -; CHECK-NEXT: addis 3, 2, .LCPI3_0@toc@ha -; CHECK-NEXT: std 0, 128(1) -; CHECK-NEXT: lfs 2, .LCPI3_0@toc@l(3) -; CHECK-NEXT: bl copysignl -; CHECK-NEXT: nop -; CHECK-NEXT: addi 1, 1, 112 -; CHECK-NEXT: ld 0, 16(1) -; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: fcpsgn 0, 2, 1 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: li 4, 8 +; CHECK-NEXT: fcmpu 0, 1, 0 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: addis 4, 2, .LCPI3_0@toc@ha +; CHECK-NEXT: addi 4, 4, .LCPI3_0@toc@l +; CHECK-NEXT: lfdx 2, 4, 3 ; CHECK-NEXT: blr ; ; CHECK-VSX-LABEL: foo_ld: ; CHECK-VSX: # %bb.0: # %entry -; CHECK-VSX-NEXT: mflr 0 -; CHECK-VSX-NEXT: stdu 1, -112(1) -; CHECK-VSX-NEXT: fmr 3, 2 +; CHECK-VSX-NEXT: fmr 0, 1 +; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1 ; CHECK-VSX-NEXT: xxlxor 2, 2, 2 -; CHECK-VSX-NEXT: std 0, 128(1) -; CHECK-VSX-NEXT: bl copysignl -; CHECK-VSX-NEXT: nop -; CHECK-VSX-NEXT: addi 1, 1, 112 -; CHECK-VSX-NEXT: ld 0, 16(1) -; CHECK-VSX-NEXT: mtlr 0 +; CHECK-VSX-NEXT: xscmpudp 0, 0, 1 +; CHECK-VSX-NEXT: beqlr 0 +; CHECK-VSX-NEXT: # %bb.1: # %entry +; CHECK-VSX-NEXT: xsnegdp 2, 2 ; CHECK-VSX-NEXT: blr entry: %conv = fpext double %a to ppc_fp128 @@ -114,31 +104,26 @@ entry: define ppc_fp128 @foo_lf(double %a, float %b) #0 { ; CHECK-LABEL: foo_lf: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stdu 1, -112(1) -; CHECK-NEXT: fmr 3, 2 -; CHECK-NEXT: addis 3, 2, .LCPI4_0@toc@ha -; CHECK-NEXT: std 0, 128(1) -; CHECK-NEXT: lfs 2, .LCPI4_0@toc@l(3) -; CHECK-NEXT: bl copysignl -; CHECK-NEXT: nop -; CHECK-NEXT: addi 1, 1, 112 -; CHECK-NEXT: ld 0, 16(1) -; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: fcpsgn 0, 2, 1 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: li 4, 8 +; CHECK-NEXT: fcmpu 0, 1, 0 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: addis 4, 2, .LCPI4_0@toc@ha +; CHECK-NEXT: addi 4, 4, .LCPI4_0@toc@l +; CHECK-NEXT: lfdx 2, 4, 3 ; CHECK-NEXT: blr ; ; CHECK-VSX-LABEL: foo_lf: ; CHECK-VSX: # %bb.0: # %entry -; CHECK-VSX-NEXT: mflr 0 -; CHECK-VSX-NEXT: stdu 1, -112(1) -; CHECK-VSX-NEXT: fmr 3, 2 +; CHECK-VSX-NEXT: fmr 0, 1 +; CHECK-VSX-NEXT: fcpsgn 1, 2, 1 ; CHECK-VSX-NEXT: xxlxor 2, 2, 2 -; CHECK-VSX-NEXT: std 0, 128(1) -; CHECK-VSX-NEXT: bl copysignl -; CHECK-VSX-NEXT: nop -; CHECK-VSX-NEXT: addi 1, 1, 112 -; CHECK-VSX-NEXT: ld 0, 16(1) -; CHECK-VSX-NEXT: mtlr 0 +; CHECK-VSX-NEXT: xscmpudp 0, 0, 1 +; CHECK-VSX-NEXT: beqlr 0 +; CHECK-VSX-NEXT: # %bb.1: # %entry +; CHECK-VSX-NEXT: xsnegdp 2, 2 ; CHECK-VSX-NEXT: blr entry: %conv = fpext double %a to ppc_fp128 diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll index 1d1612928dbc2..ff8311c131f76 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll @@ -4,84 +4,81 @@ target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" target triple = "powerpc-unknown-linux-gnu" +; Previously we checked that loops that used CTR would not be used around a libm call to copysignl +; but now that copysignl is no longer emitted by LLVM in most cases, this stands as a tombstone. +; It has mtctr right in the middle, but we don't care because copysignl is nowhere to be found. + define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly { ; CHECK-LABEL: foo: ; CHECK-NOT: mtctr ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stwu 1, -176(1) -; CHECK-NEXT: stw 0, 180(1) -; CHECK-NEXT: stfd 2, 128(1) -; CHECK-NEXT: lwz 3, 132(1) -; CHECK-NEXT: stfd 1, 136(1) -; CHECK-NEXT: stw 3, 148(1) -; CHECK-NEXT: lwz 3, 128(1) -; CHECK-NEXT: stfd 31, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: stw 3, 144(1) -; CHECK-NEXT: lwz 3, 140(1) -; CHECK-NEXT: lfd 0, 144(1) -; CHECK-NEXT: stw 3, 156(1) -; CHECK-NEXT: lwz 3, 136(1) -; CHECK-NEXT: stw 30, 160(1) # 4-byte Folded Spill -; CHECK-NEXT: li 30, 2048 -; CHECK-NEXT: stw 3, 152(1) -; CHECK-NEXT: lfd 31, 152(1) -; CHECK-NEXT: fmr 1, 31 +; CHECK-NEXT: stwu 1, -112(1) +; CHECK-NEXT: stfd 2, 80(1) +; CHECK-NEXT: li 3, 2048 +; CHECK-NEXT: lwz 4, 84(1) +; CHECK-NEXT: stfd 1, 88(1) +; CHECK-NEXT: stw 4, 100(1) +; CHECK-NEXT: lwz 4, 80(1) +; CHECK-NEXT: stw 4, 96(1) +; CHECK-NEXT: lwz 4, 92(1) +; CHECK-NEXT: lfd 1, 96(1) +; CHECK-NEXT: stw 4, 108(1) +; CHECK-NEXT: lwz 4, 88(1) +; CHECK-NEXT: stw 4, 104(1) +; CHECK-NEXT: lfd 0, 104(1) +; CHECK-NEXT: mtctr 3 +; CHECK-NEXT: fmr 2, 0 +; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: stfd 1, 64(1) -; CHECK-NEXT: lwz 3, 68(1) -; CHECK-NEXT: stfd 31, 88(1) -; CHECK-NEXT: stw 3, 84(1) -; CHECK-NEXT: lwz 3, 64(1) -; CHECK-NEXT: stfd 0, 56(1) -; CHECK-NEXT: stw 3, 80(1) -; CHECK-NEXT: lwz 3, 92(1) -; CHECK-NEXT: lfd 4, 96(1) -; CHECK-NEXT: stw 3, 108(1) -; CHECK-NEXT: lwz 3, 88(1) -; CHECK-NEXT: lfd 1, 80(1) -; CHECK-NEXT: stw 3, 104(1) ; CHECK-NEXT: lwz 3, 60(1) -; CHECK-NEXT: lfd 3, 104(1) +; CHECK-NEXT: stfd 1, 48(1) ; CHECK-NEXT: stw 3, 76(1) ; CHECK-NEXT: lwz 3, 56(1) ; CHECK-NEXT: stw 3, 72(1) -; CHECK-NEXT: lfd 2, 72(1) -; CHECK-NEXT: bl copysignl -; CHECK-NEXT: stfd 2, 48(1) -; CHECK-NEXT: addi 30, 30, -1 ; CHECK-NEXT: lwz 3, 52(1) -; CHECK-NEXT: cmplwi 30, 0 -; CHECK-NEXT: stfd 1, 40(1) -; CHECK-NEXT: stw 3, 116(1) +; CHECK-NEXT: lfd 2, 72(1) +; CHECK-NEXT: stw 3, 68(1) ; CHECK-NEXT: lwz 3, 48(1) -; CHECK-NEXT: stw 3, 112(1) -; CHECK-NEXT: lwz 3, 44(1) -; CHECK-NEXT: lfd 0, 112(1) -; CHECK-NEXT: stw 3, 124(1) -; CHECK-NEXT: lwz 3, 40(1) -; CHECK-NEXT: stw 3, 120(1) -; CHECK-NEXT: lfd 1, 120(1) -; CHECK-NEXT: bc 12, 1, .LBB0_1 -; CHECK-NEXT: # %bb.2: # %for.end -; CHECK-NEXT: stfd 1, 16(1) +; CHECK-NEXT: stw 3, 64(1) +; CHECK-NEXT: lfd 1, 64(1) +; CHECK-NEXT: bdz .LBB0_7 +; CHECK-NEXT: .LBB0_2: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: stfd 0, 40(1) +; CHECK-NEXT: lbz 3, 40(1) +; CHECK-NEXT: srwi 3, 3, 7 +; CHECK-NEXT: andi. 3, 3, 1 +; CHECK-NEXT: bc 12, 1, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: fabs 3, 2 +; CHECK-NEXT: b .LBB0_5 +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: fnabs 3, 2 +; CHECK-NEXT: .LBB0_5: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: fcmpu 0, 2, 3 +; CHECK-NEXT: stfd 3, 56(1) +; CHECK-NEXT: beq 0, .LBB0_1 +; CHECK-NEXT: # %bb.6: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: fneg 1, 1 +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_7: # %for.end +; CHECK-NEXT: stfd 2, 16(1) ; CHECK-NEXT: lwz 3, 20(1) -; CHECK-NEXT: stfd 0, 8(1) +; CHECK-NEXT: stfd 1, 8(1) ; CHECK-NEXT: stw 3, 36(1) ; CHECK-NEXT: lwz 3, 16(1) -; CHECK-NEXT: lfd 31, 168(1) # 8-byte Folded Reload ; CHECK-NEXT: stw 3, 32(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: lfd 1, 32(1) ; CHECK-NEXT: stw 3, 28(1) ; CHECK-NEXT: lwz 3, 8(1) -; CHECK-NEXT: lwz 30, 160(1) # 4-byte Folded Reload ; CHECK-NEXT: stw 3, 24(1) ; CHECK-NEXT: lfd 2, 24(1) -; CHECK-NEXT: lwz 0, 180(1) -; CHECK-NEXT: addi 1, 1, 176 -; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: addi 1, 1, 112 ; CHECK-NEXT: blr entry: br label %for.body @@ -103,4 +100,3 @@ for.end: ; preds = %for.body declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0 ; CHECK-NOT: mtctr - diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll index ebec8c1c4d654..967e6cf679d4c 100644 --- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -89,100 +89,59 @@ entry: } define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind { -; PPC64-P8-LE-LABEL: test_copysign: -; PPC64-P8-LE: # %bb.0: # %entry -; PPC64-P8-LE-NEXT: mflr 0 -; PPC64-P8-LE-NEXT: stdu 1, -32(1) -; PPC64-P8-LE-NEXT: std 0, 48(1) -; PPC64-P8-LE-NEXT: bl copysignl -; PPC64-P8-LE-NEXT: nop -; PPC64-P8-LE-NEXT: mffprd 3, 1 -; PPC64-P8-LE-NEXT: mffprd 4, 2 -; PPC64-P8-LE-NEXT: addi 1, 1, 32 -; PPC64-P8-LE-NEXT: ld 0, 16(1) -; PPC64-P8-LE-NEXT: mtlr 0 -; PPC64-P8-LE-NEXT: blr -; -; PPC64-LE-LABEL: test_copysign: -; PPC64-LE: # %bb.0: # %entry -; PPC64-LE-NEXT: mflr 0 -; PPC64-LE-NEXT: stdu 1, -48(1) -; PPC64-LE-NEXT: std 0, 64(1) -; PPC64-LE-NEXT: bl copysignl -; PPC64-LE-NEXT: nop -; PPC64-LE-NEXT: stfd 1, 32(1) -; PPC64-LE-NEXT: stfd 2, 40(1) -; PPC64-LE-NEXT: ld 3, 32(1) -; PPC64-LE-NEXT: ld 4, 40(1) -; PPC64-LE-NEXT: addi 1, 1, 48 -; PPC64-LE-NEXT: ld 0, 16(1) -; PPC64-LE-NEXT: mtlr 0 -; PPC64-LE-NEXT: blr -; -; PPC64-P8-BE-LABEL: test_copysign: -; PPC64-P8-BE: # %bb.0: # %entry -; PPC64-P8-BE-NEXT: mflr 0 -; PPC64-P8-BE-NEXT: stdu 1, -112(1) -; PPC64-P8-BE-NEXT: std 0, 128(1) -; PPC64-P8-BE-NEXT: bl copysignl -; PPC64-P8-BE-NEXT: nop -; PPC64-P8-BE-NEXT: mffprd 3, 1 -; PPC64-P8-BE-NEXT: mffprd 4, 2 -; PPC64-P8-BE-NEXT: addi 1, 1, 112 -; PPC64-P8-BE-NEXT: ld 0, 16(1) -; PPC64-P8-BE-NEXT: mtlr 0 -; PPC64-P8-BE-NEXT: blr +; PPC64-P8-LABEL: test_copysign: +; PPC64-P8: # %bb.0: # %entry +; PPC64-P8-NEXT: xscpsgndp 0, 3, 1 +; PPC64-P8-NEXT: xscmpudp 0, 1, 0 +; PPC64-P8-NEXT: beq 0, .LBB2_2 +; PPC64-P8-NEXT: # %bb.1: # %entry +; PPC64-P8-NEXT: xsnegdp 2, 2 +; PPC64-P8-NEXT: .LBB2_2: # %entry +; PPC64-P8-NEXT: mffprd 3, 0 +; PPC64-P8-NEXT: mffprd 4, 2 +; PPC64-P8-NEXT: blr ; -; PPC64-BE-LABEL: test_copysign: -; PPC64-BE: # %bb.0: # %entry -; PPC64-BE-NEXT: mflr 0 -; PPC64-BE-NEXT: stdu 1, -128(1) -; PPC64-BE-NEXT: std 0, 144(1) -; PPC64-BE-NEXT: bl copysignl -; PPC64-BE-NEXT: nop -; PPC64-BE-NEXT: stfd 1, 112(1) -; PPC64-BE-NEXT: stfd 2, 120(1) -; PPC64-BE-NEXT: ld 3, 112(1) -; PPC64-BE-NEXT: ld 4, 120(1) -; PPC64-BE-NEXT: addi 1, 1, 128 -; PPC64-BE-NEXT: ld 0, 16(1) -; PPC64-BE-NEXT: mtlr 0 -; PPC64-BE-NEXT: blr +; PPC64-LABEL: test_copysign: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: xscpsgndp 0, 3, 1 +; PPC64-NEXT: xscmpudp 0, 1, 0 +; PPC64-NEXT: beq 0, .LBB2_2 +; PPC64-NEXT: # %bb.1: # %entry +; PPC64-NEXT: xsnegdp 2, 2 +; PPC64-NEXT: .LBB2_2: # %entry +; PPC64-NEXT: stfd 0, -16(1) +; PPC64-NEXT: stfd 2, -8(1) +; PPC64-NEXT: ld 3, -16(1) +; PPC64-NEXT: ld 4, -8(1) +; PPC64-NEXT: blr ; ; PPC32-LABEL: test_copysign: ; PPC32: # %bb.0: # %entry -; PPC32-NEXT: mflr 0 -; PPC32-NEXT: stwu 1, -80(1) -; PPC32-NEXT: stw 0, 84(1) -; PPC32-NEXT: stfd 1, 32(1) -; PPC32-NEXT: lwz 3, 36(1) -; PPC32-NEXT: stfd 2, 24(1) -; PPC32-NEXT: stw 3, 52(1) -; PPC32-NEXT: lwz 3, 32(1) -; PPC32-NEXT: stfd 3, 56(1) -; PPC32-NEXT: stw 3, 48(1) -; PPC32-NEXT: lwz 3, 28(1) -; PPC32-NEXT: lfd 4, 64(1) -; PPC32-NEXT: stw 3, 44(1) -; PPC32-NEXT: lwz 3, 24(1) -; PPC32-NEXT: lfd 1, 48(1) -; PPC32-NEXT: stw 3, 40(1) -; PPC32-NEXT: lwz 3, 60(1) -; PPC32-NEXT: lfd 2, 40(1) -; PPC32-NEXT: stw 3, 76(1) -; PPC32-NEXT: lwz 3, 56(1) -; PPC32-NEXT: stw 3, 72(1) -; PPC32-NEXT: lfd 3, 72(1) -; PPC32-NEXT: bl copysignl -; PPC32-NEXT: stfd 1, 8(1) +; PPC32-NEXT: stwu 1, -32(1) +; PPC32-NEXT: stfd 3, 8(1) +; PPC32-NEXT: lbz 3, 8(1) +; PPC32-NEXT: srwi 3, 3, 7 +; PPC32-NEXT: andi. 3, 3, 1 +; PPC32-NEXT: bc 12, 1, .LBB2_2 +; PPC32-NEXT: # %bb.1: # %entry +; PPC32-NEXT: fabs 0, 1 +; PPC32-NEXT: fcmpu 0, 1, 0 +; PPC32-NEXT: bne 0, .LBB2_3 +; PPC32-NEXT: b .LBB2_4 +; PPC32-NEXT: .LBB2_2: +; PPC32-NEXT: fnabs 0, 1 +; PPC32-NEXT: fcmpu 0, 1, 0 +; PPC32-NEXT: beq 0, .LBB2_4 +; PPC32-NEXT: .LBB2_3: # %entry +; PPC32-NEXT: fneg 2, 2 +; PPC32-NEXT: .LBB2_4: # %entry +; PPC32-NEXT: stfd 0, 24(1) ; PPC32-NEXT: stfd 2, 16(1) -; PPC32-NEXT: lwz 3, 8(1) -; PPC32-NEXT: lwz 4, 12(1) +; PPC32-NEXT: lwz 3, 24(1) +; PPC32-NEXT: lwz 4, 28(1) ; PPC32-NEXT: lwz 5, 16(1) ; PPC32-NEXT: lwz 6, 20(1) -; PPC32-NEXT: lwz 0, 84(1) -; PPC32-NEXT: addi 1, 1, 80 -; PPC32-NEXT: mtlr 0 +; PPC32-NEXT: addi 1, 1, 32 ; PPC32-NEXT: blr entry: %0 = tail call ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 %x, ppc_fp128 %y) @@ -236,3 +195,8 @@ entry: declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128) declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128, ppc_fp128) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; PPC64-BE: {{.*}} +; PPC64-LE: {{.*}} +; PPC64-P8-BE: {{.*}} +; PPC64-P8-LE: {{.*}} From bd017ccac36fe397e2f12bd355f8c7a0c55420c9 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 29 Oct 2024 02:57:15 -0700 Subject: [PATCH 5/6] [PowerPC][SelectionDAG] Use SETUEQ in copysign expansion --- .../SelectionDAG/LegalizeFloatTypes.cpp | 2 +- llvm/test/CodeGen/PowerPC/copysignl.ll | 45 ++++++++++--------- llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll | 3 +- .../PowerPC/fp128-bitcast-after-operation.ll | 24 +++++----- 4 files changed, 40 insertions(+), 34 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e30d9b1a0f31f..b7d512f5a1a4b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1721,7 +1721,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, // A double-double is Hi + Lo, so if Hi flips sign, so must Lo Lo = DAG.getSelectCC(DL, Tmp, Hi, Lo, DAG.getNode(ISD::FNEG, DL, Lo.getValueType(), Lo), - ISD::SETEQ); + ISD::SETUEQ); } void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, diff --git a/llvm/test/CodeGen/PowerPC/copysignl.ll b/llvm/test/CodeGen/PowerPC/copysignl.ll index 3b865a083dca5..9cefe66df90bd 100644 --- a/llvm/test/CodeGen/PowerPC/copysignl.ll +++ b/llvm/test/CodeGen/PowerPC/copysignl.ll @@ -44,13 +44,13 @@ define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 { ; CHECK-LABEL: foo_ll: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: fcpsgn 0, 2, 1 -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: li 4, 8 -; CHECK-NEXT: fcmpu 0, 1, 0 -; CHECK-NEXT: fmr 1, 0 -; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: li 3, 8 ; CHECK-NEXT: addis 4, 2, .LCPI2_0@toc@ha ; CHECK-NEXT: addi 4, 4, .LCPI2_0@toc@l +; CHECK-NEXT: fcmpu 0, 1, 0 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: crnor 20, 2, 3 +; CHECK-NEXT: isel 3, 0, 3, 20 ; CHECK-NEXT: lfdx 2, 4, 3 ; CHECK-NEXT: blr ; @@ -59,8 +59,9 @@ define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 { ; CHECK-VSX-NEXT: fmr 0, 1 ; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1 ; CHECK-VSX-NEXT: xxlxor 2, 2, 2 -; CHECK-VSX-NEXT: xscmpudp 0, 0, 1 -; CHECK-VSX-NEXT: beqlr 0 +; CHECK-VSX-NEXT: fcmpu 0, 0, 1 +; CHECK-VSX-NEXT: cror 20, 2, 3 +; CHECK-VSX-NEXT: bclr 12, 20, 0 ; CHECK-VSX-NEXT: # %bb.1: # %entry ; CHECK-VSX-NEXT: xsnegdp 2, 2 ; CHECK-VSX-NEXT: blr @@ -74,13 +75,13 @@ define ppc_fp128 @foo_ld(double %a, double %b) #0 { ; CHECK-LABEL: foo_ld: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: fcpsgn 0, 2, 1 -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: li 4, 8 -; CHECK-NEXT: fcmpu 0, 1, 0 -; CHECK-NEXT: fmr 1, 0 -; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: li 3, 8 ; CHECK-NEXT: addis 4, 2, .LCPI3_0@toc@ha ; CHECK-NEXT: addi 4, 4, .LCPI3_0@toc@l +; CHECK-NEXT: fcmpu 0, 1, 0 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: crnor 20, 2, 3 +; CHECK-NEXT: isel 3, 0, 3, 20 ; CHECK-NEXT: lfdx 2, 4, 3 ; CHECK-NEXT: blr ; @@ -89,8 +90,9 @@ define ppc_fp128 @foo_ld(double %a, double %b) #0 { ; CHECK-VSX-NEXT: fmr 0, 1 ; CHECK-VSX-NEXT: xscpsgndp 1, 2, 1 ; CHECK-VSX-NEXT: xxlxor 2, 2, 2 -; CHECK-VSX-NEXT: xscmpudp 0, 0, 1 -; CHECK-VSX-NEXT: beqlr 0 +; CHECK-VSX-NEXT: fcmpu 0, 0, 1 +; CHECK-VSX-NEXT: cror 20, 2, 3 +; CHECK-VSX-NEXT: bclr 12, 20, 0 ; CHECK-VSX-NEXT: # %bb.1: # %entry ; CHECK-VSX-NEXT: xsnegdp 2, 2 ; CHECK-VSX-NEXT: blr @@ -105,13 +107,13 @@ define ppc_fp128 @foo_lf(double %a, float %b) #0 { ; CHECK-LABEL: foo_lf: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: fcpsgn 0, 2, 1 -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: li 4, 8 -; CHECK-NEXT: fcmpu 0, 1, 0 -; CHECK-NEXT: fmr 1, 0 -; CHECK-NEXT: iseleq 3, 4, 3 +; CHECK-NEXT: li 3, 8 ; CHECK-NEXT: addis 4, 2, .LCPI4_0@toc@ha ; CHECK-NEXT: addi 4, 4, .LCPI4_0@toc@l +; CHECK-NEXT: fcmpu 0, 1, 0 +; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: crnor 20, 2, 3 +; CHECK-NEXT: isel 3, 0, 3, 20 ; CHECK-NEXT: lfdx 2, 4, 3 ; CHECK-NEXT: blr ; @@ -120,8 +122,9 @@ define ppc_fp128 @foo_lf(double %a, float %b) #0 { ; CHECK-VSX-NEXT: fmr 0, 1 ; CHECK-VSX-NEXT: fcpsgn 1, 2, 1 ; CHECK-VSX-NEXT: xxlxor 2, 2, 2 -; CHECK-VSX-NEXT: xscmpudp 0, 0, 1 -; CHECK-VSX-NEXT: beqlr 0 +; CHECK-VSX-NEXT: fcmpu 0, 0, 1 +; CHECK-VSX-NEXT: cror 20, 2, 3 +; CHECK-VSX-NEXT: bclr 12, 20, 0 ; CHECK-VSX-NEXT: # %bb.1: # %entry ; CHECK-VSX-NEXT: xsnegdp 2, 2 ; CHECK-VSX-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll index ff8311c131f76..82f8ba3bfda53 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll @@ -59,8 +59,9 @@ define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly { ; CHECK-NEXT: .LBB0_5: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: fcmpu 0, 2, 3 +; CHECK-NEXT: cror 20, 2, 3 ; CHECK-NEXT: stfd 3, 56(1) -; CHECK-NEXT: beq 0, .LBB0_1 +; CHECK-NEXT: bc 12, 20, .LBB0_1 ; CHECK-NEXT: # %bb.6: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: fneg 1, 1 diff --git a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll index 967e6cf679d4c..c51b98de5cdb0 100644 --- a/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll +++ b/llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -92,20 +92,22 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind { ; PPC64-P8-LABEL: test_copysign: ; PPC64-P8: # %bb.0: # %entry ; PPC64-P8-NEXT: xscpsgndp 0, 3, 1 -; PPC64-P8-NEXT: xscmpudp 0, 1, 0 -; PPC64-P8-NEXT: beq 0, .LBB2_2 +; PPC64-P8-NEXT: fcmpu 0, 1, 0 +; PPC64-P8-NEXT: cror 20, 2, 3 +; PPC64-P8-NEXT: bc 12, 20, .LBB2_2 ; PPC64-P8-NEXT: # %bb.1: # %entry ; PPC64-P8-NEXT: xsnegdp 2, 2 ; PPC64-P8-NEXT: .LBB2_2: # %entry -; PPC64-P8-NEXT: mffprd 3, 0 ; PPC64-P8-NEXT: mffprd 4, 2 +; PPC64-P8-NEXT: mffprd 3, 0 ; PPC64-P8-NEXT: blr ; ; PPC64-LABEL: test_copysign: ; PPC64: # %bb.0: # %entry ; PPC64-NEXT: xscpsgndp 0, 3, 1 -; PPC64-NEXT: xscmpudp 0, 1, 0 -; PPC64-NEXT: beq 0, .LBB2_2 +; PPC64-NEXT: fcmpu 0, 1, 0 +; PPC64-NEXT: cror 20, 2, 3 +; PPC64-NEXT: bc 12, 20, .LBB2_2 ; PPC64-NEXT: # %bb.1: # %entry ; PPC64-NEXT: xsnegdp 2, 2 ; PPC64-NEXT: .LBB2_2: # %entry @@ -125,16 +127,16 @@ define i128 @test_copysign(ppc_fp128 %x, ppc_fp128 %y) nounwind { ; PPC32-NEXT: bc 12, 1, .LBB2_2 ; PPC32-NEXT: # %bb.1: # %entry ; PPC32-NEXT: fabs 0, 1 -; PPC32-NEXT: fcmpu 0, 1, 0 -; PPC32-NEXT: bne 0, .LBB2_3 -; PPC32-NEXT: b .LBB2_4 +; PPC32-NEXT: b .LBB2_3 ; PPC32-NEXT: .LBB2_2: ; PPC32-NEXT: fnabs 0, 1 -; PPC32-NEXT: fcmpu 0, 1, 0 -; PPC32-NEXT: beq 0, .LBB2_4 ; PPC32-NEXT: .LBB2_3: # %entry +; PPC32-NEXT: fcmpu 0, 1, 0 +; PPC32-NEXT: cror 20, 2, 3 +; PPC32-NEXT: bc 12, 20, .LBB2_5 +; PPC32-NEXT: # %bb.4: # %entry ; PPC32-NEXT: fneg 2, 2 -; PPC32-NEXT: .LBB2_4: # %entry +; PPC32-NEXT: .LBB2_5: # %entry ; PPC32-NEXT: stfd 0, 24(1) ; PPC32-NEXT: stfd 2, 16(1) ; PPC32-NEXT: lwz 3, 24(1) From 2ddf2ef28eb41f8a1a344dc9b19906f6ac954b1e Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Thu, 31 Oct 2024 17:55:25 -0700 Subject: [PATCH 6/6] [PowerPC] Erase now-irrelevant test --- llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll | 103 --------------------- 1 file changed, 103 deletions(-) delete mode 100644 llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll deleted file mode 100644 index 82f8ba3bfda53..0000000000000 --- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll +++ /dev/null @@ -1,103 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -verify-machineinstrs < %s -mcpu=ppc | FileCheck %s - -target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32" -target triple = "powerpc-unknown-linux-gnu" - -; Previously we checked that loops that used CTR would not be used around a libm call to copysignl -; but now that copysignl is no longer emitted by LLVM in most cases, this stands as a tombstone. -; It has mtctr right in the middle, but we don't care because copysignl is nowhere to be found. - -define ppc_fp128 @foo(ptr nocapture %n, ppc_fp128 %d) nounwind readonly { -; CHECK-LABEL: foo: -; CHECK-NOT: mtctr -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -112(1) -; CHECK-NEXT: stfd 2, 80(1) -; CHECK-NEXT: li 3, 2048 -; CHECK-NEXT: lwz 4, 84(1) -; CHECK-NEXT: stfd 1, 88(1) -; CHECK-NEXT: stw 4, 100(1) -; CHECK-NEXT: lwz 4, 80(1) -; CHECK-NEXT: stw 4, 96(1) -; CHECK-NEXT: lwz 4, 92(1) -; CHECK-NEXT: lfd 1, 96(1) -; CHECK-NEXT: stw 4, 108(1) -; CHECK-NEXT: lwz 4, 88(1) -; CHECK-NEXT: stw 4, 104(1) -; CHECK-NEXT: lfd 0, 104(1) -; CHECK-NEXT: mtctr 3 -; CHECK-NEXT: fmr 2, 0 -; CHECK-NEXT: b .LBB0_2 -; CHECK-NEXT: .LBB0_1: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: lwz 3, 60(1) -; CHECK-NEXT: stfd 1, 48(1) -; CHECK-NEXT: stw 3, 76(1) -; CHECK-NEXT: lwz 3, 56(1) -; CHECK-NEXT: stw 3, 72(1) -; CHECK-NEXT: lwz 3, 52(1) -; CHECK-NEXT: lfd 2, 72(1) -; CHECK-NEXT: stw 3, 68(1) -; CHECK-NEXT: lwz 3, 48(1) -; CHECK-NEXT: stw 3, 64(1) -; CHECK-NEXT: lfd 1, 64(1) -; CHECK-NEXT: bdz .LBB0_7 -; CHECK-NEXT: .LBB0_2: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: stfd 0, 40(1) -; CHECK-NEXT: lbz 3, 40(1) -; CHECK-NEXT: srwi 3, 3, 7 -; CHECK-NEXT: andi. 3, 3, 1 -; CHECK-NEXT: bc 12, 1, .LBB0_4 -; CHECK-NEXT: # %bb.3: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: fabs 3, 2 -; CHECK-NEXT: b .LBB0_5 -; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: fnabs 3, 2 -; CHECK-NEXT: .LBB0_5: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: fcmpu 0, 2, 3 -; CHECK-NEXT: cror 20, 2, 3 -; CHECK-NEXT: stfd 3, 56(1) -; CHECK-NEXT: bc 12, 20, .LBB0_1 -; CHECK-NEXT: # %bb.6: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: fneg 1, 1 -; CHECK-NEXT: b .LBB0_1 -; CHECK-NEXT: .LBB0_7: # %for.end -; CHECK-NEXT: stfd 2, 16(1) -; CHECK-NEXT: lwz 3, 20(1) -; CHECK-NEXT: stfd 1, 8(1) -; CHECK-NEXT: stw 3, 36(1) -; CHECK-NEXT: lwz 3, 16(1) -; CHECK-NEXT: stw 3, 32(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: lfd 1, 32(1) -; CHECK-NEXT: stw 3, 28(1) -; CHECK-NEXT: lwz 3, 8(1) -; CHECK-NEXT: stw 3, 24(1) -; CHECK-NEXT: lfd 2, 24(1) -; CHECK-NEXT: addi 1, 1, 112 -; CHECK-NEXT: blr -entry: - br label %for.body - -for.body: ; preds = %for.body, %entry - %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %x.05 = phi ppc_fp128 [ %d, %entry ], [ %conv, %for.body ] - %arrayidx = getelementptr inbounds ppc_fp128, ptr %n, i32 %i.06 - %0 = load ppc_fp128, ptr %arrayidx, align 8 - %conv = tail call ppc_fp128 @copysignl(ppc_fp128 %x.05, ppc_fp128 %d) nounwind readonly - %inc = add nsw i32 %i.06, 1 - %exitcond = icmp eq i32 %inc, 2048 - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - ret ppc_fp128 %conv -} - -declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0 - -; CHECK-NOT: mtctr