diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 3816e09953719..5c5766a8b2345 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -838,8 +838,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); // These map to corresponding instructions for f32/f64. f16 must be // promoted to f32. v2f16 is expanded to f16, which is then promoted diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index e8e8548120131..0c883093dd0a5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -977,20 +977,6 @@ def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;", Float32Regs, def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_fabs_d>; -// -// copysign -// - -def COPYSIGN_F : - NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src0, Float32Regs:$src1), - "copysign.f32 \t$dst, $src0, $src1;", - [(set Float32Regs:$dst, (fcopysign Float32Regs:$src1, Float32Regs:$src0))]>; - -def COPYSIGN_D : - NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src0, Float64Regs:$src1), - "copysign.f64 \t$dst, $src0, $src1;", - [(set Float64Regs:$dst, (fcopysign Float64Regs:$src1, Float64Regs:$src0))]>; - // // Abs, Neg bf16, bf16x2 // diff --git a/llvm/test/CodeGen/NVPTX/copysign.ll b/llvm/test/CodeGen/NVPTX/copysign.ll deleted file mode 100644 index 96fb37a129b20..0000000000000 --- a/llvm/test/CodeGen/NVPTX/copysign.ll +++ /dev/null @@ -1,39 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %} - -target triple = "nvptx64-nvidia-cuda" -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" - -define float @fcopysign_f(float %a, float %b) { -; CHECK-LABEL: fcopysign_f( -; CHECK: { -; CHECK-NEXT: .reg .f32 %f<4>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.f32 %f1, [fcopysign_f_param_0]; -; CHECK-NEXT: ld.param.f32 %f2, [fcopysign_f_param_1]; -; CHECK-NEXT: copysign.f32 %f3, %f2, %f1; -; CHECK-NEXT: st.param.f32 [func_retval0+0], %f3; -; CHECK-NEXT: ret; - %val = call float @llvm.copysign.f32(float %a, float %b) - ret float %val -} - -define double @fcopysign_d(double %a, double %b) { -; CHECK-LABEL: fcopysign_d( -; CHECK: { -; CHECK-NEXT: .reg .f64 %fd<4>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.f64 %fd1, [fcopysign_d_param_0]; -; CHECK-NEXT: ld.param.f64 %fd2, [fcopysign_d_param_1]; -; CHECK-NEXT: copysign.f64 %fd3, %fd2, %fd1; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd3; -; CHECK-NEXT: ret; - %val = call double @llvm.copysign.f64(double %a, double %b) - ret double %val -} - -declare float @llvm.copysign.f32(float, float) -declare double @llvm.copysign.f64(double, double) diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll index bdd6c91438460..fcc4ec6e4017f 100644 --- a/llvm/test/CodeGen/NVPTX/math-intrins.ll +++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll @@ -195,8 +195,9 @@ define double @round_double(double %a) { ; check the use of 0.5 to implement round ; CHECK-LABEL: round_double( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<3>; -; CHECK-NEXT: .reg .f64 %fd<8>; +; CHECK-NEXT: .reg .pred %p<4>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-NEXT: .reg .f64 %fd<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.f64 %fd1, [round_double_param_0]; @@ -205,10 +206,16 @@ define double @round_double(double %a) { ; CHECK-NEXT: add.rn.f64 %fd3, %fd2, 0d3FE0000000000000; ; CHECK-NEXT: cvt.rzi.f64.f64 %fd4, %fd3; ; CHECK-NEXT: selp.f64 %fd5, 0d0000000000000000, %fd4, %p1; -; CHECK-NEXT: copysign.f64 %fd6, %fd1, %fd5; -; CHECK-NEXT: setp.gt.f64 %p2, %fd2, 0d4330000000000000; -; CHECK-NEXT: selp.f64 %fd7, %fd1, %fd6, %p2; -; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd7; +; CHECK-NEXT: abs.f64 %fd6, %fd5; +; CHECK-NEXT: neg.f64 %fd7, %fd6; +; CHECK-NEXT: mov.b64 %rd1, %fd1; +; CHECK-NEXT: shr.u64 %rd2, %rd1, 63; +; CHECK-NEXT: and.b64 %rd3, %rd2, 1; +; CHECK-NEXT: setp.eq.b64 %p2, %rd3, 1; +; CHECK-NEXT: selp.f64 %fd8, %fd7, %fd6, %p2; +; CHECK-NEXT: setp.gt.f64 %p3, %fd2, 0d4330000000000000; +; CHECK-NEXT: selp.f64 %fd9, %fd1, %fd8, %p3; +; CHECK-NEXT: st.param.f64 [func_retval0+0], %fd9; ; CHECK-NEXT: ret; %b = call double @llvm.round.f64(double %a) ret double %b