From e46e4bf4c86fcea333c8a98e39672b513094426f Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Mon, 23 Dec 2024 21:50:32 +0000 Subject: [PATCH 1/2] pre-commit tests --- llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll | 38 +++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll diff --git a/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll b/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll new file mode 100644 index 0000000000000..d934c6c58558f --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %} +target triple = "nvptx64-nvidia-cuda" + +define float @fabs_free(i32 %in) { +; CHECK-LABEL: fabs_free( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [fabs_free_param_0]; +; CHECK-NEXT: and.b32 %r2, %r1, 2147483647; +; CHECK-NEXT: mov.b32 %f1, %r2; +; CHECK-NEXT: st.param.f32 [func_retval0], %f1; +; CHECK-NEXT: ret; + %b = bitcast i32 %in to float + %f = call float @llvm.fabs.f32(float %b) + ret float %f +} + +define float @fneg_free(i32 %in) { +; CHECK-LABEL: fneg_free( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.u32 %r1, [fneg_free_param_0]; +; CHECK-NEXT: xor.b32 %r2, %r1, -2147483648; +; CHECK-NEXT: mov.b32 %f1, %r2; +; CHECK-NEXT: st.param.f32 [func_retval0], %f1; +; CHECK-NEXT: ret; + %b = bitcast i32 %in to float + %f = fneg float %b + ret float %f +} From 8e5571c0acb86fc9d56c40cf95d9d3b257bf56ee Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Mon, 23 Dec 2024 21:52:41 +0000 Subject: [PATCH 2/2] [NVPTX] designate fabs and fneg as free --- llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 3 +++ .../test/CodeGen/NVPTX/bf16x2-instructions.ll | 8 ++++---- llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll | 20 ++++++++----------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h index 4a98fe21b81dc..c9b7e87455699 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h @@ -261,6 +261,9 @@ class NVPTXTargetLowering : public TargetLowering { return true; } + bool isFAbsFree(EVT VT) const override { return true; } + bool isFNegFree(EVT VT) const override { return true; } + private: const NVPTXSubtarget &STI; // cache the subtarget here SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index 03cdeb9683aba..8be3a66b7f483 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -182,8 +182,8 @@ define <2 x bfloat> @test_fneg(<2 x bfloat> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u32 %r1, [test_fneg_param_0]; -; CHECK-NEXT: xor.b32 %r2, %r1, -2147450880; +; CHECK-NEXT: ld.param.b32 %r1, [test_fneg_param_0]; +; CHECK-NEXT: neg.bf16x2 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; %r = fneg <2 x bfloat> %a @@ -532,8 +532,8 @@ define <2 x bfloat> @test_fabs(<2 x bfloat> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u32 %r1, [test_fabs_param_0]; -; CHECK-NEXT: and.b32 %r2, %r1, 2147450879; +; CHECK-NEXT: ld.param.b32 %r1, [test_fabs_param_0]; +; CHECK-NEXT: abs.bf16x2 %r2, %r1; ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; ; CHECK-NEXT: ret; %r = call <2 x bfloat> @llvm.fabs.f16(<2 x bfloat> %a) diff --git a/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll b/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll index d934c6c58558f..9031f33939f2f 100644 --- a/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll +++ b/llvm/test/CodeGen/NVPTX/fabs-fneg-free.ll @@ -6,14 +6,12 @@ target triple = "nvptx64-nvidia-cuda" define float @fabs_free(i32 %in) { ; CHECK-LABEL: fabs_free( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<3>; -; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-NEXT: .reg .f32 %f<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u32 %r1, [fabs_free_param_0]; -; CHECK-NEXT: and.b32 %r2, %r1, 2147483647; -; CHECK-NEXT: mov.b32 %f1, %r2; -; CHECK-NEXT: st.param.f32 [func_retval0], %f1; +; CHECK-NEXT: ld.param.f32 %f1, [fabs_free_param_0]; +; CHECK-NEXT: abs.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-NEXT: ret; %b = bitcast i32 %in to float %f = call float @llvm.fabs.f32(float %b) @@ -23,14 +21,12 @@ define float @fabs_free(i32 %in) { define float @fneg_free(i32 %in) { ; CHECK-LABEL: fneg_free( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<3>; -; CHECK-NEXT: .reg .f32 %f<2>; +; CHECK-NEXT: .reg .f32 %f<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.u32 %r1, [fneg_free_param_0]; -; CHECK-NEXT: xor.b32 %r2, %r1, -2147483648; -; CHECK-NEXT: mov.b32 %f1, %r2; -; CHECK-NEXT: st.param.f32 [func_retval0], %f1; +; CHECK-NEXT: ld.param.f32 %f1, [fneg_free_param_0]; +; CHECK-NEXT: neg.f32 %f2, %f1; +; CHECK-NEXT: st.param.f32 [func_retval0], %f2; ; CHECK-NEXT: ret; %b = bitcast i32 %in to float %f = fneg float %b