3 files changed: +41 -4 lines changed
Original file line number Diff line number Diff line change @@ -261,6 +261,9 @@ class NVPTXTargetLowering : public TargetLowering {
261261 return true ;
262262 }
263263
264+ bool isFAbsFree (EVT VT) const override { return true ; } // Reports fabs as free for every value type: VT is ignored and the answer is always true.
265+ bool isFNegFree (EVT VT) const override { return true ; } // Reports fneg as free for every value type: VT is ignored and the answer is always true.
266+
264267private:
265268 const NVPTXSubtarget &STI; // cache the subtarget here
266269 SDValue getParamSymbol (SelectionDAG &DAG, int idx, EVT) const ;
Original file line number Diff line number Diff line change @@ -182,8 +182,8 @@ define <2 x bfloat> @test_fneg(<2 x bfloat> %a) #0 {
182182; CHECK-NEXT: .reg .b32 %r<3>;
183183; CHECK-EMPTY:
184184; CHECK-NEXT: // %bb.0:
185- ; CHECK-NEXT: ld.param.u32 %r1, [test_fneg_param_0];
186- ; CHECK-NEXT: xor.b32 %r2, %r1, -2147450880 ;
185+ ; CHECK-NEXT: ld.param.b32 %r1, [test_fneg_param_0];
186+ ; CHECK-NEXT: neg.bf16x2 %r2, %r1;
187187; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
188188; CHECK-NEXT: ret;
189189 %r = fneg <2 x bfloat> %a
@@ -532,8 +532,8 @@ define <2 x bfloat> @test_fabs(<2 x bfloat> %a) #0 {
532532; CHECK-NEXT: .reg .b32 %r<3>;
533533; CHECK-EMPTY:
534534; CHECK-NEXT: // %bb.0:
535- ; CHECK-NEXT: ld.param.u32 %r1, [test_fabs_param_0];
536- ; CHECK-NEXT: and.b32 %r2, %r1, 2147450879 ;
535+ ; CHECK-NEXT: ld.param.b32 %r1, [test_fabs_param_0];
536+ ; CHECK-NEXT: abs.bf16x2 %r2, %r1;
537537; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
538538; CHECK-NEXT: ret;
539539 %r = call <2 x bfloat> @llvm.fabs.f16 (<2 x bfloat> %a )
Original file line number Diff line number Diff line change 1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
3+ ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
4+ target triple = "nvptx64-nvidia-cuda"
5+
6+ define float @fabs_free (i32 %in ) { ; fabs applied to a float obtained by bitcasting the i32 argument
7+ ; CHECK-LABEL: fabs_free(
8+ ; CHECK: {
9+ ; CHECK-NEXT: .reg .f32 %f<3>;
10+ ; CHECK-EMPTY:
11+ ; CHECK-NEXT: // %bb.0:
12+ ; CHECK-NEXT: ld.param.f32 %f1, [fabs_free_param_0];
13+ ; CHECK-NEXT: abs.f32 %f2, %f1;
14+ ; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
15+ ; CHECK-NEXT: ret;
16+ %b = bitcast i32 %in to float ; reinterpret the integer bits as a float (no numeric conversion)
17+ %f = call float @llvm.fabs.f32 (float %b ) ; the assertions above expect a single abs.f32 on the loaded parameter, with no integer mask in between
18+ ret float %f
19+ }
20+
21+ define float @fneg_free (i32 %in ) { ; fneg applied to a float obtained by bitcasting the i32 argument
22+ ; CHECK-LABEL: fneg_free(
23+ ; CHECK: {
24+ ; CHECK-NEXT: .reg .f32 %f<3>;
25+ ; CHECK-EMPTY:
26+ ; CHECK-NEXT: // %bb.0:
27+ ; CHECK-NEXT: ld.param.f32 %f1, [fneg_free_param_0];
28+ ; CHECK-NEXT: neg.f32 %f2, %f1;
29+ ; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
30+ ; CHECK-NEXT: ret;
31+ %b = bitcast i32 %in to float ; reinterpret the integer bits as a float (no numeric conversion)
32+ %f = fneg float %b ; the assertions above expect a single neg.f32 on the loaded parameter, with no integer xor in between
33+ ret float %f
34+ }
You can’t perform that action at this time.
0 commit comments