Skip to content

Commit d6f4a89

Browse files
committed
[SYCL][libclc][CUDA] Add --ffast-math support
This patch allows the `--ffast-math` compiler flag to substitute the regular `genfloatf` math built-ins with their `::native` versions. Moreover, this patch completes the support for native built-ins in `libclc/ptx-nvidiacl` by connecting them to the `__nv_fast` functions present in libdevice. If a fast function is not available in libdevice, the corresponding `nvvm` intrinsic is used.
1 parent e200720 commit d6f4a89

File tree

13 files changed

+188
-34
lines changed

13 files changed

+188
-34
lines changed

clang/include/clang/Basic/BuiltinsNVPTX.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,8 @@ BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
251251
BUILTIN(__nvvm_rcp_rm_f, "ff", "")
252252
BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
253253
BUILTIN(__nvvm_rcp_rp_f, "ff", "")
254+
BUILTIN(__nvvm_rcp_approx_f, "ff", "")
255+
BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
254256

255257
BUILTIN(__nvvm_rcp_rn_d, "dd", "")
256258
BUILTIN(__nvvm_rcp_rz_d, "dd", "")

libclc/ptx-nvidiacl/libspirv/SOURCES

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,15 @@ math/log2.cl
4242
math/logb.cl
4343
math/modf.cl
4444
math/native_cos.cl
45+
math/native_divide.cl
4546
math/native_exp.cl
4647
math/native_exp10.cl
4748
math/native_exp2.cl
4849
math/native_log.cl
4950
math/native_log10.cl
5051
math/native_log2.cl
5152
math/native_powr.cl
53+
math/native_recip.cl
5254
math/native_rsqrt.cl
5355
math/native_sin.cl
5456
math/native_sqrt.cl
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <spirv/spirv.h>
10+
11+
#include "../../include/libdevice.h"
12+
#include <clcmacro.h>
13+
14+
#define __CLC_FUNCTION __spirv_ocl_native_divide
15+
#define __CLC_BUILTIN __nv_fast_fdivide
16+
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
17+
#define __FLOAT_ONLY
18+
#include <math/binary_builtin.inc>

libclc/ptx-nvidiacl/libspirv/math/native_exp2.cl

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,17 @@
88

99
#include <spirv/spirv.h>
1010

11-
#include "../../include/libdevice.h"
1211
#include <clcmacro.h>
1312

14-
#define __CLC_FUNCTION __spirv_ocl_native_exp2
15-
#define __CLC_BUILTIN __nv_exp2
16-
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
13+
extern int __clc_nvvm_reflect_ftz();
14+
15+
_CLC_DEF _CLC_OVERLOAD float __spirv_ocl_native_exp2(float x) {
16+
return (__clc_nvvm_reflect_ftz()) ? __nvvm_ex2_approx_ftz_f(x)
17+
: __nvvm_ex2_approx_f(x);
18+
}
19+
20+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_native_exp2,
21+
float)
1722

1823
#ifdef cl_khr_fp16
1924
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
@@ -39,9 +44,3 @@ _CLC_UNARY_VECTORIZE_HAVE2(_CLC_OVERLOAD _CLC_DEF, half, __clc_native_exp2,
3944
#undef __USE_HALF_EXP2_APPROX
4045

4146
#endif // cl_khr_fp16
42-
43-
// Undef halfs before uncluding unary builtins, as they are handled above.
44-
#ifdef cl_khr_fp16
45-
#undef cl_khr_fp16
46-
#endif // cl_khr_fp16
47-
#include <math/unary_builtin.inc>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <spirv/spirv.h>
10+
11+
#include <clcmacro.h>
12+
13+
extern int __clc_nvvm_reflect_ftz();
14+
15+
_CLC_DEF _CLC_OVERLOAD float __spirv_ocl_native_recip(float x) {
16+
return (__clc_nvvm_reflect_ftz()) ? __nvvm_rcp_approx_ftz_f(x)
17+
: __nvvm_rcp_approx_f(x);
18+
}
19+
20+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_native_recip,
21+
float)

libclc/ptx-nvidiacl/libspirv/math/native_rsqrt.cl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,14 @@
88

99
#include <spirv/spirv.h>
1010

11-
#include "../../include/libdevice.h"
1211
#include <clcmacro.h>
1312

14-
#define __CLC_FUNCTION __spirv_ocl_native_rsqrt
15-
#define __CLC_BUILTIN __nv_rsqrt
16-
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
17-
#include <math/unary_builtin.inc>
13+
extern int __clc_nvvm_reflect_ftz();
14+
15+
_CLC_DEF _CLC_OVERLOAD float __spirv_ocl_native_rsqrt(float x) {
16+
return (__clc_nvvm_reflect_ftz()) ? __nvvm_rsqrt_approx_ftz_f(x)
17+
: __nvvm_rsqrt_approx_f(x);
18+
}
19+
20+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_native_rsqrt,
21+
float)

libclc/ptx-nvidiacl/libspirv/math/native_sin.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <clcmacro.h>
1313

1414
#define __CLC_FUNCTION __spirv_ocl_native_sin
15-
#define __CLC_BUILTIN __nv_sin
15+
#define __CLC_BUILTIN __nv_fast_sin
1616
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
17+
#define __FLOAT_ONLY
1718
#include <math/unary_builtin.inc>

libclc/ptx-nvidiacl/libspirv/math/native_sqrt.cl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,14 @@
88

99
#include <spirv/spirv.h>
1010

11-
#include "../../include/libdevice.h"
1211
#include <clcmacro.h>
1312

14-
#define __CLC_FUNCTION __spirv_ocl_native_sqrt
15-
#define __CLC_BUILTIN __nv_sqrt
16-
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
17-
#include <math/unary_builtin.inc>
13+
extern int __clc_nvvm_reflect_ftz();
14+
15+
_CLC_DEF _CLC_OVERLOAD float __spirv_ocl_native_sqrt(float x) {
16+
return (__clc_nvvm_reflect_ftz()) ? __nvvm_sqrt_approx_ftz_f(x)
17+
: __nvvm_sqrt_approx_f(x);
18+
}
19+
20+
_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __spirv_ocl_native_sqrt,
21+
float)

libclc/ptx-nvidiacl/libspirv/math/native_tan.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <clcmacro.h>
1313

1414
#define __CLC_FUNCTION __spirv_ocl_native_tan
15-
#define __CLC_BUILTIN __nv_tan
15+
#define __CLC_BUILTIN __nv_fast_tan
1616
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)
17+
#define __FLOAT_ONLY
1718
#include <math/unary_builtin.inc>

libclc/ptx-nvidiacl/libspirv/reflect.ll

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,10 @@ define i32 @__clc_nvvm_reflect_arch() alwaysinline {
66
%reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([12 x i8], [12 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
77
ret i32 %reflect
88
}
9+
10+
@str_ftz = private addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
11+
12+
define i32 @__clc_nvvm_reflect_ftz() alwaysinline {
13+
%reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @str_ftz, i32 0, i32 0) to i8*))
14+
ret i32 %reflect
15+
}

0 commit comments

Comments
 (0)