From b139fa48cfa3d5247d8194f2e375d6f7c23328ad Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 27 May 2025 17:23:48 +0100 Subject: [PATCH 01/40] [SYCL][NVPTX][AMDGCN] Move devicelib cmath to header This patch experiments with moving standard library math built-ins from libdevice into headers. This is based on the way clang handles this for CUDA and HIP. In these languages you can define device functions as overloads. This allows re-defining standard library functions specifically for the device in a header, so that we can provide device-specific implementations of certain built-ins while still using the regular standard library headers. By default SYCL doesn't do overloads for device functions, so this patch introduces a new `sycl_device_only` attribute. This attribute makes a function device-only and allows it to overload existing functions. --- clang/include/clang/Basic/Attr.td | 7 + clang/lib/AST/Decl.cpp | 5 + clang/lib/CodeGen/CGBuiltin.cpp | 7 +- clang/lib/Sema/SemaDeclAttr.cpp | 3 + clang/lib/Sema/SemaOverload.cpp | 27 ++ libdevice/cmath_wrapper.cpp | 5 +- libdevice/cmath_wrapper_fp64.cpp | 5 +- libdevice/fallback-cmath.cpp | 5 +- sycl/include/sycl/stl_wrappers/cmath | 4 + .../sycl/stl_wrappers/cmath-fallback.h | 283 ++++++++++++++++++ 10 files changed, 337 insertions(+), 14 deletions(-) create mode 100644 sycl/include/sycl/stl_wrappers/cmath-fallback.h diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0747eda3addb4..8c2eddcc1487d 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1607,6 +1607,13 @@ def SYCLDevice : InheritableAttr { let Documentation = [SYCLDeviceDocs]; } +def SYCLDeviceOnly : InheritableAttr { + let Spellings = [GNU<"sycl_device_only">]; + let Subjects = SubjectList<[Function]>; + let LangOpts = [SYCLIsDevice]; + let Documentation = [Undocumented]; +} + def SYCLGlobalVar : InheritableAttr { let Spellings = [GNU<"sycl_global_var">]; let Subjects 
= SubjectList<[GlobalStorageNonLocalVar], ErrorDiag>; diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index fae15742b52ab..62d1f0d9ba9fe 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3729,6 +3729,11 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) return 0; + if (Context.getLangOpts().isSYCL() && hasAttr() && + !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) { + return 0; + } + // As AMDGCN implementation of OpenMP does not have a device-side standard // library, none of the predefined library functions except printf and malloc // should be treated as a builtin i.e. 0 should be returned for them. diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c7db24b8d95ff..57522a049bb00 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2782,10 +2782,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, GenerateIntrinsics = ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; } - bool IsSYCLDeviceWithoutIntrinsics = - getLangOpts().SYCLIsDevice && - (getTarget().getTriple().isNVPTX() || getTarget().getTriple().isAMDGCN()); - if (GenerateIntrinsics && !IsSYCLDeviceWithoutIntrinsics) { + if (GenerateIntrinsics) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIacos: case Builtin::BIacosf: @@ -3885,7 +3882,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_modf: case Builtin::BI__builtin_modff: case Builtin::BI__builtin_modfl: - if (Builder.getIsFPConstrained() || IsSYCLDeviceWithoutIntrinsics) + if (Builder.getIsFPConstrained()) break; // TODO: Emit constrained modf intrinsic once one exists. 
return RValue::get(emitModfBuiltin(*this, E, Intrinsic::modf)); case Builtin::BI__builtin_isgreater: diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index e6f6c547113be..b076cb60db269 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7224,6 +7224,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_SYCLDevice: S.SYCL().handleSYCLDeviceAttr(D, AL); break; + case ParsedAttr::AT_SYCLDeviceOnly: + handleSimpleAttribute(S, D, AL); + break; case ParsedAttr::AT_SYCLScope: S.SYCL().handleSYCLScopeAttr(D, AL); break; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 15294a11d4ecd..da9473914c98e 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1629,6 +1629,14 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, } } + // Allow overloads with SYCLDeviceOnlyAttr + if (SemaRef.getLangOpts().isSYCL()) { + if (hasExplicitAttr(Old) != + hasExplicitAttr(New)) { + return true; + } + } + // The signatures match; this is not an overload. return false; } @@ -11020,6 +11028,16 @@ bool clang::isBetterOverloadCandidate( S.CUDA().IdentifyPreference(Caller, Cand2.Function); } + // In SYCL device compilation mode prefer the overload with the + // SYCLDeviceOnly attribute. + if (S.getLangOpts().isSYCL() && S.getLangOpts().SYCLIsDevice && + Cand1.Function && Cand2.Function) { + if (hasExplicitAttr(Cand1.Function) != + hasExplicitAttr(Cand2.Function)) { + return hasExplicitAttr(Cand1.Function); + } + } + // General member function overloading is handled above, so this only handles // constructors with address spaces. 
// This only handles address spaces since C++ has no other @@ -11374,6 +11392,15 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl( if (S.getLangOpts().CUDA) CudaExcludeWrongSideCandidates(S, Candidates); + // In SYCL host compilation remove candidates marked SYCLDeviceOnly. + if (S.getLangOpts().isSYCL() && !S.getLangOpts().SYCLIsDevice) { + auto IsDeviceCand = [&](const OverloadCandidate *Cand) { + return Cand->Viable && Cand->Function && + hasExplicitAttr(Cand->Function); + }; + llvm::erase_if(Candidates, IsDeviceCand); + } + Best = end(); for (auto *Cand : Candidates) { Cand->Best = false; diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index d59395b2d0994..3c6c1b97fa1c5 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,8 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } @@ -211,4 +210,4 @@ DEVICE_EXTERN_C_INLINE float rintf(float x) { return __spirv_ocl_rint(x); } #endif -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 720982799ea71..81ba3e710ec6d 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,8 +9,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) // All exported functions in math and complex device libraries are weak // reference. 
If users provide their own math or complex functions(with @@ -508,4 +507,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 } } #endif // defined(_WIN32) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 97cb4cf67b4c7..d930ea014ac24 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,8 +8,7 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ - defined(__AMDGCN__) +#if defined(__SPIR__) || defined(__SPIRV__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -215,4 +214,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); } DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ +#endif // __SPIR__ || __SPIRV__ diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index c25eadf6394a1..8c626e4b06606 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -16,6 +16,10 @@ // *** *** +#if defined(__NVPTX__) || defined(__AMDGCN__) +#include "cmath-fallback.h" +#endif + #include #ifdef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h new file mode 100644 index 0000000000000..2ade9be9aea6c --- /dev/null +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -0,0 +1,283 @@ +#ifndef __CMATH_FALLBACK_H__ +#define __CMATH_FALLBACK_H__ + +#ifdef __SYCL_DEVICE_ONLY__ + +#define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) + +#define __DPCPP_SPIRV_MAP_UNARY(NAME, TYPE) \ + __DPCPP_SYCL_DEVICE TYPE NAME(TYPE x) { return __spirv_ocl_##NAME(x); } + +__DPCPP_SYCL_DEVICE 
long long abs(long long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } +__DPCPP_SPIRV_MAP_UNARY(acos, double); +__DPCPP_SPIRV_MAP_UNARY(acos, float); +__DPCPP_SPIRV_MAP_UNARY(acosh, double); +__DPCPP_SPIRV_MAP_UNARY(acosh, float); +__DPCPP_SPIRV_MAP_UNARY(asin, double); +__DPCPP_SPIRV_MAP_UNARY(asin, float); +__DPCPP_SPIRV_MAP_UNARY(asinh, double); +__DPCPP_SPIRV_MAP_UNARY(asinh, float); +__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE float scalbn(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE double scalbln(double x, long int exp) { + return __spirv_ocl_ldexp(x, (int)exp); +} +__DPCPP_SYCL_DEVICE float scalbln(float x, long int exp) { + return __spirv_ocl_ldexp(x, (int)exp); +} +__DPCPP_SYCL_DEVICE double atan2(double x, double y) { + return __spirv_ocl_atan2(x, y); +} +__DPCPP_SYCL_DEVICE float atan2(float x, float y) { + return __spirv_ocl_atan2(x, y); +} +__DPCPP_SPIRV_MAP_UNARY(atan, double); +__DPCPP_SPIRV_MAP_UNARY(atan, float); +__DPCPP_SPIRV_MAP_UNARY(atanh, double); +__DPCPP_SPIRV_MAP_UNARY(atanh, float); +__DPCPP_SPIRV_MAP_UNARY(cbrt, double); +__DPCPP_SPIRV_MAP_UNARY(cbrt, float); +__DPCPP_SPIRV_MAP_UNARY(ceil, double); +__DPCPP_SPIRV_MAP_UNARY(ceil, float); +__DPCPP_SPIRV_MAP_UNARY(cos, double); +__DPCPP_SPIRV_MAP_UNARY(cos, float); +__DPCPP_SPIRV_MAP_UNARY(cosh, double); +__DPCPP_SPIRV_MAP_UNARY(cosh, float); +__DPCPP_SPIRV_MAP_UNARY(erfc, double); +__DPCPP_SPIRV_MAP_UNARY(erfc, float); +__DPCPP_SPIRV_MAP_UNARY(erf, double); +__DPCPP_SPIRV_MAP_UNARY(erf, float); +__DPCPP_SPIRV_MAP_UNARY(exp2, double); +__DPCPP_SPIRV_MAP_UNARY(exp2, 
float); +__DPCPP_SPIRV_MAP_UNARY(exp, double); +__DPCPP_SPIRV_MAP_UNARY(exp, float); +__DPCPP_SPIRV_MAP_UNARY(expm1, double); +__DPCPP_SPIRV_MAP_UNARY(expm1, float); +__DPCPP_SYCL_DEVICE double fdim(double x, double y) { + return __spirv_ocl_fdim(x, y); +} +__DPCPP_SYCL_DEVICE float fdim(float x, float y) { + return __spirv_ocl_fdim(x, y); +} +__DPCPP_SPIRV_MAP_UNARY(floor, double); +__DPCPP_SPIRV_MAP_UNARY(floor, float); +__DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { + return __spirv_ocl_fma(x, y, z); +} +__DPCPP_SYCL_DEVICE float fma(float x, float y, float z) { + return __spirv_ocl_fma(x, y, z); +} +__DPCPP_SYCL_DEVICE double fmax(double x, double y) { + return __spirv_ocl_fmax(x, y); +} +__DPCPP_SYCL_DEVICE float fmax(float x, float y) { + return __spirv_ocl_fmax(x, y); +} +__DPCPP_SYCL_DEVICE double fmin(double x, double y) { + return __spirv_ocl_fmin(x, y); +} +__DPCPP_SYCL_DEVICE float fmin(float x, float y) { + return __spirv_ocl_fmin(x, y); +} +__DPCPP_SYCL_DEVICE double fmod(double x, double y) { + return __spirv_ocl_fmod(x, y); +} +__DPCPP_SYCL_DEVICE float fmod(float x, float y) { + return __spirv_ocl_fmod(x, y); +} +__DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { + return __spirv_ocl_frexp(x, exp); +} +__DPCPP_SYCL_DEVICE float frexp(float x, int *exp) { + return __spirv_ocl_frexp(x, exp); +} +__DPCPP_SYCL_DEVICE double hypot(double x, double y) { + return __spirv_ocl_hypot(x, y); +} +__DPCPP_SYCL_DEVICE float hypot(float x, float y) { + return __spirv_ocl_hypot(x, y); +} +__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } +__DPCPP_SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } +__DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE float ldexp(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SPIRV_MAP_UNARY(lgamma, double); +__DPCPP_SPIRV_MAP_UNARY(lgamma, float); +__DPCPP_SPIRV_MAP_UNARY(log10, double); 
+__DPCPP_SPIRV_MAP_UNARY(log10, float); +__DPCPP_SPIRV_MAP_UNARY(log1p, double); +__DPCPP_SPIRV_MAP_UNARY(log1p, float); +__DPCPP_SPIRV_MAP_UNARY(log2, double); +__DPCPP_SPIRV_MAP_UNARY(log2, float); +__DPCPP_SPIRV_MAP_UNARY(logb, double); +__DPCPP_SPIRV_MAP_UNARY(logb, float); +__DPCPP_SPIRV_MAP_UNARY(log, double); +__DPCPP_SPIRV_MAP_UNARY(log, float); +__DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { + return __spirv_ocl_modf(x, intpart); +} +__DPCPP_SYCL_DEVICE float modf(float x, float *intpart) { + return __spirv_ocl_modf(x, intpart); +} +__DPCPP_SYCL_DEVICE double nextafter(double x, double y) { + return __spirv_ocl_nextafter(x, y); +} +__DPCPP_SYCL_DEVICE float nextafter(float x, float y) { + return __spirv_ocl_nextafter(x, y); +} +__DPCPP_SYCL_DEVICE double pow(double x, double y) { + return __spirv_ocl_pow(x, y); +} +__DPCPP_SYCL_DEVICE float pow(float x, float y) { + return __spirv_ocl_pow(x, y); +} +__DPCPP_SYCL_DEVICE double remainder(double x, double y) { + return __spirv_ocl_remainder(x, y); +} +__DPCPP_SYCL_DEVICE float remainder(float x, float y) { + return __spirv_ocl_remainder(x, y); +} +__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +__DPCPP_SYCL_DEVICE float remquo(float x, float y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +__DPCPP_SPIRV_MAP_UNARY(rint, double); +__DPCPP_SPIRV_MAP_UNARY(rint, float); +__DPCPP_SPIRV_MAP_UNARY(round, double); +__DPCPP_SPIRV_MAP_UNARY(round, float); +__DPCPP_SPIRV_MAP_UNARY(sin, double); +__DPCPP_SPIRV_MAP_UNARY(sin, float); +__DPCPP_SPIRV_MAP_UNARY(sinh, double); +__DPCPP_SPIRV_MAP_UNARY(sinh, float); +__DPCPP_SPIRV_MAP_UNARY(sqrt, double); +__DPCPP_SPIRV_MAP_UNARY(sqrt, float); +__DPCPP_SPIRV_MAP_UNARY(tan, double); +__DPCPP_SPIRV_MAP_UNARY(tan, float); +__DPCPP_SPIRV_MAP_UNARY(tanh, double); +__DPCPP_SPIRV_MAP_UNARY(tanh, float); +__DPCPP_SPIRV_MAP_UNARY(tgamma, double); +__DPCPP_SPIRV_MAP_UNARY(tgamma, float); 
+__DPCPP_SPIRV_MAP_UNARY(trunc, double); +__DPCPP_SPIRV_MAP_UNARY(trunc, float); + +__DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } + +__DPCPP_SYCL_DEVICE ldiv_t ldiv(long x, long y) { return {x / y, x % y}; } + +__DPCPP_SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { + return {x / y, x % y}; +} + +#ifdef _LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_BEGIN_NAMESPACE_STD +#else +namespace std { +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_BEGIN_NAMESPACE_VERSION +#endif +#endif + +using ::abs; +using ::acos; +using ::acosh; +using ::asin; +using ::asinh; +using ::atan; +using ::atan2; +using ::atanh; +using ::cbrt; +using ::ceil; +using ::div; +using ::ldiv; +using ::lldiv; +// using ::copysign; +using ::cos; +using ::cosh; +using ::erf; +using ::erfc; +using ::exp; +using ::exp2; +using ::expm1; +using ::fabs; +using ::fdim; +using ::floor; +using ::fma; +using ::fmax; +using ::fmin; +using ::fmod; +// using ::fpclassify; +using ::frexp; +using ::hypot; +using ::ilogb; +// using ::isfinite; +// using ::isgreater; +// using ::isgreaterequal; +// using ::isinf; +// using ::isless; +// using ::islessequal; +// using ::islessgreater; +// using ::isnan; +// using ::isnormal; +// using ::isunordered; +// using ::labs; +using ::ldexp; +using ::lgamma; +// using ::llabs; +// using ::llrint; +using ::log; +using ::log10; +using ::log1p; +using ::log2; +using ::logb; +// using ::lrint; +// using ::lround; +// using ::llround; +using ::modf; +// using ::nan; +// using ::nanf; +// using ::nearbyint; +using ::nextafter; +using ::pow; +using ::remainder; +using ::remquo; +using ::rint; +using ::round; +using ::scalbln; +using ::scalbn; +// using ::signbit; +using ::sin; +using ::sinh; +using ::sqrt; +using ::tan; +using ::tanh; +using ::tgamma; +using ::trunc; + +#ifdef _LIBCPP_END_NAMESPACE_STD +_LIBCPP_END_NAMESPACE_STD +#else +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_END_NAMESPACE_VERSION +#endif +} // namespace std +#endif + +#undef 
__DPCPP_SPIRV_MAP_UNARY +#undef __DPCPP_SYCL_DEVICE +#endif +#endif From e942076bc94eda9f870868b1c94993c5c4e1423b Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 10:42:58 +0100 Subject: [PATCH 02/40] [SYCL] Fixup attribute handling We don't support malloc in SYCL, so stop exempting it in getBuiltinID. Silence warnings for host compilation with `sycl_device_only`. Fix the failing clang test for the new attribute. --- clang/include/clang/Basic/Attr.td | 2 +- clang/lib/AST/Decl.cpp | 2 +- clang/test/Misc/pragma-attribute-supported-attributes-list.test | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 8c2eddcc1487d..27a190f5c1ef9 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1610,7 +1610,7 @@ def SYCLDevice : InheritableAttr { def SYCLDeviceOnly : InheritableAttr { let Spellings = [GNU<"sycl_device_only">]; let Subjects = SubjectList<[Function]>; - let LangOpts = [SYCLIsDevice]; + let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [Undocumented]; } diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 62d1f0d9ba9fe..5182a0f342015 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3730,7 +3730,7 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { return 0; if (Context.getLangOpts().isSYCL() && hasAttr() && - !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) { + BuiltinID != Builtin::BIprintf) { return 0; } diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index d04cf791e88bf..c23d9acb98826 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -190,6 +190,7 @@ // CHECK-NEXT: SYCLDeviceGlobal (SubjectMatchRule_record) // CHECK-NEXT: SYCLDeviceHas (SubjectMatchRule_function) 
// CHECK-NEXT: SYCLDeviceIndirectlyCallable (SubjectMatchRule_function) +// CHECK-NEXT: SYCLDeviceOnly (SubjectMatchRule_function) // CHECK-NEXT: SYCLGlobalVariableAllowed (SubjectMatchRule_record) // CHECK-NEXT: SYCLIntelDisableLoopPipelining (SubjectMatchRule_function) // CHECK-NEXT: SYCLIntelInitiationInterval (SubjectMatchRule_function) From d192f332518c2ea88ce641eb8f0a5a75fd37a989 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 10:54:02 +0100 Subject: [PATCH 03/40] [SYCL] Use hasAttr instead of hasExplicitAttr --- clang/lib/Sema/SemaOverload.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index da9473914c98e..7cb24f06ae1e2 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1631,8 +1631,8 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, // Allow overloads with SYCLDeviceOnlyAttr if (SemaRef.getLangOpts().isSYCL()) { - if (hasExplicitAttr(Old) != - hasExplicitAttr(New)) { + if (Old->hasAttr() != + New->hasAttr()) { return true; } } @@ -11032,9 +11032,9 @@ bool clang::isBetterOverloadCandidate( // SYCLDeviceOnly attribute. 
if (S.getLangOpts().isSYCL() && S.getLangOpts().SYCLIsDevice && Cand1.Function && Cand2.Function) { - if (hasExplicitAttr(Cand1.Function) != - hasExplicitAttr(Cand2.Function)) { - return hasExplicitAttr(Cand1.Function); + if (Cand1.Function->hasAttr() != + Cand2.Function->hasAttr()) { + return Cand1.Function->hasAttr(); } } @@ -11396,7 +11396,7 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl( if (S.getLangOpts().isSYCL() && !S.getLangOpts().SYCLIsDevice) { auto IsDeviceCand = [&](const OverloadCandidate *Cand) { return Cand->Viable && Cand->Function && - hasExplicitAttr(Cand->Function); + Cand->Function->hasAttr(); }; llvm::erase_if(Candidates, IsDeviceCand); } From a82cebca4e767e91c03b40ad71ab9c11c112fcf5 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 15:48:31 +0100 Subject: [PATCH 04/40] [SYCL] Update fallback header --- .../sycl/stl_wrappers/cmath-fallback.h | 276 ++++++++++-------- 1 file changed, 150 insertions(+), 126 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 2ade9be9aea6c..93d0cb5e912f4 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -3,177 +3,157 @@ #ifdef __SYCL_DEVICE_ONLY__ -#define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) +#define __DPCPP_SYCL_DEVICE \ + __attribute__((sycl_device_only, always_inline, overloadable)) +#define __DPCPP_SYCL_DEVICE_C \ + extern "C" __attribute__((sycl_device_only, always_inline, overloadable)) -#define __DPCPP_SPIRV_MAP_UNARY(NAME, TYPE) \ - __DPCPP_SYCL_DEVICE TYPE NAME(TYPE x) { return __spirv_ocl_##NAME(x); } +#define __DPCPP_SPIRV_MAP_UNARY(NAME) \ + __DPCPP_SYCL_DEVICE_C float NAME##f(float x) { \ + return __spirv_ocl_##NAME(x); \ + } \ + __DPCPP_SYCL_DEVICE float NAME(float x) { return __spirv_ocl_##NAME(x); } \ + __DPCPP_SYCL_DEVICE double NAME(double x) { return __spirv_ocl_##NAME(x); } + 
+#define __DPCPP_SPIRV_MAP_BINARY(NAME) \ + __DPCPP_SYCL_DEVICE_C float NAME##f(float x, float y) { \ + return __spirv_ocl_##NAME(x, y); \ + } \ + __DPCPP_SYCL_DEVICE float NAME(float x, float y) { \ + return __spirv_ocl_##NAME(x, y); \ + } \ + __DPCPP_SYCL_DEVICE double NAME(double x, double y) { \ + return __spirv_ocl_##NAME(x, y); \ + } __DPCPP_SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE_C long long llabs(long long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE_C long labs(long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } +__DPCPP_SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } -__DPCPP_SPIRV_MAP_UNARY(acos, double); -__DPCPP_SPIRV_MAP_UNARY(acos, float); -__DPCPP_SPIRV_MAP_UNARY(acosh, double); -__DPCPP_SPIRV_MAP_UNARY(acosh, float); -__DPCPP_SPIRV_MAP_UNARY(asin, double); -__DPCPP_SPIRV_MAP_UNARY(asin, float); -__DPCPP_SPIRV_MAP_UNARY(asinh, double); -__DPCPP_SPIRV_MAP_UNARY(asinh, float); -__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { + +__DPCPP_SPIRV_MAP_UNARY(acos); +__DPCPP_SPIRV_MAP_UNARY(acosh); +__DPCPP_SPIRV_MAP_UNARY(asin); +__DPCPP_SPIRV_MAP_UNARY(asinh); + +__DPCPP_SYCL_DEVICE_C float scalbnf(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } __DPCPP_SYCL_DEVICE float scalbn(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } -__DPCPP_SYCL_DEVICE double scalbln(double x, long int exp) { +__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} + +__DPCPP_SYCL_DEVICE_C float scalblnf(float x, long int exp) { return __spirv_ocl_ldexp(x, (int)exp); } __DPCPP_SYCL_DEVICE float scalbln(float x, long int exp) { return 
__spirv_ocl_ldexp(x, (int)exp); } -__DPCPP_SYCL_DEVICE double atan2(double x, double y) { - return __spirv_ocl_atan2(x, y); -} -__DPCPP_SYCL_DEVICE float atan2(float x, float y) { - return __spirv_ocl_atan2(x, y); -} -__DPCPP_SPIRV_MAP_UNARY(atan, double); -__DPCPP_SPIRV_MAP_UNARY(atan, float); -__DPCPP_SPIRV_MAP_UNARY(atanh, double); -__DPCPP_SPIRV_MAP_UNARY(atanh, float); -__DPCPP_SPIRV_MAP_UNARY(cbrt, double); -__DPCPP_SPIRV_MAP_UNARY(cbrt, float); -__DPCPP_SPIRV_MAP_UNARY(ceil, double); -__DPCPP_SPIRV_MAP_UNARY(ceil, float); -__DPCPP_SPIRV_MAP_UNARY(cos, double); -__DPCPP_SPIRV_MAP_UNARY(cos, float); -__DPCPP_SPIRV_MAP_UNARY(cosh, double); -__DPCPP_SPIRV_MAP_UNARY(cosh, float); -__DPCPP_SPIRV_MAP_UNARY(erfc, double); -__DPCPP_SPIRV_MAP_UNARY(erfc, float); -__DPCPP_SPIRV_MAP_UNARY(erf, double); -__DPCPP_SPIRV_MAP_UNARY(erf, float); -__DPCPP_SPIRV_MAP_UNARY(exp2, double); -__DPCPP_SPIRV_MAP_UNARY(exp2, float); -__DPCPP_SPIRV_MAP_UNARY(exp, double); -__DPCPP_SPIRV_MAP_UNARY(exp, float); -__DPCPP_SPIRV_MAP_UNARY(expm1, double); -__DPCPP_SPIRV_MAP_UNARY(expm1, float); -__DPCPP_SYCL_DEVICE double fdim(double x, double y) { - return __spirv_ocl_fdim(x, y); -} -__DPCPP_SYCL_DEVICE float fdim(float x, float y) { - return __spirv_ocl_fdim(x, y); +__DPCPP_SYCL_DEVICE double scalbln(double x, long int exp) { + return __spirv_ocl_ldexp(x, (int)exp); } -__DPCPP_SPIRV_MAP_UNARY(floor, double); -__DPCPP_SPIRV_MAP_UNARY(floor, float); -__DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { + +__DPCPP_SPIRV_MAP_BINARY(atan2); +__DPCPP_SPIRV_MAP_UNARY(atan); +__DPCPP_SPIRV_MAP_UNARY(atanh); +__DPCPP_SPIRV_MAP_UNARY(cbrt); +__DPCPP_SPIRV_MAP_UNARY(ceil); +__DPCPP_SPIRV_MAP_UNARY(cos); +__DPCPP_SPIRV_MAP_UNARY(cosh); +__DPCPP_SPIRV_MAP_UNARY(erfc); +__DPCPP_SPIRV_MAP_UNARY(erf); +__DPCPP_SPIRV_MAP_UNARY(exp2); +__DPCPP_SPIRV_MAP_UNARY(exp); +__DPCPP_SPIRV_MAP_UNARY(expm1); +__DPCPP_SPIRV_MAP_BINARY(fdim); +__DPCPP_SPIRV_MAP_UNARY(floor); + +__DPCPP_SYCL_DEVICE_C float 
fmaf(float x, float y, float z) { return __spirv_ocl_fma(x, y, z); } __DPCPP_SYCL_DEVICE float fma(float x, float y, float z) { return __spirv_ocl_fma(x, y, z); } -__DPCPP_SYCL_DEVICE double fmax(double x, double y) { - return __spirv_ocl_fmax(x, y); -} -__DPCPP_SYCL_DEVICE float fmax(float x, float y) { - return __spirv_ocl_fmax(x, y); -} -__DPCPP_SYCL_DEVICE double fmin(double x, double y) { - return __spirv_ocl_fmin(x, y); -} -__DPCPP_SYCL_DEVICE float fmin(float x, float y) { - return __spirv_ocl_fmin(x, y); -} -__DPCPP_SYCL_DEVICE double fmod(double x, double y) { - return __spirv_ocl_fmod(x, y); -} -__DPCPP_SYCL_DEVICE float fmod(float x, float y) { - return __spirv_ocl_fmod(x, y); +__DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { + return __spirv_ocl_fma(x, y, z); } -__DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { + +__DPCPP_SPIRV_MAP_BINARY(fmax); +__DPCPP_SPIRV_MAP_BINARY(fmin); +__DPCPP_SPIRV_MAP_BINARY(fmod); + +__DPCPP_SYCL_DEVICE_C float frexpf(float x, int *exp) { return __spirv_ocl_frexp(x, exp); } __DPCPP_SYCL_DEVICE float frexp(float x, int *exp) { return __spirv_ocl_frexp(x, exp); } -__DPCPP_SYCL_DEVICE double hypot(double x, double y) { - return __spirv_ocl_hypot(x, y); -} -__DPCPP_SYCL_DEVICE float hypot(float x, float y) { - return __spirv_ocl_hypot(x, y); +__DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { + return __spirv_ocl_frexp(x, exp); } -__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } + +__DPCPP_SPIRV_MAP_BINARY(hypot); +__DPCPP_SYCL_DEVICE_C int ilogbf(float x) { return __spirv_ocl_ilogb(x); } __DPCPP_SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } -__DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { +__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } + +__DPCPP_SYCL_DEVICE_C float ldexpf(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } __DPCPP_SYCL_DEVICE float ldexp(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } 
-__DPCPP_SPIRV_MAP_UNARY(lgamma, double); -__DPCPP_SPIRV_MAP_UNARY(lgamma, float); -__DPCPP_SPIRV_MAP_UNARY(log10, double); -__DPCPP_SPIRV_MAP_UNARY(log10, float); -__DPCPP_SPIRV_MAP_UNARY(log1p, double); -__DPCPP_SPIRV_MAP_UNARY(log1p, float); -__DPCPP_SPIRV_MAP_UNARY(log2, double); -__DPCPP_SPIRV_MAP_UNARY(log2, float); -__DPCPP_SPIRV_MAP_UNARY(logb, double); -__DPCPP_SPIRV_MAP_UNARY(logb, float); -__DPCPP_SPIRV_MAP_UNARY(log, double); -__DPCPP_SPIRV_MAP_UNARY(log, float); -__DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { +__DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} + +__DPCPP_SPIRV_MAP_UNARY(lgamma); +__DPCPP_SPIRV_MAP_UNARY(log10); +__DPCPP_SPIRV_MAP_UNARY(log1p); +__DPCPP_SPIRV_MAP_UNARY(log2); +__DPCPP_SPIRV_MAP_UNARY(logb); +__DPCPP_SPIRV_MAP_UNARY(log); + +__DPCPP_SYCL_DEVICE_C float modff(float x, float *intpart) { return __spirv_ocl_modf(x, intpart); } __DPCPP_SYCL_DEVICE float modf(float x, float *intpart) { return __spirv_ocl_modf(x, intpart); } -__DPCPP_SYCL_DEVICE double nextafter(double x, double y) { - return __spirv_ocl_nextafter(x, y); -} -__DPCPP_SYCL_DEVICE float nextafter(float x, float y) { - return __spirv_ocl_nextafter(x, y); -} -__DPCPP_SYCL_DEVICE double pow(double x, double y) { - return __spirv_ocl_pow(x, y); -} -__DPCPP_SYCL_DEVICE float pow(float x, float y) { - return __spirv_ocl_pow(x, y); -} -__DPCPP_SYCL_DEVICE double remainder(double x, double y) { - return __spirv_ocl_remainder(x, y); -} -__DPCPP_SYCL_DEVICE float remainder(float x, float y) { - return __spirv_ocl_remainder(x, y); +__DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { + return __spirv_ocl_modf(x, intpart); } -__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { + +__DPCPP_SPIRV_MAP_BINARY(nextafter); +__DPCPP_SPIRV_MAP_BINARY(pow); +__DPCPP_SPIRV_MAP_BINARY(remainder); + +__DPCPP_SYCL_DEVICE_C float remquof(float x, float y, int *q) { return __spirv_ocl_remquo(x, y, q); } 
__DPCPP_SYCL_DEVICE float remquo(float x, float y, int *q) { return __spirv_ocl_remquo(x, y, q); } -__DPCPP_SPIRV_MAP_UNARY(rint, double); -__DPCPP_SPIRV_MAP_UNARY(rint, float); -__DPCPP_SPIRV_MAP_UNARY(round, double); -__DPCPP_SPIRV_MAP_UNARY(round, float); -__DPCPP_SPIRV_MAP_UNARY(sin, double); -__DPCPP_SPIRV_MAP_UNARY(sin, float); -__DPCPP_SPIRV_MAP_UNARY(sinh, double); -__DPCPP_SPIRV_MAP_UNARY(sinh, float); -__DPCPP_SPIRV_MAP_UNARY(sqrt, double); -__DPCPP_SPIRV_MAP_UNARY(sqrt, float); -__DPCPP_SPIRV_MAP_UNARY(tan, double); -__DPCPP_SPIRV_MAP_UNARY(tan, float); -__DPCPP_SPIRV_MAP_UNARY(tanh, double); -__DPCPP_SPIRV_MAP_UNARY(tanh, float); -__DPCPP_SPIRV_MAP_UNARY(tgamma, double); -__DPCPP_SPIRV_MAP_UNARY(tgamma, float); -__DPCPP_SPIRV_MAP_UNARY(trunc, double); -__DPCPP_SPIRV_MAP_UNARY(trunc, float); +__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { + return __spirv_ocl_remquo(x, y, q); +} +__DPCPP_SPIRV_MAP_UNARY(rint); +__DPCPP_SPIRV_MAP_UNARY(round); +__DPCPP_SPIRV_MAP_UNARY(sin); +__DPCPP_SPIRV_MAP_UNARY(sinh); +__DPCPP_SPIRV_MAP_UNARY(sqrt); +__DPCPP_SPIRV_MAP_UNARY(tan); +__DPCPP_SPIRV_MAP_UNARY(tanh); +__DPCPP_SPIRV_MAP_UNARY(tgamma); +__DPCPP_SPIRV_MAP_UNARY(trunc); __DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } @@ -194,32 +174,54 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION using ::abs; using ::acos; +using ::acosf; using ::acosh; +using ::acoshf; using ::asin; +using ::asinf; using ::asinh; +using ::asinhf; using ::atan; using ::atan2; +using ::atan2f; +using ::atanf; using ::atanh; +using ::atanhf; using ::cbrt; +using ::cbrtf; using ::ceil; +using ::ceilf; using ::div; +using ::labs; using ::ldiv; +using ::llabs; using ::lldiv; // using ::copysign; using ::cos; +using ::cosf; using ::cosh; +using ::coshf; using ::erf; using ::erfc; +using ::erfcf; +using ::erff; using ::exp; using ::exp2; +using ::exp2f; +using ::expf; using ::expm1; +using ::expm1f; using ::fabs; +using ::fabsf; using ::fdim; +using ::fdimf; using 
::floor; -using ::fma; -using ::fmax; -using ::fmin; +using ::floorf; +using ::fmaf; +using ::fmaxf; +using ::fminf; using ::fmod; +using ::fmodf; // using ::fpclassify; using ::frexp; using ::hypot; @@ -237,36 +239,58 @@ using ::ilogb; // using ::labs; using ::ldexp; using ::lgamma; +using ::lgammaf; // using ::llabs; // using ::llrint; using ::log; using ::log10; +using ::log10f; using ::log1p; +using ::log1pf; using ::log2; +using ::log2f; using ::logb; +using ::logbf; +using ::logf; // using ::lrint; // using ::lround; // using ::llround; using ::modf; +using ::modff; // using ::nan; // using ::nanf; // using ::nearbyint; using ::nextafter; +using ::nextafterf; using ::pow; +using ::powf; using ::remainder; +using ::remainderf; using ::remquo; +using ::remquof; using ::rint; +using ::rintf; using ::round; +using ::roundf; using ::scalbln; +using ::scalblnf; using ::scalbn; +using ::scalbnf; // using ::signbit; using ::sin; +using ::sinf; using ::sinh; +using ::sinhf; using ::sqrt; +using ::sqrtf; using ::tan; +using ::tanf; using ::tanh; +using ::tanhf; using ::tgamma; +using ::tgammaf; using ::trunc; +using ::truncf; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD From b3574fb95b1ff26838379834a61e42313ced1807 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 15:50:09 +0100 Subject: [PATCH 05/40] [SYCL] Remove sycl-libdevice-cmath.cpp test This test was relying on the hack preventing LLVM intrinsics from being emitted so it doesn't work at all with the new approach. 
--- .../test/CodeGenSYCL/sycl-libdevice-cmath.cpp | 144 ------------------ 1 file changed, 144 deletions(-) delete mode 100644 clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp deleted file mode 100644 index 5c282449dc851..0000000000000 --- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp +++ /dev/null @@ -1,144 +0,0 @@ -// SYCL compilation uses libdevice in order to implement platform specific -// versions of funcs like cosf, logf, etc. In order for the libdevice funcs -// to be used, we need to make sure that llvm intrinsics such as llvm.cos.f32 -// are not emitted since many backends do not have lowerings for such -// intrinsics. This allows the driver to link in the libdevice definitions for -// cosf etc. later in the driver flow. - -// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s -// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s - -#include "Inputs/sycl.hpp" - -extern "C" { -float scalbnf(float x, int n); -float logf(float x); -float expf(float x); -float frexpf(float x, int *exp); -float ldexpf(float x, int exp); -float log10f(float x); -float modff(float x, float *intpart); -float exp2f(float x); -float expm1f(float x); -int ilogbf(float x); -float log1pf(float x); -float log2f(float x); -float logbf(float x); -float sqrtf(float x); -float cbrtf(float x); -float hypotf(float x, float y); -float erff(float x); -float erfcf(float x); -float tgammaf(float x); -float lgammaf(float x); -float fmodf(float x, float y); -float remainderf(float x, float y); -float remquof(float x, float y, int *q); -float nextafterf(float x, float y); -float 
fdimf(float x, float y); -float fmaf(float x, float y, float z); -float sinf(float x); -float cosf(float x); -float tanf(float x); -float powf(float x, float y); -float acosf(float x); -float asinf(float x); -float atanf(float x); -float atan2f(float x, float y); -float coshf(float x); -float sinhf(float x); -float tanhf(float x); -float acoshf(float x); -float asinhf(float x); -float atanhf(float x); -}; - -// CHECK-NOT: llvm.abs. -// CHECK-NOT: llvm.scalbnf. -// CHECK-NOT: llvm.log. -// CHECK-NOT: llvm.exp. -// CHECK-NOT: llvm.frexp. -// CHECK-NOT: llvm.ldexp. -// CHECK-NOT: llvm.log10. -// CHECK-NOT: llvm.mod. -// CHECK-NOT: llvm.exp2. -// CHECK-NOT: llvm.expm1. -// CHECK-NOT: llvm.ilogb. -// CHECK-NOT: llvm.log1p. -// CHECK-NOT: llvm.log2. -// CHECK-NOT: llvm.logb. -// CHECK-NOT: llvm.sqrt. -// CHECK-NOT: llvm.cbrt. -// CHECK-NOT: llvm.hypot. -// CHECK-NOT: llvm.erf. -// CHECK-NOT: llvm.erfc. -// CHECK-NOT: llvm.tgamma. -// CHECK-NOT: llvm.lgamma. -// CHECK-NOT: llvm.fmod. -// CHECK-NOT: llvm.remainder. -// CHECK-NOT: llvm.remquo. -// CHECK-NOT: llvm.nextafter. -// CHECK-NOT: llvm.fdim. -// CHECK-NOT: llvm.fma. -// CHECK-NOT: llvm.sin. -// CHECK-NOT: llvm.cos. -// CHECK-NOT: llvm.tan. -// CHECK-NOT: llvm.pow. -// CHECK-NOT: llvm.acos. -// CHECK-NOT: llvm.asin. -// CHECK-NOT: llvm.atan. -// CHECK-NOT: llvm.atan2. -// CHECK-NOT: llvm.cosh. -// CHECK-NOT: llvm.sinh. -// CHECK-NOT: llvm.tanh. -// CHECK-NOT: llvm.acosh. -// CHECK-NOT: llvm.asinh. -// CHECK-NOT: llvm.atanh. 
-void sycl_kernel(float *a, int *b) { - sycl::queue{}.submit([&](sycl::handler &cgh) { - cgh.single_task([=]() { - a[0] = scalbnf(a[0], b[0]); - a[0] = logf(a[0]); - a[0] = expf(a[0]); - a[0] = frexpf(a[0], b); - a[0] = ldexpf(a[0], b[0]); - a[0] = log10f(a[0]); - a[0] = modff(a[0], a); - a[0] = exp2f(a[0]); - a[0] = expm1f(a[0]); - a[0] = ilogbf(a[0]); - a[0] = log1pf(a[0]); - a[0] = log2f(a[0]); - a[0] = logbf(a[0]); - a[0] = sqrtf(a[0]); - a[0] = cbrtf(a[0]); - a[0] = hypotf(a[0], a[0]); - a[0] = erff(a[0]); - a[0] = erfcf(a[0]); - a[0] = tgammaf(a[0]); - a[0] = lgammaf(a[0]); - a[0] = fmodf(a[0], a[0]); - a[0] = remainderf(a[0], a[0]); - a[0] = remquof(a[0], a[0], b); - a[0] = nextafterf(a[0], a[0]); - a[0] = fdimf(a[0], a[0]); - a[0] = fmaf(a[0], a[0], a[0]); - a[0] = sinf(a[0]); - a[0] = cosf(a[0]); - a[0] = tanf(a[0]); - a[0] = powf(a[0], a[0]); - a[0] = acosf(a[0]); - a[0] = asinf(a[0]); - a[0] = atanf(a[0]); - a[0] = atan2f(a[0], a[0]); - a[0] = coshf(a[0]); - a[0] = sinhf(a[0]); - a[0] = tanhf(a[0]); - a[0] = acoshf(a[0]); - a[0] = asinhf(a[0]); - a[0] = atanhf(a[0]); - }); - }); -} From 3d2aa445e8c7a965a7c1416966f6968742e8af39 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 29 May 2025 17:51:17 +0100 Subject: [PATCH 06/40] [SYCL] Add missing abs --- sycl/include/sycl/stl_wrappers/cmath-fallback.h | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 93d0cb5e912f4..a433bbe43bd1d 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -29,6 +29,7 @@ __DPCPP_SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE_C long long llabs(long long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } +__DPCPP_SYCL_DEVICE int abs(int n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE_C long labs(long n) { return n < 0 ? 
-n : n; } __DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } From 8ce1d93ba39b9792e380e0d2093107e4d3c15bf5 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 9 Jun 2025 15:23:38 +0100 Subject: [PATCH 07/40] [SYCL] Fix overloadable requirement for sycl_device_only --- clang/lib/Sema/SemaDecl.cpp | 4 ++++ sycl/include/sycl/stl_wrappers/cmath-fallback.h | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 49eef7e6f05e2..3ebffbec4c4c8 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -7354,6 +7354,10 @@ static bool isIncompleteDeclExternC(Sema &S, const T *D) { if (S.getLangOpts().CUDA && (D->template hasAttr() || D->template hasAttr())) return false; + + // So does SYCL's device_only attribute. + if (S.getLangOpts().isSYCL() && D->template hasAttr()) + return false; } return D->isExternC(); } diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index a433bbe43bd1d..25e8f543154f1 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -3,10 +3,9 @@ #ifdef __SYCL_DEVICE_ONLY__ -#define __DPCPP_SYCL_DEVICE \ - __attribute__((sycl_device_only, always_inline, overloadable)) +#define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) #define __DPCPP_SYCL_DEVICE_C \ - extern "C" __attribute__((sycl_device_only, always_inline, overloadable)) + extern "C" __attribute__((sycl_device_only, always_inline)) #define __DPCPP_SPIRV_MAP_UNARY(NAME) \ __DPCPP_SYCL_DEVICE_C float NAME##f(float x) { \ From d3b29882a73606931ce70d203548c2779d564b67 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 9 Jun 2025 17:25:24 +0100 Subject: [PATCH 08/40] [SYCL] Add device only docs --- clang/include/clang/Basic/Attr.td | 2 +-
clang/include/clang/Basic/AttrDocs.td | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 27a190f5c1ef9..6a10a02d877a8 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1611,7 +1611,7 @@ def SYCLDeviceOnly : InheritableAttr { let Spellings = [GNU<"sycl_device_only">]; let Subjects = SubjectList<[Function]>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; - let Documentation = [Undocumented]; + let Documentation = [SYCLDeviceOnlyDocs]; } def SYCLGlobalVar : InheritableAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 5234c7ee02fff..17594e5710419 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4518,6 +4518,17 @@ implicitly inherit this attribute. }]; } +def SYCLDeviceOnlyDocs : Documentation { + let Category = DocCatFunction; + let Heading = "sycl_device_only"; + let Content = [{ +This attribute can only be applied to functions and indicates that the function +is for the device only. This attribute allows to provide a device specific +overload of an existing function. All ``sycl_device_only`` function callees +implicitly inherit this attribute. 
+ }]; +} + def RISCVInterruptDocs : Documentation { let Category = DocCatFunction; let Heading = "interrupt (RISC-V)"; From 350aec6b0c55ff0b14e4b1883517ca150624fcb8 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 9 Jun 2025 17:46:00 +0100 Subject: [PATCH 09/40] [SYCL] Add initial test for sycl-device-only --- clang/test/CodeGenSYCL/sycl-device-only.cpp | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 clang/test/CodeGenSYCL/sycl-device-only.cpp diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp new file mode 100644 index 0000000000000..86db368b75af1 --- /dev/null +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKD +// RUN: %clang_cc1 -fsycl-is-host -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECKH +// Test code generation for sycl_device_only attribute. + +// Verify that the device overload is used on device. +// +// CHECK-LABEL: _Z3fooi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int foo(int a) { return a + 10; } + +#ifdef __SYCL_DEVICE_ONLY__ +__attribute__((sycl_device_only)) int foo(int a) { return a + 20; } +#endif + +__attribute__((sycl_device)) int bar(int b) { + return foo(b); +} + + +// Verify that in extern C the attribute enables mangling. 
+extern "C" { +// CHECK-LABEL: _Z3fooci +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int fooc(int a) { return a + 10; } +#ifdef __SYCL_DEVICE_ONLY__ +__attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } +#endif + +__attribute__((sycl_device)) int barc(int b) { + return fooc(b); +} +} From 2c119aed1575141544a8fe32ae2f4bfd18c2b852 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 10 Jun 2025 17:14:45 +0100 Subject: [PATCH 10/40] [SYCL] Add diagnostic for host side sycl_device_only --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 ++++ clang/lib/Sema/SemaDeclAttr.cpp | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d2583e96b2922..e7bdacf906a11 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8048,6 +8048,10 @@ def err_sycl_device_global_not_publicly_accessible: Error< def err_sycl_device_global_array : Error< "'device_global' array is not allowed">; +def err_sycl_device_only_attr + : Error<"'sycl_device_only' functions are not allowed in host side. 
Please " + "guard them with __SYCL_DEVICE_ONLY__.">; + def err_unexpected_interface : Error< "unexpected interface name %0: expected expression">; def err_ref_non_value : Error<"%0 does not refer to a value">; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index b076cb60db269..b61b256186f55 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8053,6 +8053,12 @@ void Sema::ProcessDeclAttributeList( D->setInvalidDecl(); } + // Do not permit 'sycl_device_only' functions in host code + if (getLangOpts().SYCLIsHost && D->hasAttr()) { + Diag(D->getLocation(), diag::err_sycl_device_only_attr); + D->setInvalidDecl(); + } + // Do this check after processing D's attributes because the attribute // objc_method_family can change whether the given method is in the init // family, and it can be applied after objc_designated_initializer. This is a From 605eed6c777a9197a5df22733b3036d96a8baebe Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 11 Jun 2025 11:07:11 +0100 Subject: [PATCH 11/40] [SYCL] Block sycl_device_only emission on host side --- clang/lib/CodeGen/CodeGenModule.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9478b836e1fd0..b5a485ee28ca9 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4323,6 +4323,12 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } + // Don't emit 'sycl_device_only' function in SYCL host compilation. + if (LangOpts.SYCLIsHost && isa(Global) && + Global->hasAttr()) { + return; + } + if (LangOpts.OpenMP) { // If this is OpenMP, check if it is legal to emit this global normally. 
if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) From 172e7f7e5d99002a34f990cc0b79eabbf759f1c2 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 11 Jun 2025 11:07:44 +0100 Subject: [PATCH 12/40] Revert "[SYCL] Add diagnostic for host side sycl_device_only" This reverts commit af224a08d299adee6263c0cdf953ab7f8eee568f. --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 4 ---- clang/lib/Sema/SemaDeclAttr.cpp | 6 ------ 2 files changed, 10 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e7bdacf906a11..d2583e96b2922 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8048,10 +8048,6 @@ def err_sycl_device_global_not_publicly_accessible: Error< def err_sycl_device_global_array : Error< "'device_global' array is not allowed">; -def err_sycl_device_only_attr - : Error<"'sycl_device_only' functions are not allowed in host side. Please " - "guard them with __SYCL_DEVICE_ONLY__.">; - def err_unexpected_interface : Error< "unexpected interface name %0: expected expression">; def err_ref_non_value : Error<"%0 does not refer to a value">; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index b61b256186f55..b076cb60db269 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -8053,12 +8053,6 @@ void Sema::ProcessDeclAttributeList( D->setInvalidDecl(); } - // Do not permit 'sycl_device_only' functions in host code - if (getLangOpts().SYCLIsHost && D->hasAttr()) { - Diag(D->getLocation(), diag::err_sycl_device_only_attr); - D->setInvalidDecl(); - } - // Do this check after processing D's attributes because the attribute // objc_method_family can change whether the given method is in the init // family, and it can be applied after objc_designated_initializer. 
This is a From bb9fc66b465110d1e61f7f4b489a90d43234c2c0 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 11 Jun 2025 11:56:13 +0100 Subject: [PATCH 13/40] [SYCL] Cleanup documentation and comments --- clang/include/clang/Basic/AttrDocs.td | 7 ++++--- clang/test/CodeGenSYCL/sycl-device-only.cpp | 8 ++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 17594e5710419..55d889f3efa1f 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4523,9 +4523,10 @@ def SYCLDeviceOnlyDocs : Documentation { let Heading = "sycl_device_only"; let Content = [{ This attribute can only be applied to functions and indicates that the function -is for the device only. This attribute allows to provide a device specific -overload of an existing function. All ``sycl_device_only`` function callees -implicitly inherit this attribute. +is only available for the device. It allows functions marked with it to +overload existing functions without the attribute, in which case the overload +with the attribute will be used on the device side and the overload without +will be used on the host side. }]; } diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp index 86db368b75af1..32331665f5ece 100644 --- a/clang/test/CodeGenSYCL/sycl-device-only.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -8,26 +8,22 @@ // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 int foo(int a) { return a + 10; } - -#ifdef __SYCL_DEVICE_ONLY__ __attribute__((sycl_device_only)) int foo(int a) { return a + 20; } -#endif +// Use a `sycl_device` function as entry point __attribute__((sycl_device)) int bar(int b) { return foo(b); } - // Verify that in extern C the attribute enables mangling. 
extern "C" { // CHECK-LABEL: _Z3fooci // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 int fooc(int a) { return a + 10; } -#ifdef __SYCL_DEVICE_ONLY__ __attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } -#endif +// Use a `sycl_device` function as entry point __attribute__((sycl_device)) int barc(int b) { return fooc(b); } From 06474cbb34b8c91f46dad2af40bd8b4fc7bfd31e Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 12 Jun 2025 18:04:08 +0100 Subject: [PATCH 14/40] [SYCL] Fix attribute emission handling --- clang/include/clang/Basic/AttrDocs.td | 4 ++- clang/lib/CodeGen/CodeGenModule.cpp | 37 +++++++++++++++++++++ clang/test/CodeGenSYCL/sycl-device-only.cpp | 36 ++++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 55d889f3efa1f..000a0e522e8cc 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4526,7 +4526,9 @@ This attribute can only be applied to functions and indicates that the function is only available for the device. It allows functions marked with it to overload existing functions without the attribute, in which case the overload with the attribute will be used on the device side and the overload without -will be used on the host side. +will be used on the host side. Note: as opposed to ``sycl_device`` this does +not mark the function as being exported, both attributes can be used together +if needed. }]; } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index b5a485ee28ca9..3850247242fb3 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4417,6 +4417,43 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } + + // When using SYCLDeviceOnlyAttr, there can be two functions with the same + // mangling, the host function and the device overload. 
So when compiling for + // device we need to make sure we're selecting the SYCLDeviceOnlyAttr + // overload and dropping the host overload. + if (LangOpts.SYCLIsDevice) { + StringRef MangledName = getMangledName(GD); + auto DDI = DeferredDecls.find(MangledName); + // If we have an existing declaration with the same mangling for this + // symbol it may be a SYCLDeviceOnlyAttr case. + if (DDI != DeferredDecls.end()) { + auto *G = cast(DeferredDecls[MangledName].getDecl()); + + if (!G->hasAttr() && + Global->hasAttr() && + Global->hasAttr()) { + // If the host declaration was already processed and the device only + // declaration is also a sycl external declaration, remove the host + // variant and skip. The device only variant will be generated later + // as it's marked sycl external. + DeferredDecls.erase(DDI); + return; + } else if (!G->hasAttr() && + Global->hasAttr()) { + // If the host declaration was already processed, replace it with the + // device only declaration. + DeferredDecls[MangledName] = GD; + return; + } else if (!Global->hasAttr() && + G->hasAttr()) { + // If the device only declaration was already processed, skip the + // host declaration. + return; + } + } + } + // clang::ParseAST ensures that we emit the SYCL devices at the end, so // anything that is a device (or indirectly called) will be handled later. if (LangOpts.SYCLIsDevice && MustBeEmitted(Global)) { diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp index 32331665f5ece..51e76eb5deeee 100644 --- a/clang/test/CodeGenSYCL/sycl-device-only.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -15,6 +15,19 @@ __attribute__((sycl_device)) int bar(int b) { return foo(b); } +// Verify that the order of declaration doesn't change the behavior. 
+// +// CHECK-LABEL: _Z3fooswapi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +__attribute__((sycl_device_only)) int fooswap(int a) { return a + 20; } +int fooswap(int a) { return a + 10; } + +// Use a `sycl_device` function as entry point +__attribute__((sycl_device)) int barswap(int b) { + return fooswap(b); +} + // Verify that in extern C the attribute enables mangling. extern "C" { // CHECK-LABEL: _Z3fooci @@ -28,3 +41,26 @@ __attribute__((sycl_device)) int barc(int b) { return fooc(b); } } + +// Check that both attributes can work together +// CHECK-LABEL: _Z3fooai +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +int fooa(int a) { return a + 10; } +__attribute__((sycl_device_only, sycl_device)) int fooa(int a) { return a + 20; } + +// Use a `sycl_device` function as entry point +__attribute__((sycl_device)) int bara(int b) { + return fooa(b); +} + +// CHECK-LABEL: _Z3fooaswapi +// CHECKH: %add = add nsw i32 %0, 10 +// CHECKD: %add = add nsw i32 %0, 20 +__attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { return a + 20; } +int fooaswap(int a) { return a + 10; } + +// Use a `sycl_device` function as entry point +__attribute__((sycl_device)) int baraswap(int b) { + return fooaswap(b); +} From 59c6edfdb48e50fbe1986132cd1acecf6094fe25 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 12 Jun 2025 18:10:32 +0100 Subject: [PATCH 15/40] [SYCL] Fix formatting --- clang/lib/CodeGen/CodeGenModule.cpp | 57 ++++++++++----------- clang/test/CodeGenSYCL/sycl-device-only.cpp | 28 ++++------ 2 files changed, 39 insertions(+), 46 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 3850247242fb3..43078e9e29545 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4417,41 +4417,40 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { } } - // When using SYCLDeviceOnlyAttr, there can be two functions with the 
same // mangling, the host function and the device overload. So when compiling for // device we need to make sure we're selecting the SYCLDeviceOnlyAttr // overload and dropping the host overload. if (LangOpts.SYCLIsDevice) { - StringRef MangledName = getMangledName(GD); - auto DDI = DeferredDecls.find(MangledName); - // If we have an existing declaration with the same mangling for this - // symbol it may be a SYCLDeviceOnlyAttr case. - if (DDI != DeferredDecls.end()) { - auto *G = cast(DeferredDecls[MangledName].getDecl()); - - if (!G->hasAttr() && - Global->hasAttr() && - Global->hasAttr()) { - // If the host declaration was already processed and the device only - // declaration is also a sycl external declaration, remove the host - // variant and skip. The device only variant will be generated later - // as it's marked sycl external. - DeferredDecls.erase(DDI); - return; - } else if (!G->hasAttr() && - Global->hasAttr()) { - // If the host declaration was already processed, replace it with the - // device only declaration. - DeferredDecls[MangledName] = GD; - return; - } else if (!Global->hasAttr() && - G->hasAttr()) { - // If the device only declaration was already processed, skip the - // host declaration. - return; - } + StringRef MangledName = getMangledName(GD); + auto DDI = DeferredDecls.find(MangledName); + // If we have an existing declaration with the same mangling for this + // symbol it may be a SYCLDeviceOnlyAttr case. + if (DDI != DeferredDecls.end()) { + auto *G = cast(DeferredDecls[MangledName].getDecl()); + + if (!G->hasAttr() && + Global->hasAttr() && + Global->hasAttr()) { + // If the host declaration was already processed and the device only + // declaration is also a sycl external declaration, remove the host + // variant and skip. The device only variant will be generated later + // as it's marked sycl external. 
+ DeferredDecls.erase(DDI); + return; + } else if (!G->hasAttr() && + Global->hasAttr()) { + // If the host declaration was already processed, replace it with the + // device only declaration. + DeferredDecls[MangledName] = GD; + return; + } else if (!Global->hasAttr() && + G->hasAttr()) { + // If the device only declaration was already processed, skip the + // host declaration. + return; } + } } // clang::ParseAST ensures that we emit the SYCL devices at the end, so diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp index 51e76eb5deeee..f9a97cb803fae 100644 --- a/clang/test/CodeGenSYCL/sycl-device-only.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -11,9 +11,7 @@ int foo(int a) { return a + 10; } __attribute__((sycl_device_only)) int foo(int a) { return a + 20; } // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int bar(int b) { - return foo(b); -} +__attribute__((sycl_device)) int bar(int b) { return foo(b); } // Verify that the order of declaration doesn't change the behavior. // @@ -24,9 +22,7 @@ __attribute__((sycl_device_only)) int fooswap(int a) { return a + 20; } int fooswap(int a) { return a + 10; } // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int barswap(int b) { - return fooswap(b); -} +__attribute__((sycl_device)) int barswap(int b) { return fooswap(b); } // Verify that in extern C the attribute enables mangling. 
extern "C" { @@ -37,9 +33,7 @@ int fooc(int a) { return a + 10; } __attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int barc(int b) { - return fooc(b); -} +__attribute__((sycl_device)) int barc(int b) { return fooc(b); } } // Check that both attributes can work together @@ -47,20 +41,20 @@ __attribute__((sycl_device)) int barc(int b) { // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 int fooa(int a) { return a + 10; } -__attribute__((sycl_device_only, sycl_device)) int fooa(int a) { return a + 20; } +__attribute__((sycl_device_only, sycl_device)) int fooa(int a) { + return a + 20; +} // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int bara(int b) { - return fooa(b); -} +__attribute__((sycl_device)) int bara(int b) { return fooa(b); } // CHECK-LABEL: _Z3fooaswapi // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 -__attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { return a + 20; } +__attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { + return a + 20; +} int fooaswap(int a) { return a + 10; } // Use a `sycl_device` function as entry point -__attribute__((sycl_device)) int baraswap(int b) { - return fooaswap(b); -} +__attribute__((sycl_device)) int baraswap(int b) { return fooaswap(b); } From 1a6e0f56d9b5a4ccde42ad4a2066e84af8815f61 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 10:09:11 +0100 Subject: [PATCH 16/40] [SYCL] Rename variable --- clang/lib/CodeGen/CodeGenModule.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 43078e9e29545..5c03aacd25c22 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4427,9 +4427,10 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // If we have an 
existing declaration with the same mangling for this // symbol it may be a SYCLDeviceOnlyAttr case. if (DDI != DeferredDecls.end()) { - auto *G = cast(DeferredDecls[MangledName].getDecl()); + auto *PreviousGlobal = + cast(DeferredDecls[MangledName].getDecl()); - if (!G->hasAttr() && + if (!PreviousGlobal->hasAttr() && Global->hasAttr() && Global->hasAttr()) { // If the host declaration was already processed and the device only @@ -4438,14 +4439,14 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // as it's marked sycl external. DeferredDecls.erase(DDI); return; - } else if (!G->hasAttr() && + } else if (!PreviousGlobal->hasAttr() && Global->hasAttr()) { // If the host declaration was already processed, replace it with the // device only declaration. DeferredDecls[MangledName] = GD; return; - } else if (!Global->hasAttr() && - G->hasAttr()) { + } else if (PreviousGlobal->hasAttr() && + !Global->hasAttr()) { // If the device only declaration was already processed, skip the // host declaration. 
return; From 5df06cf1d8784fef80652f9d4e6169793b3ae4d6 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 14:58:37 +0100 Subject: [PATCH 17/40] [SYCL] More fallback header improvements --- libdevice/cmath_wrapper.cpp | 18 ------------ libdevice/cmath_wrapper_fp64.cpp | 18 ------------ .../sycl/stl_wrappers/cmath-fallback.h | 29 +++++++++++++++++-- 3 files changed, 27 insertions(+), 38 deletions(-) diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index 3c6c1b97fa1c5..a084a86883767 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -189,25 +189,7 @@ float asinhf(float x) { return __devicelib_asinhf(x); } DEVICE_EXTERN_C_INLINE float atanhf(float x) { return __devicelib_atanhf(x); } -#ifdef __NVPTX__ -extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); -DEVICE_EXTERN_C_INLINE -float nearbyintf(float x) { return __nv_nearbyintf(x); } - -extern "C" SYCL_EXTERNAL float __nv_rintf(float); -DEVICE_EXTERN_C_INLINE -float rintf(float x) { return __nv_rintf(x); } -#elif defined(__AMDGCN__) -extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); -DEVICE_EXTERN_C_INLINE -float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } - -extern "C" SYCL_EXTERNAL float __ocml_rint_f32(float); -DEVICE_EXTERN_C_INLINE -float rintf(float x) { return __ocml_rint_f32(x); } -#else DEVICE_EXTERN_C_INLINE float rintf(float x) { return __spirv_ocl_rint(x); } -#endif #endif // __SPIR__ || __SPIRV__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index 81ba3e710ec6d..855317bcf3f4b 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -179,26 +179,8 @@ double atanh(double x) { return __devicelib_atanh(x); } DEVICE_EXTERN_C_INLINE double scalbn(double x, int exp) { return __devicelib_scalbn(x, exp); } -#ifdef __NVPTX__ -extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); -DEVICE_EXTERN_C_INLINE -double nearbyint(double x) { return __nv_nearbyint(x); } 
- -extern "C" SYCL_EXTERNAL double __nv_rint(double); -DEVICE_EXTERN_C_INLINE -double rint(double x) { return __nv_rint(x); } -#elif defined(__AMDGCN__) -extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); -DEVICE_EXTERN_C_INLINE -double nearbyint(double x) { return __ocml_nearbyint_f64(x); } - -extern "C" SYCL_EXTERNAL double __ocml_rint_f64(double); -DEVICE_EXTERN_C_INLINE -double rint(double x) { return __ocml_rint_f64(x); } -#else DEVICE_EXTERN_C_INLINE double rint(double x) { return __spirv_ocl_rint(x); } -#endif #if defined(_MSC_VER) #include diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 25e8f543154f1..5ff5cb8461575 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -36,6 +36,7 @@ __DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? 
-x : x; } +__DPCPP_SPIRV_MAP_BINARY(copysign); __DPCPP_SPIRV_MAP_UNARY(acos); __DPCPP_SPIRV_MAP_UNARY(acosh); __DPCPP_SPIRV_MAP_UNARY(asin); @@ -163,6 +164,21 @@ __DPCPP_SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { return {x / y, x % y}; } +#if defined(__NVPTX__) +extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); +extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __nv_nearbyintf(x); } +__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __nv_nearbyintf(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } +#elif defined(__AMDGCN__) +extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); +extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } +__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __ocml_nearbyint_f64(x); } +#endif + + #ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else @@ -172,6 +188,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif #endif +#if defined(__NVPTX__) || defined(__AMDGCN__) +using ::nearbyint; +using ::nearbyintf; +#endif + using ::abs; using ::acos; using ::acosf; @@ -196,7 +217,8 @@ using ::labs; using ::ldiv; using ::llabs; using ::lldiv; -// using ::copysign; +using ::copysign; +using ::copysignf; using ::cos; using ::cosf; using ::cosh; @@ -225,7 +247,9 @@ using ::fmodf; // using ::fpclassify; using ::frexp; using ::hypot; +using ::hypotf; using ::ilogb; +using ::ilogbf; // using ::isfinite; // using ::isgreater; // using ::isgreaterequal; @@ -238,11 +262,13 @@ using ::ilogb; // using ::isunordered; // using ::labs; using ::ldexp; +using ::ldexpf; using ::lgamma; using ::lgammaf; // using ::llabs; // using ::llrint; using ::log; +using ::logf; using ::log10; using ::log10f; using ::log1p; @@ -251,7 +277,6 @@ using ::log2; using 
::log2f; using ::logb; using ::logbf; -using ::logf; // using ::lrint; // using ::lround; // using ::llround; From e0cd39995818ee40933d207fc5006d6a30db4e20 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 15:02:55 +0100 Subject: [PATCH 18/40] [SYCL] Add nearbyint and rint to devicelib tests --- sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp | 13 ++++++++----- sycl/test-e2e/DeviceLib/cmath_test.cpp | 14 ++++++++------ sycl/test-e2e/DeviceLib/math_fp64_test.cpp | 13 ++++++++----- sycl/test-e2e/DeviceLib/math_test.cpp | 13 ++++++++----- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp index bda9ce2ff1ced..a7c030de66c15 100644 --- a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp @@ -21,13 +21,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 74 +#define TEST_NUM 76 double ref[TEST_NUM] = { - 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, - 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, - 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 1.0, 1.0, 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, + 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, + 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -62,6 +62,9 @@ template void device_cmath_test(s::queue &deviceQueue) { T minus_infinity = -INFINITY; double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; + + res_access[i++] = std::nearbyint(0.9); + res_access[i++] = std::rint(0.9); res_access[i++] = std::scalbln(1.5, 2); res_access[i++] = sycl::exp10(2.0); res_access[i++] = sycl::rsqrt(4.0); diff --git 
a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 6a453e56f704f..0c1687e26044c 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -24,13 +24,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 70 +#define TEST_NUM 72 -float ref[TEST_NUM] = {100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, - 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, - 0, 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, - 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref[TEST_NUM] = { + 1.0f, 1.0f, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, 0, 0, + 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, + 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -60,6 +60,8 @@ template void device_cmath_test_1(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; + res_access[i++] = std::nearbyint(0.9f); + res_access[i++] = std::rint(0.9f); res_access[i++] = sycl::exp10(2.0f); res_access[i++] = sycl::rsqrt(4.0f); res_access[i++] = std::trunc(1.2f); diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 86dad1b3c0d3f..099f420c3aeb6 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -20,12 +20,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 61 +#define TEST_NUM 63 -double ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, - 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +double ref_val[TEST_NUM] = 
{1.0, 1.0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, + 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, + 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, + NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -61,6 +62,8 @@ void device_math_test(s::queue &deviceQueue) { double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; + res_access[i++] = nearbyint(0.9); + res_access[i++] = rint(0.9); res_access[i++] = cos(0.0); res_access[i++] = sin(0.0); res_access[i++] = log(1.0); diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index 029409b617473..d7f7532c09a69 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -18,12 +18,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 59 +#define TEST_NUM 61 -float ref_val[TEST_NUM] = { - 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, - 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref_val[TEST_NUM] = {1.0f, 1.0f, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, + 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, + 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -53,6 +54,8 @@ void device_math_test(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; + res_access[i++] = nearbyintf(0.9); + res_access[i++] = rintf(0.9); res_access[i++] = cosf(0.0f); res_access[i++] = sinf(0.0f); res_access[i++] = logf(1.0f); From 8affa7a55f69068d7982975a410ba442f97d67d4 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 16:32:17 +0100 Subject: [PATCH 19/40] [SYCL] Fix formatting --- .../sycl/stl_wrappers/cmath-fallback.h | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff 
--git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 5ff5cb8461575..8f074257bd17f 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -173,12 +173,15 @@ __DPCPP_SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } #elif defined(__AMDGCN__) extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); -__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { + return __ocml_nearbyint_f32(x); +} __DPCPP_SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } -__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __ocml_nearbyint_f64(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { + return __ocml_nearbyint_f64(x); +} #endif - #ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else @@ -212,17 +215,13 @@ using ::cbrt; using ::cbrtf; using ::ceil; using ::ceilf; -using ::div; -using ::labs; -using ::ldiv; -using ::llabs; -using ::lldiv; using ::copysign; using ::copysignf; using ::cos; using ::cosf; using ::cosh; using ::coshf; +using ::div; using ::erf; using ::erfc; using ::erfcf; @@ -244,6 +243,10 @@ using ::fmaxf; using ::fminf; using ::fmod; using ::fmodf; +using ::labs; +using ::ldiv; +using ::llabs; +using ::lldiv; // using ::fpclassify; using ::frexp; using ::hypot; @@ -268,7 +271,6 @@ using ::lgammaf; // using ::llabs; // using ::llrint; using ::log; -using ::logf; using ::log10; using ::log10f; using ::log1p; @@ -277,6 +279,7 @@ using ::log2; using ::log2f; using ::logb; using ::logbf; +using ::logf; // using ::lrint; // using ::lround; // using ::llround; From 8d9733db16b7ebfe5b5a44bb469c1ad7bea5fcea Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 13 Jun 2025 17:29:15 +0100 Subject: [PATCH 20/40] [SYCL] Add SYCL_EXTERNAL to nearbyint and 
rint --- sycl/include/sycl/stl_wrappers/cmath | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index 8c626e4b06606..e6c3fdfca7a03 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -109,6 +109,10 @@ extern __DPCPP_SYCL_EXTERNAL double atanh(double x); extern __DPCPP_SYCL_EXTERNAL double frexp(double x, int *exp); extern __DPCPP_SYCL_EXTERNAL double ldexp(double x, int exp); extern __DPCPP_SYCL_EXTERNAL double hypot(double x, double y); +extern __DPCPP_SYCL_EXTERNAL float nearbyintf(float x); +extern __DPCPP_SYCL_EXTERNAL double nearbyint(double x); +extern __DPCPP_SYCL_EXTERNAL float rintf(float x); +extern __DPCPP_SYCL_EXTERNAL double rint(double x); } #ifdef __GLIBC__ From ec84f57a6b0ae6f140d7022571a62be76ba7b745 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 16 Jun 2025 13:29:26 +0100 Subject: [PATCH 21/40] [SYCL][E2E] Remove nearbyint from test and stl wrapper This doesn't map to a spir-v built-in --- sycl/include/sycl/stl_wrappers/cmath | 2 -- sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp | 11 +++++------ sycl/test-e2e/DeviceLib/cmath_test.cpp | 13 ++++++------- sycl/test-e2e/DeviceLib/math_fp64_test.cpp | 12 +++++------- sycl/test-e2e/DeviceLib/math_test.cpp | 12 +++++------- 5 files changed, 21 insertions(+), 29 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index e6c3fdfca7a03..4240e7458d0b0 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -109,8 +109,6 @@ extern __DPCPP_SYCL_EXTERNAL double atanh(double x); extern __DPCPP_SYCL_EXTERNAL double frexp(double x, int *exp); extern __DPCPP_SYCL_EXTERNAL double ldexp(double x, int exp); extern __DPCPP_SYCL_EXTERNAL double hypot(double x, double y); -extern __DPCPP_SYCL_EXTERNAL float nearbyintf(float x); -extern __DPCPP_SYCL_EXTERNAL double nearbyint(double x); extern 
__DPCPP_SYCL_EXTERNAL float rintf(float x); extern __DPCPP_SYCL_EXTERNAL double rint(double x); diff --git a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp index a7c030de66c15..833b3aea6203a 100644 --- a/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_fp64_test.cpp @@ -21,13 +21,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 76 +#define TEST_NUM 75 double ref[TEST_NUM] = { - 1.0, 1.0, 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, - 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, - 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + 1.0, 6, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 0, 0, + 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, + 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -63,7 +63,6 @@ template void device_cmath_test(s::queue &deviceQueue) { double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; - res_access[i++] = std::nearbyint(0.9); res_access[i++] = std::rint(0.9); res_access[i++] = std::scalbln(1.5, 2); res_access[i++] = sycl::exp10(2.0); diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 0c1687e26044c..c5c58f09023d4 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -24,13 +24,13 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 72 +#define TEST_NUM 71 -float ref[TEST_NUM] = { - 1.0f, 1.0f, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, 0, 0, - 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, - 2, 0, 1, 2, 5, 0, 
0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref[TEST_NUM] = {1.0f, 100, 0.5, 1.0, 0, 0, -2, 1, 2, 1, 1, 0, 1, 1, 0, + 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, + 0.5, NAN, NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -60,7 +60,6 @@ template void device_cmath_test_1(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; - res_access[i++] = std::nearbyint(0.9f); res_access[i++] = std::rint(0.9f); res_access[i++] = sycl::exp10(2.0f); res_access[i++] = sycl::rsqrt(4.0f); diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 099f420c3aeb6..5f160b9f2a7b1 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -20,13 +20,12 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 63 +#define TEST_NUM 62 -double ref_val[TEST_NUM] = {1.0, 1.0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, - 2, 0, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, - 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, - NAN, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +double ref_val[TEST_NUM] = { + 1.0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 2, 0, 0, 1, 0, 2, 0, 0, 0, + 0, 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; double refIptr = 1; @@ -62,7 +61,6 @@ void device_math_test(s::queue &deviceQueue) { double subnormal; *((uint64_t *)&subnormal) = 0xFFFFFFFFFFFFFULL; - res_access[i++] = nearbyint(0.9); res_access[i++] = rint(0.9); res_access[i++] = cos(0.0); res_access[i++] = sin(0.0); diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index d7f7532c09a69..c9a98f468225d 100644 
--- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -18,13 +18,12 @@ namespace s = sycl; constexpr s::access::mode sycl_read = s::access::mode::read; constexpr s::access::mode sycl_write = s::access::mode::write; -#define TEST_NUM 61 +#define TEST_NUM 60 -float ref_val[TEST_NUM] = {1.0f, 1.0f, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, - 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 2, - 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0}; +float ref_val[TEST_NUM] = { + 1.0f, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0.5, 0, 0, 1, 0, 2, 0, 0, 0, 0, + 0, 1, 0, 1, 2, 0, 1, 2, 5, 0, 0, 0, 0, 0.5, 0.5, NAN, NAN, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; float refIptr = 1; @@ -54,7 +53,6 @@ void device_math_test(s::queue &deviceQueue) { float subnormal; *((uint32_t *)&subnormal) = 0x7FFFFF; - res_access[i++] = nearbyintf(0.9); res_access[i++] = rintf(0.9); res_access[i++] = cosf(0.0f); res_access[i++] = sinf(0.0f); From 9db7bde66616a24228eb1132424cc6bf09340b75 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 18 Jun 2025 18:05:34 +0100 Subject: [PATCH 22/40] [SYCL] Don't leak macros from cmath-fallback.h --- sycl/include/sycl/stl_wrappers/cmath-fallback.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 8f074257bd17f..a056e501dd1c5 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -329,7 +329,9 @@ _GLIBCXX_END_NAMESPACE_VERSION } // namespace std #endif +#undef __DPCPP_SPIRV_MAP_BINARY #undef __DPCPP_SPIRV_MAP_UNARY +#undef __DPCPP_SYCL_DEVICE_C #undef __DPCPP_SYCL_DEVICE #endif #endif From d8a273b9193bdc1a085a806cebbddecb1a73fcfe Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 25 Jun 2025 14:52:42 +0100 Subject: [PATCH 23/40] [SYCL] Fix if/else/return formatting --- 
clang/lib/CodeGen/CodeGenModule.cpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 5c03aacd25c22..310e0c72ffb1e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4430,25 +4430,29 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { auto *PreviousGlobal = cast(DeferredDecls[MangledName].getDecl()); + // If the host declaration was already processed and the device only + // declaration is also a sycl external declaration, remove the host + // variant and skip. The device only variant will be generated later + // as it's marked sycl external. if (!PreviousGlobal->hasAttr() && Global->hasAttr() && Global->hasAttr()) { - // If the host declaration was already processed and the device only - // declaration is also a sycl external declaration, remove the host - // variant and skip. The device only variant will be generated later - // as it's marked sycl external. DeferredDecls.erase(DDI); return; - } else if (!PreviousGlobal->hasAttr() && - Global->hasAttr()) { - // If the host declaration was already processed, replace it with the - // device only declaration. + } + + // If the host declaration was already processed, replace it with the + // device only declaration. + if (!PreviousGlobal->hasAttr() && + Global->hasAttr()) { DeferredDecls[MangledName] = GD; return; - } else if (PreviousGlobal->hasAttr() && - !Global->hasAttr()) { - // If the device only declaration was already processed, skip the - // host declaration. + } + + // If the device only declaration was already processed, skip the + // host declaration. 
+ if (PreviousGlobal->hasAttr() && + !Global->hasAttr()) { return; } } From f53840b3b640db793adff7aa54bf91a6ab675a95 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 25 Jun 2025 14:53:18 +0100 Subject: [PATCH 24/40] [SYCL] Cleanup use of isSYCL() in SemaOverload --- clang/lib/Sema/SemaOverload.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 7cb24f06ae1e2..91c9106be81c1 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -11030,8 +11030,7 @@ bool clang::isBetterOverloadCandidate( // In SYCL device compilation mode prefer the overload with the // SYCLDeviceOnly attribute. - if (S.getLangOpts().isSYCL() && S.getLangOpts().SYCLIsDevice && - Cand1.Function && Cand2.Function) { + if (S.getLangOpts().SYCLIsDevice && Cand1.Function && Cand2.Function) { if (Cand1.Function->hasAttr() != Cand2.Function->hasAttr()) { return Cand1.Function->hasAttr(); @@ -11393,7 +11392,7 @@ OverloadingResult OverloadCandidateSet::BestViableFunctionImpl( CudaExcludeWrongSideCandidates(S, Candidates); // In SYCL host compilation remove candidates marked SYCLDeviceOnly. 
- if (S.getLangOpts().isSYCL() && !S.getLangOpts().SYCLIsDevice) { + if (S.getLangOpts().SYCLIsHost) { auto IsDeviceCand = [&](const OverloadCandidate *Cand) { return Cand->Viable && Cand->Function && Cand->Function->hasAttr(); From 8477da70585bfbaef96d4a53188398515e8b2367 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 27 Jun 2025 09:10:28 +0100 Subject: [PATCH 25/40] [SYCL] Add missing copyright notices --- sycl/include/sycl/stl_wrappers/cmath | 8 ++++++++ sycl/include/sycl/stl_wrappers/cmath-fallback.h | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index 4240e7458d0b0..eeb8261523d1a 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -1,3 +1,11 @@ +//==------------------------ cmath -----------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #pragma once // Include real STL header - the next one from the include search diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index a056e501dd1c5..218477bfd4ab3 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -1,3 +1,11 @@ +//==------------- cmath-fallback.h -----------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #ifndef __CMATH_FALLBACK_H__ #define __CMATH_FALLBACK_H__ From 34d26f4e19f0554811fa4b0675d80f338c9995b8 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 27 Jun 2025 13:59:46 +0100 Subject: [PATCH 26/40] [SYCL] Re-use DDI in CodeGenModule --- clang/lib/CodeGen/CodeGenModule.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 310e0c72ffb1e..9e4fe69e24128 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4427,8 +4427,7 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // If we have an existing declaration with the same mangling for this // symbol it may be a SYCLDeviceOnlyAttr case. if (DDI != DeferredDecls.end()) { - auto *PreviousGlobal = - cast(DeferredDecls[MangledName].getDecl()); + auto *PreviousGlobal = cast(DDI->second.getDecl()); // If the host declaration was already processed and the device only // declaration is also a sycl external declaration, remove the host From 4a576c47c26b1c0996ae2582113d1ed40b6d31a6 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 1 Jul 2025 14:16:30 +0100 Subject: [PATCH 27/40] [SYCL] Cleanup and complete fallback header --- .../sycl/stl_wrappers/cmath-fallback.h | 523 ++++++++++++------ 1 file changed, 340 insertions(+), 183 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 218477bfd4ab3..c4940d23acd80 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -9,18 +9,37 @@ #ifndef __CMATH_FALLBACK_H__ #define __CMATH_FALLBACK_H__ +// This header defines device-side overloads of functions based on +// their equivalent __spirv_ built-ins. 
+ #ifdef __SYCL_DEVICE_ONLY__ +// The 'sycl_device_only' attribute enables device-side overloading. #define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) #define __DPCPP_SYCL_DEVICE_C \ extern "C" __attribute__((sycl_device_only, always_inline)) +// For each math built-in we need to define float and double overloads, an +// extern "C" float variant with the 'f' suffix, and a version that promotes to +// double if any floating-point parameter passed is an integer. +// +// TODO: Consider targets that don't have double support +// TODO: Enable long double support where possible +// +// The following two macros provide an easy way to define these overloads for +// basic built-ins with one or two floating-point parameters. #define __DPCPP_SPIRV_MAP_UNARY(NAME) \ __DPCPP_SYCL_DEVICE_C float NAME##f(float x) { \ return __spirv_ocl_##NAME(x); \ } \ __DPCPP_SYCL_DEVICE float NAME(float x) { return __spirv_ocl_##NAME(x); } \ - __DPCPP_SYCL_DEVICE double NAME(double x) { return __spirv_ocl_##NAME(x); } + __DPCPP_SYCL_DEVICE double NAME(double x) { return __spirv_ocl_##NAME(x); } \ + template \ + __DPCPP_SYCL_DEVICE \ + typename std::enable_if::value, double>::type \ + NAME(T x) { \ + return __spirv_ocl_##NAME((double)x); \ + } #define __DPCPP_SPIRV_MAP_BINARY(NAME) \ __DPCPP_SYCL_DEVICE_C float NAME##f(float x, float y) { \ @@ -31,60 +50,63 @@ } \ __DPCPP_SYCL_DEVICE double NAME(double x, double y) { \ return __spirv_ocl_##NAME(x, y); \ + } \ + template \ + __DPCPP_SYCL_DEVICE typename std::enable_if< \ + std::is_integral::value || std::is_integral::value, double>::type \ + NAME(T x, U y) { \ + return __spirv_ocl_##NAME((double)x, (double)y); \ } +/// +// FIXME: Move this to a cstdlib fallback header + +__DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } +__DPCPP_SYCL_DEVICE ldiv_t ldiv(long x, long y) { return {x / y, x % y}; } +__DPCPP_SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { + return {x / y, x % y}; +} + 
__DPCPP_SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE_C long long llabs(long long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE int abs(int n) { return n < 0 ? -n : n; } __DPCPP_SYCL_DEVICE_C long labs(long n) { return n < 0 ? -n : n; } + +/// Basic operations +// + __DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } +template +__DPCPP_SYCL_DEVICE + typename std::enable_if::value, double>::type + fabs(T x) { + return x < 0 ? -x : x; +} -__DPCPP_SPIRV_MAP_BINARY(copysign); -__DPCPP_SPIRV_MAP_UNARY(acos); -__DPCPP_SPIRV_MAP_UNARY(acosh); -__DPCPP_SPIRV_MAP_UNARY(asin); -__DPCPP_SPIRV_MAP_UNARY(asinh); +__DPCPP_SPIRV_MAP_BINARY(fmod); +__DPCPP_SPIRV_MAP_BINARY(remainder); -__DPCPP_SYCL_DEVICE_C float scalbnf(float x, int exp) { - return __spirv_ocl_ldexp(x, exp); -} -__DPCPP_SYCL_DEVICE float scalbn(float x, int exp) { - return __spirv_ocl_ldexp(x, exp); -} -__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { - return __spirv_ocl_ldexp(x, exp); +__DPCPP_SYCL_DEVICE_C float remquof(float x, float y, int *q) { + return __spirv_ocl_remquo(x, y, q); } - -__DPCPP_SYCL_DEVICE_C float scalblnf(float x, long int exp) { - return __spirv_ocl_ldexp(x, (int)exp); +__DPCPP_SYCL_DEVICE float remquo(float x, float y, int *q) { + return __spirv_ocl_remquo(x, y, q); } -__DPCPP_SYCL_DEVICE float scalbln(float x, long int exp) { - return __spirv_ocl_ldexp(x, (int)exp); +__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { + return __spirv_ocl_remquo(x, y, q); } -__DPCPP_SYCL_DEVICE double scalbln(double x, long int exp) { - return __spirv_ocl_ldexp(x, (int)exp); +template 
+__DPCPP_SYCL_DEVICE typename std::enable_if< + std::is_integral::value || std::is_integral::value, double>::type +remquo(T x, U y, int *q) { + return __spirv_ocl_remquo((double)x, (double)y, q); } -__DPCPP_SPIRV_MAP_BINARY(atan2); -__DPCPP_SPIRV_MAP_UNARY(atan); -__DPCPP_SPIRV_MAP_UNARY(atanh); -__DPCPP_SPIRV_MAP_UNARY(cbrt); -__DPCPP_SPIRV_MAP_UNARY(ceil); -__DPCPP_SPIRV_MAP_UNARY(cos); -__DPCPP_SPIRV_MAP_UNARY(cosh); -__DPCPP_SPIRV_MAP_UNARY(erfc); -__DPCPP_SPIRV_MAP_UNARY(erf); -__DPCPP_SPIRV_MAP_UNARY(exp2); -__DPCPP_SPIRV_MAP_UNARY(exp); -__DPCPP_SPIRV_MAP_UNARY(expm1); -__DPCPP_SPIRV_MAP_BINARY(fdim); -__DPCPP_SPIRV_MAP_UNARY(floor); - __DPCPP_SYCL_DEVICE_C float fmaf(float x, float y, float z) { return __spirv_ocl_fma(x, y, z); } @@ -94,10 +116,100 @@ __DPCPP_SYCL_DEVICE float fma(float x, float y, float z) { __DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { return __spirv_ocl_fma(x, y, z); } +template +__DPCPP_SYCL_DEVICE typename std::enable_if::value || + std::is_integral::value || + std::is_integral::value, + double>::type +fma(T x, U y, V z) { + return __spirv_ocl_fma((double)x, (double)y, (double)z); +} __DPCPP_SPIRV_MAP_BINARY(fmax); __DPCPP_SPIRV_MAP_BINARY(fmin); -__DPCPP_SPIRV_MAP_BINARY(fmod); +__DPCPP_SPIRV_MAP_BINARY(fdim); +// unsupported: nan + +/// Exponential functions +// + +__DPCPP_SPIRV_MAP_UNARY(exp); +__DPCPP_SPIRV_MAP_UNARY(exp2); +__DPCPP_SPIRV_MAP_UNARY(expm1); +__DPCPP_SPIRV_MAP_UNARY(log); +__DPCPP_SPIRV_MAP_UNARY(log10); +__DPCPP_SPIRV_MAP_UNARY(log2); +__DPCPP_SPIRV_MAP_UNARY(log1p); + +/// Power functions +// + +__DPCPP_SPIRV_MAP_BINARY(pow); +__DPCPP_SPIRV_MAP_UNARY(sqrt); +__DPCPP_SPIRV_MAP_UNARY(cbrt); +__DPCPP_SPIRV_MAP_BINARY(hypot); + +/// Trigonometric functions +// + +__DPCPP_SPIRV_MAP_UNARY(sin); +__DPCPP_SPIRV_MAP_UNARY(cos); +__DPCPP_SPIRV_MAP_UNARY(tan); +__DPCPP_SPIRV_MAP_UNARY(asin); +__DPCPP_SPIRV_MAP_UNARY(acos); +__DPCPP_SPIRV_MAP_UNARY(atan); +__DPCPP_SPIRV_MAP_BINARY(atan2); + +/// Hyperbolic 
functions +// + +__DPCPP_SPIRV_MAP_UNARY(sinh); +__DPCPP_SPIRV_MAP_UNARY(cosh); +__DPCPP_SPIRV_MAP_UNARY(tanh); +__DPCPP_SPIRV_MAP_UNARY(asinh); +__DPCPP_SPIRV_MAP_UNARY(acosh); +__DPCPP_SPIRV_MAP_UNARY(atanh); + +/// Error and gamma functions +// + +__DPCPP_SPIRV_MAP_UNARY(erf); +__DPCPP_SPIRV_MAP_UNARY(erfc); +__DPCPP_SPIRV_MAP_UNARY(tgamma); +__DPCPP_SPIRV_MAP_UNARY(lgamma); + +/// Nearest integer floating-point operations +// + +__DPCPP_SPIRV_MAP_UNARY(ceil); +__DPCPP_SPIRV_MAP_UNARY(floor); +__DPCPP_SPIRV_MAP_UNARY(trunc); +__DPCPP_SPIRV_MAP_UNARY(round); +// unsupported: lround, llround (no spirv mapping) +__DPCPP_SPIRV_MAP_UNARY(rint); +// unsupported: lrint, llrint (no spirv mapping) + +// unsupported (partially, no spirv mapping): nearbyint +#if defined(__NVPTX__) +extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); +extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __nv_nearbyintf(x); } +__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __nv_nearbyintf(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } +#elif defined(__AMDGCN__) +extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); +extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); +__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { + return __ocml_nearbyint_f32(x); +} +__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } +__DPCPP_SYCL_DEVICE double nearbyint(double x) { + return __ocml_nearbyint_f64(x); +} +#endif + +/// Floating-point manipulation functions +// __DPCPP_SYCL_DEVICE_C float frexpf(float x, int *exp) { return __spirv_ocl_frexp(x, exp); @@ -108,11 +220,12 @@ __DPCPP_SYCL_DEVICE float frexp(float x, int *exp) { __DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { return __spirv_ocl_frexp(x, exp); } - -__DPCPP_SPIRV_MAP_BINARY(hypot); -__DPCPP_SYCL_DEVICE_C int ilogbf(float x) { return __spirv_ocl_ilogb(x); } -__DPCPP_SYCL_DEVICE int 
ilogb(float x) { return __spirv_ocl_ilogb(x); } -__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } +template +__DPCPP_SYCL_DEVICE + typename std::enable_if::value, double>::type + frexp(T x, int *exp) { + return __spirv_ocl_frexp((double)x, exp); +} __DPCPP_SYCL_DEVICE_C float ldexpf(float x, int exp) { return __spirv_ocl_ldexp(x, exp); @@ -123,13 +236,12 @@ __DPCPP_SYCL_DEVICE float ldexp(float x, int exp) { __DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } - -__DPCPP_SPIRV_MAP_UNARY(lgamma); -__DPCPP_SPIRV_MAP_UNARY(log10); -__DPCPP_SPIRV_MAP_UNARY(log1p); -__DPCPP_SPIRV_MAP_UNARY(log2); -__DPCPP_SPIRV_MAP_UNARY(logb); -__DPCPP_SPIRV_MAP_UNARY(log); +template +__DPCPP_SYCL_DEVICE + typename std::enable_if::value, double>::type + ldexp(T x, int exp) { + return __spirv_ocl_ldexp((double)x, exp); +} __DPCPP_SYCL_DEVICE_C float modff(float x, float *intpart) { return __spirv_ocl_modf(x, intpart); @@ -140,56 +252,79 @@ __DPCPP_SYCL_DEVICE float modf(float x, float *intpart) { __DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { return __spirv_ocl_modf(x, intpart); } +// modf only supports integer x when the intpart is double +template +__DPCPP_SYCL_DEVICE + typename std::enable_if::value, double>::type + modf(T x, double *intpart) { + return __spirv_ocl_modf((double)x, intpart); +} -__DPCPP_SPIRV_MAP_BINARY(nextafter); -__DPCPP_SPIRV_MAP_BINARY(pow); -__DPCPP_SPIRV_MAP_BINARY(remainder); +__DPCPP_SYCL_DEVICE_C float scalbnf(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE float scalbn(float x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { + return __spirv_ocl_ldexp(x, exp); +} +template +__DPCPP_SYCL_DEVICE + typename std::enable_if::value, double>::type + scalbn(T x, int exp) { + return __spirv_ocl_ldexp((double)x, exp); +} -__DPCPP_SYCL_DEVICE_C float remquof(float x, float y, int *q) { - return 
__spirv_ocl_remquo(x, y, q); +__DPCPP_SYCL_DEVICE_C float scalblnf(float x, long exp) { + return __spirv_ocl_ldexp(x, (int)exp); } -__DPCPP_SYCL_DEVICE float remquo(float x, float y, int *q) { - return __spirv_ocl_remquo(x, y, q); +__DPCPP_SYCL_DEVICE float scalbln(float x, long exp) { + return __spirv_ocl_ldexp(x, (int)exp); } -__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { - return __spirv_ocl_remquo(x, y, q); +__DPCPP_SYCL_DEVICE double scalbln(double x, long exp) { + return __spirv_ocl_ldexp(x, (int)exp); +} +template +__DPCPP_SYCL_DEVICE + typename std::enable_if::value, double>::type + scalbln(T x, long exp) { + return __spirv_ocl_ldexp((double)x, (int)exp); } -__DPCPP_SPIRV_MAP_UNARY(rint); -__DPCPP_SPIRV_MAP_UNARY(round); -__DPCPP_SPIRV_MAP_UNARY(sin); -__DPCPP_SPIRV_MAP_UNARY(sinh); -__DPCPP_SPIRV_MAP_UNARY(sqrt); -__DPCPP_SPIRV_MAP_UNARY(tan); -__DPCPP_SPIRV_MAP_UNARY(tanh); -__DPCPP_SPIRV_MAP_UNARY(tgamma); -__DPCPP_SPIRV_MAP_UNARY(trunc); -__DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } +__DPCPP_SYCL_DEVICE_C int ilogbf(float x) { return __spirv_ocl_ilogb(x); } +__DPCPP_SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } +__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } +template ::value, + bool>::type = true> +__DPCPP_SYCL_DEVICE double ilogb(T x) { + return __spirv_ocl_ilogb((double)x); +} -__DPCPP_SYCL_DEVICE ldiv_t ldiv(long x, long y) { return {x / y, x % y}; } +__DPCPP_SPIRV_MAP_UNARY(logb); +__DPCPP_SPIRV_MAP_BINARY(nextafter); +// unsupported: nextforward +__DPCPP_SPIRV_MAP_BINARY(copysign); -__DPCPP_SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { - return {x / y, x % y}; -} +/// Classification and comparison +// -#if defined(__NVPTX__) -extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); -extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); -__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __nv_nearbyintf(x); } -__DPCPP_SYCL_DEVICE float 
nearbyint(float x) { return __nv_nearbyintf(x); } -__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } -#elif defined(__AMDGCN__) -extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); -extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); -__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { - return __ocml_nearbyint_f32(x); -} -__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } -__DPCPP_SYCL_DEVICE double nearbyint(double x) { - return __ocml_nearbyint_f64(x); -} -#endif +// unsupported: fpclassify +// unsupported: isfinite +// unsupported: isinf +// unsupported: isnan +// unsupported: isnormal +// unsupported: signbit +// unsupported: isgreater +// unsupported: isgreaterequal +// unsupported: isless +// unsupported: islessequal +// unsupported: islessgreater +// unsupported: isunordered +// Now drag all of the overloads we've just defined in the std namespace. For +// the overloads to work properly we need to ensure our namespace matches +// exactly the one of the system C++ library.
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_NAMESPACE_STD #else @@ -199,68 +334,145 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif #endif -#if defined(__NVPTX__) || defined(__AMDGCN__) -using ::nearbyint; -using ::nearbyintf; -#endif +// +using ::div; +using ::labs; +using ::ldiv; +using ::llabs; +using ::lldiv; +// Basic operations using ::abs; +using ::fabs; +using ::fabsf; +using ::fdim; +using ::fdimf; +using ::fma; +using ::fmaf; +using ::fmax; +using ::fmaxf; +using ::fmin; +using ::fminf; +using ::fmod; +using ::fmodf; +using ::remainder; +using ::remainderf; +using ::remquo; +using ::remquof; +// using ::nan; +// using ::nanf; + +// Exponential functions +using ::exp; +using ::exp2; +using ::exp2f; +using ::expf; +using ::expm1; +using ::expm1f; +using ::log; +using ::log10; +using ::log10f; +using ::log1p; +using ::log1pf; +using ::log2; +using ::log2f; +using ::logf; + +// Power functions +using ::cbrt; +using ::cbrtf; +using ::hypot; +using ::hypotf; +using ::pow; +using ::powf; +using ::sqrt; +using ::sqrtf; + +// Trigonometric functions using ::acos; using ::acosf; -using ::acosh; -using ::acoshf; using ::asin; using ::asinf; -using ::asinh; -using ::asinhf; using ::atan; using ::atan2; using ::atan2f; using ::atanf; -using ::atanh; -using ::atanhf; -using ::cbrt; -using ::cbrtf; -using ::ceil; -using ::ceilf; -using ::copysign; -using ::copysignf; using ::cos; using ::cosf; +using ::sin; +using ::sinf; +using ::tan; +using ::tanf; + +// Hyperbolic functions +using ::acosh; +using ::acoshf; +using ::asinh; +using ::asinhf; +using ::atanh; +using ::atanhf; using ::cosh; using ::coshf; -using ::div; +using ::sinh; +using ::sinhf; +using ::tanh; +using ::tanhf; + +// Error and gamma functions using ::erf; using ::erfc; using ::erfcf; using ::erff; -using ::exp; -using ::exp2; -using ::exp2f; -using ::expf; -using ::expm1; -using ::expm1f; -using ::fabs; -using ::fabsf; -using ::fdim; -using ::fdimf; +using ::tgamma; +using ::tgammaf; +using ::lgamma;
+using ::lgammaf; + +// Nearest integer floating-point operations +using ::ceil; +using ::ceilf; using ::floor; using ::floorf; -using ::fmaf; -using ::fmaxf; -using ::fminf; -using ::fmod; -using ::fmodf; -using ::labs; -using ::ldiv; -using ::llabs; -using ::lldiv; -// using ::fpclassify; +using ::trunc; +using ::truncf; +using ::round; +using ::roundf; +// using ::lround; +// using ::llround; +using ::rint; +using ::rintf; +// using ::lrint; +// using ::llrint; + +#if defined(__NVPTX__) || defined(__AMDGCN__) +using ::nearbyint; +using ::nearbyintf; +#endif + +// Floating-point manipulation functions using ::frexp; -using ::hypot; -using ::hypotf; +using ::frexpf; +using ::ldexp; +using ::ldexpf; +using ::modf; +using ::modff; +using ::scalbln; +using ::scalblnf; +using ::scalbn; +using ::scalbnf; using ::ilogb; using ::ilogbf; +using ::logb; +using ::logbf; +using ::nextafter; +using ::nextafterf; +// using ::nextforward +// using ::nextforwardf +using ::copysign; +using ::copysignf; + +// Classification and comparison +// using ::fpclassify; // using ::isfinite; // using ::isgreater; // using ::isgreaterequal; @@ -271,62 +483,7 @@ using ::ilogbf; // using ::isnan; // using ::isnormal; // using ::isunordered; -// using ::labs; -using ::ldexp; -using ::ldexpf; -using ::lgamma; -using ::lgammaf; -// using ::llabs; -// using ::llrint; -using ::log; -using ::log10; -using ::log10f; -using ::log1p; -using ::log1pf; -using ::log2; -using ::log2f; -using ::logb; -using ::logbf; -using ::logf; -// using ::lrint; -// using ::lround; -// using ::llround; -using ::modf; -using ::modff; -// using ::nan; -// using ::nanf; -// using ::nearbyint; -using ::nextafter; -using ::nextafterf; -using ::pow; -using ::powf; -using ::remainder; -using ::remainderf; -using ::remquo; -using ::remquof; -using ::rint; -using ::rintf; -using ::round; -using ::roundf; -using ::scalbln; -using ::scalblnf; -using ::scalbn; -using ::scalbnf; // using ::signbit; -using ::sin; -using ::sinf; 
-using ::sinh; -using ::sinhf; -using ::sqrt; -using ::sqrtf; -using ::tan; -using ::tanf; -using ::tanh; -using ::tanhf; -using ::tgamma; -using ::tgammaf; -using ::trunc; -using ::truncf; #ifdef _LIBCPP_END_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD From 728d5572f21cf45374a80d9883f96a979ad226a3 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 1 Jul 2025 17:15:30 +0100 Subject: [PATCH 28/40] [SYCL] Add cmath-fallback header test --- .../math-builtins/cmath-fallback.cpp | 446 ++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 sycl/test/check_device_code/math-builtins/cmath-fallback.cpp diff --git a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp new file mode 100644 index 0000000000000..1dfd97302cd1d --- /dev/null +++ b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp @@ -0,0 +1,446 @@ +// REQUIRES: cuda +// Note: This isn't really target specific and should be switched to spir when +// it's enabled for it. 
+ +// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm -fsycl-device-only %s -o - | FileCheck %s + +#include +#include + +// CHECK-LABEL: entry +SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, + long long *llp, float *rf, double *rd, int *ri) { + int idx = 0; + + // CHECK: __spirv_ocl_fmodff + rf[idx++] = std::fmod(fp[0], fp[1]); + // CHECK: __spirv_ocl_fmodff + rf[idx++] = std::fmodf(fp[2], fp[1]); + // CHECK: __spirv_ocl_fmoddd + rd[idx++] = std::fmod(dp[0], dp[1]); + // CHECK: __spirv_ocl_fmoddd + rd[idx++] = std::fmod(fp[0], ip[1]); + + // CHECK: __spirv_ocl_remainderff + rf[idx++] = std::remainder(fp[0], fp[1]); + // CHECK: __spirv_ocl_remainderff + rf[idx++] = std::remainderf(fp[2], fp[1]); + // CHECK: __spirv_ocl_remainderdd + rd[idx++] = std::remainder(dp[0], dp[1]); + // CHECK: __spirv_ocl_remainderdd + rd[idx++] = std::remainder(fp[0], ip[1]); + + // CHECK: __spirv_ocl_remquoff + rf[idx++] = std::remquo(fp[0], fp[1], ip); + // CHECK: __spirv_ocl_remquoff + rf[idx++] = std::remquof(fp[2], fp[1], ip); + // CHECK: __spirv_ocl_remquodd + rd[idx++] = std::remquo(dp[0], dp[1], ip); + // CHECK: __spirv_ocl_remquodd + rd[idx++] = std::remquo(fp[0], ip[1], ip); + + // CHECK: __spirv_ocl_fmaff + rf[idx++] = std::fma(fp[0], fp[1], fp[2]); + // CHECK: __spirv_ocl_fmaff + rf[idx++] = std::fmaf(fp[3], fp[1], fp[2]); + // CHECK: __spirv_ocl_fmadd + rd[idx++] = std::fma(dp[0], dp[1], dp[2]); + // CHECK: __spirv_ocl_fmadd + rd[idx++] = std::fma(fp[0], ip[1], fp[2]); + + // CHECK: __spirv_ocl_fmaxff + rf[idx++] = std::fmax(fp[0], fp[1]); + // CHECK: __spirv_ocl_fmaxff + rf[idx++] = std::fmaxf(fp[2], fp[1]); + // CHECK: __spirv_ocl_fmaxdd + rd[idx++] = std::fmax(dp[0], dp[1]); + // CHECK: __spirv_ocl_fmaxdd + rd[idx++] = std::fmax(fp[0], ip[1]); + + // CHECK: __spirv_ocl_fminff + rf[idx++] = std::fmin(fp[0], fp[1]); + // CHECK: __spirv_ocl_fminff + rf[idx++] = std::fminf(fp[2], fp[1]); + // CHECK: __spirv_ocl_fmindd + 
rd[idx++] = std::fmin(dp[0], dp[1]); + // CHECK: __spirv_ocl_fmindd + rd[idx++] = std::fmin(fp[0], ip[1]); + + // CHECK: __spirv_ocl_fdimff + rf[idx++] = std::fdim(fp[0], fp[1]); + // CHECK: __spirv_ocl_fdimff + rf[idx++] = std::fdimf(fp[2], fp[1]); + // CHECK: __spirv_ocl_fdimdd + rd[idx++] = std::fdim(dp[0], dp[1]); + // CHECK: __spirv_ocl_fdimdd + rd[idx++] = std::fdim(fp[0], ip[1]); + + // CHECK: __spirv_ocl_expf + rf[idx++] = std::exp(fp[0]); + // CHECK: __spirv_ocl_expf + rf[idx++] = std::expf(fp[1]); + // CHECK: __spirv_ocl_expd + rd[idx++] = std::exp(dp[0]); + // CHECK: __spirv_ocl_expd + rd[idx++] = std::exp(ip[0]); + + // CHECK: __spirv_ocl_exp2f + rf[idx++] = std::exp2(fp[0]); + // CHECK: __spirv_ocl_exp2f + rf[idx++] = std::exp2f(fp[1]); + // CHECK: __spirv_ocl_exp2d + rd[idx++] = std::exp2(dp[0]); + // CHECK: __spirv_ocl_exp2d + rd[idx++] = std::exp2(ip[0]); + + // CHECK: __spirv_ocl_expm1f + rf[idx++] = std::expm1(fp[0]); + // CHECK: __spirv_ocl_expm1f + rf[idx++] = std::expm1f(fp[1]); + // CHECK: __spirv_ocl_expm1d + rd[idx++] = std::expm1(dp[0]); + // CHECK: __spirv_ocl_expm1d + rd[idx++] = std::expm1(ip[0]); + + // CHECK: __spirv_ocl_logf + rf[idx++] = std::log(fp[0]); + // CHECK: __spirv_ocl_logf + rf[idx++] = std::logf(fp[1]); + // CHECK: __spirv_ocl_logd + rd[idx++] = std::log(dp[0]); + // CHECK: __spirv_ocl_logd + rd[idx++] = std::log(ip[0]); + + // CHECK: __spirv_ocl_log10f + rf[idx++] = std::log10(fp[0]); + // CHECK: __spirv_ocl_log10f + rf[idx++] = std::log10f(fp[1]); + // CHECK: __spirv_ocl_log10d + rd[idx++] = std::log10(dp[0]); + // CHECK: __spirv_ocl_log10d + rd[idx++] = std::log10(ip[0]); + + // CHECK: __spirv_ocl_log2f + rf[idx++] = std::log2(fp[0]); + // CHECK: __spirv_ocl_log2f + rf[idx++] = std::log2f(fp[1]); + // CHECK: __spirv_ocl_log2d + rd[idx++] = std::log2(dp[0]); + // CHECK: __spirv_ocl_log2d + rd[idx++] = std::log2(ip[0]); + + // CHECK: __spirv_ocl_log1pf + rf[idx++] = std::log1p(fp[0]); + // CHECK: __spirv_ocl_log1pf + 
rf[idx++] = std::log1pf(fp[1]); + // CHECK: __spirv_ocl_log1pd + rd[idx++] = std::log1p(dp[0]); + // CHECK: __spirv_ocl_log1pd + rd[idx++] = std::log1p(ip[0]); + + // CHECK: __spirv_ocl_powff + rf[idx++] = std::pow(fp[0], fp[1]); + // CHECK: __spirv_ocl_powff + rf[idx++] = std::powf(fp[2], fp[1]); + // CHECK: __spirv_ocl_powdd + rd[idx++] = std::pow(dp[0], dp[1]); + // CHECK: __spirv_ocl_powdd + rd[idx++] = std::pow(ip[0], fp[1]); + + // CHECK: __spirv_ocl_sqrtf + rf[idx++] = std::sqrt(fp[0]); + // CHECK: __spirv_ocl_sqrtf + rf[idx++] = std::sqrtf(fp[1]); + // CHECK: __spirv_ocl_sqrtd + rd[idx++] = std::sqrt(dp[0]); + // CHECK: __spirv_ocl_sqrtd + rd[idx++] = std::sqrt(ip[0]); + + // CHECK: __spirv_ocl_cbrtf + rf[idx++] = std::cbrt(fp[0]); + // CHECK: __spirv_ocl_cbrtf + rf[idx++] = std::cbrtf(fp[1]); + // CHECK: __spirv_ocl_cbrtd + rd[idx++] = std::cbrt(dp[0]); + // CHECK: __spirv_ocl_cbrtd + rd[idx++] = std::cbrt(ip[0]); + + // CHECK: __spirv_ocl_hypotff + rf[idx++] = std::hypot(fp[0], fp[1]); + // CHECK: __spirv_ocl_hypotff + rf[idx++] = std::hypotf(fp[2], fp[1]); + // CHECK: __spirv_ocl_hypotdd + rd[idx++] = std::hypot(dp[0], dp[1]); + // CHECK: __spirv_ocl_hypotdd + rd[idx++] = std::hypot(ip[0], fp[1]); + + // CHECK: __spirv_ocl_sinf + rf[idx++] = std::sin(fp[0]); + // CHECK: __spirv_ocl_sinf + rf[idx++] = std::sinf(fp[1]); + // CHECK: __spirv_ocl_sind + rd[idx++] = std::sin(dp[0]); + // CHECK: __spirv_ocl_sind + rd[idx++] = std::sin(ip[0]); + + // CHECK: __spirv_ocl_cosf + rf[idx++] = std::cos(fp[0]); + // CHECK: __spirv_ocl_cosf + rf[idx++] = std::cosf(fp[1]); + // CHECK: __spirv_ocl_cosd + rd[idx++] = std::cos(dp[0]); + // CHECK: __spirv_ocl_cosd + rd[idx++] = std::cos(ip[0]); + + // CHECK: __spirv_ocl_tanf + rf[idx++] = std::tan(fp[0]); + // CHECK: __spirv_ocl_tanf + rf[idx++] = std::tanf(fp[1]); + // CHECK: __spirv_ocl_tand + rd[idx++] = std::tan(dp[0]); + // CHECK: __spirv_ocl_tand + rd[idx++] = std::tan(ip[0]); + + // CHECK: __spirv_ocl_asinf + 
rf[idx++] = std::asin(fp[0]); + // CHECK: __spirv_ocl_asinf + rf[idx++] = std::asinf(fp[1]); + // CHECK: __spirv_ocl_asind + rd[idx++] = std::asin(dp[0]); + // CHECK: __spirv_ocl_asind + rd[idx++] = std::asin(ip[0]); + + // CHECK: __spirv_ocl_acosf + rf[idx++] = std::acos(fp[0]); + // CHECK: __spirv_ocl_acosf + rf[idx++] = std::acosf(fp[1]); + // CHECK: __spirv_ocl_acosd + rd[idx++] = std::acos(dp[0]); + // CHECK: __spirv_ocl_acosd + rd[idx++] = std::acos(ip[0]); + + // CHECK: __spirv_ocl_atanf + rf[idx++] = std::atan(fp[0]); + // CHECK: __spirv_ocl_atanf + rf[idx++] = std::atanf(fp[1]); + // CHECK: __spirv_ocl_atand + rd[idx++] = std::atan(dp[0]); + // CHECK: __spirv_ocl_atand + rd[idx++] = std::atan(ip[0]); + + // CHECK: __spirv_ocl_atan2ff + rf[idx++] = std::atan2(fp[0], fp[1]); + // CHECK: __spirv_ocl_atan2ff + rf[idx++] = std::atan2f(fp[2], fp[1]); + // CHECK: __spirv_ocl_atan2dd + rd[idx++] = std::atan2(dp[0], dp[1]); + // CHECK: __spirv_ocl_atan2dd + rd[idx++] = std::atan2(ip[0], fp[1]); + + // CHECK: __spirv_ocl_sinhf + rf[idx++] = std::sinh(fp[0]); + // CHECK: __spirv_ocl_sinhf + rf[idx++] = std::sinhf(fp[1]); + // CHECK: __spirv_ocl_sinhd + rd[idx++] = std::sinh(dp[0]); + // CHECK: __spirv_ocl_sinhd + rd[idx++] = std::sinh(ip[0]); + + // CHECK: __spirv_ocl_coshf + rf[idx++] = std::cosh(fp[0]); + // CHECK: __spirv_ocl_coshf + rf[idx++] = std::coshf(fp[1]); + // CHECK: __spirv_ocl_coshd + rd[idx++] = std::cosh(dp[0]); + // CHECK: __spirv_ocl_coshd + rd[idx++] = std::cosh(ip[0]); + + // CHECK: __spirv_ocl_tanhf + rf[idx++] = std::tanh(fp[0]); + // CHECK: __spirv_ocl_tanhf + rf[idx++] = std::tanhf(fp[1]); + // CHECK: __spirv_ocl_tanhd + rd[idx++] = std::tanh(dp[0]); + // CHECK: __spirv_ocl_tanhd + rd[idx++] = std::tanh(ip[0]); + + // CHECK: __spirv_ocl_asinhf + rf[idx++] = std::asinh(fp[0]); + // CHECK: __spirv_ocl_asinhf + rf[idx++] = std::asinhf(fp[1]); + // CHECK: __spirv_ocl_asinhd + rd[idx++] = std::asinh(dp[0]); + // CHECK: __spirv_ocl_asinhd + 
rd[idx++] = std::asinh(ip[0]); + + // CHECK: __spirv_ocl_acoshf + rf[idx++] = std::acosh(fp[0]); + // CHECK: __spirv_ocl_acoshf + rf[idx++] = std::acoshf(fp[1]); + // CHECK: __spirv_ocl_acoshd + rd[idx++] = std::acosh(dp[0]); + // CHECK: __spirv_ocl_acoshd + rd[idx++] = std::acosh(ip[0]); + + // CHECK: __spirv_ocl_atanhf + rf[idx++] = std::atanh(fp[0]); + // CHECK: __spirv_ocl_atanhf + rf[idx++] = std::atanhf(fp[1]); + // CHECK: __spirv_ocl_atanhd + rd[idx++] = std::atanh(dp[0]); + // CHECK: __spirv_ocl_atanhd + rd[idx++] = std::atanh(ip[0]); + + // CHECK: __spirv_ocl_erff + rf[idx++] = std::erf(fp[0]); + // CHECK: __spirv_ocl_erff + rf[idx++] = std::erff(fp[1]); + // CHECK: __spirv_ocl_erfd + rd[idx++] = std::erf(dp[0]); + // CHECK: __spirv_ocl_erfd + rd[idx++] = std::erf(ip[0]); + + // CHECK: __spirv_ocl_erfcf + rf[idx++] = std::erfc(fp[0]); + // CHECK: __spirv_ocl_erfcf + rf[idx++] = std::erfcf(fp[1]); + // CHECK: __spirv_ocl_erfcd + rd[idx++] = std::erfc(dp[0]); + // CHECK: __spirv_ocl_erfcd + rd[idx++] = std::erfc(ip[0]); + + // CHECK: __spirv_ocl_tgammaf + rf[idx++] = std::tgamma(fp[0]); + // CHECK: __spirv_ocl_tgammaf + rf[idx++] = std::tgammaf(fp[1]); + // CHECK: __spirv_ocl_tgammad + rd[idx++] = std::tgamma(dp[0]); + // CHECK: __spirv_ocl_tgammad + rd[idx++] = std::tgamma(ip[0]); + + // CHECK: __spirv_ocl_lgammaf + rf[idx++] = std::lgamma(fp[0]); + // CHECK: __spirv_ocl_lgammaf + rf[idx++] = std::lgammaf(fp[1]); + // CHECK: __spirv_ocl_lgammad + rd[idx++] = std::lgamma(dp[0]); + // CHECK: __spirv_ocl_lgammad + rd[idx++] = std::lgamma(ip[0]); + + // CHECK: __spirv_ocl_ceilf + rf[idx++] = std::ceil(fp[0]); + // CHECK: __spirv_ocl_ceilf + rf[idx++] = std::ceilf(fp[1]); + // CHECK: __spirv_ocl_ceild + rd[idx++] = std::ceil(dp[0]); + // CHECK: __spirv_ocl_ceild + rd[idx++] = std::ceil(ip[0]); + + // CHECK: __spirv_ocl_floorf + rf[idx++] = std::floor(fp[0]); + // CHECK: __spirv_ocl_floorf + rf[idx++] = std::floorf(fp[1]); + // CHECK: __spirv_ocl_floord + 
rd[idx++] = std::floor(dp[0]); + // CHECK: __spirv_ocl_floord + rd[idx++] = std::floor(ip[0]); + + // CHECK: __spirv_ocl_truncf + rf[idx++] = std::trunc(fp[0]); + // CHECK: __spirv_ocl_truncf + rf[idx++] = std::truncf(fp[1]); + // CHECK: __spirv_ocl_truncd + rd[idx++] = std::trunc(dp[0]); + // CHECK: __spirv_ocl_truncd + rd[idx++] = std::trunc(ip[0]); + + // CHECK: __spirv_ocl_roundf + rf[idx++] = std::round(fp[0]); + // CHECK: __spirv_ocl_roundf + rf[idx++] = std::roundf(fp[1]); + // CHECK: __spirv_ocl_roundd + rd[idx++] = std::round(dp[0]); + // CHECK: __spirv_ocl_roundd + rd[idx++] = std::round(ip[0]); + + // CHECK: __spirv_ocl_rintf + rf[idx++] = std::rint(fp[0]); + // CHECK: __spirv_ocl_rintf + rf[idx++] = std::rintf(fp[1]); + // CHECK: __spirv_ocl_rintd + rd[idx++] = std::rint(dp[0]); + // CHECK: __spirv_ocl_rintd + rd[idx++] = std::rint(ip[0]); + + // CHECK: __spirv_ocl_frexpf + rf[idx++] = std::frexp(fp[0], ip); + // CHECK: __spirv_ocl_frexpf + rf[idx++] = std::frexpf(fp[1], ip); + // CHECK: __spirv_ocl_frexpd + rd[idx++] = std::frexp(dp[0], ip); + // CHECK: __spirv_ocl_frexpd + rd[idx++] = std::frexp(ip[0], ip); + + // CHECK: __spirv_ocl_ldexpf + rf[idx++] = std::ldexp(fp[0], ip[0]); + // CHECK: __spirv_ocl_ldexpf + rf[idx++] = std::ldexpf(fp[1], ip[0]); + // CHECK: __spirv_ocl_ldexpd + rd[idx++] = std::ldexp(dp[0], ip[0]); + // CHECK: __spirv_ocl_ldexpd + rd[idx++] = std::ldexp(ip[0], ip[0]); + + // CHECK: __spirv_ocl_modff + rf[idx++] = std::modf(fp[0], fp); + // CHECK: __spirv_ocl_modff + rf[idx++] = std::modff(fp[1], fp); + // CHECK: __spirv_ocl_modfd + rd[idx++] = std::modf(dp[0], dp); + // CHECK: __spirv_ocl_modfd + rd[idx++] = std::modf(ip[0], dp); + + // CHECK: __spirv_ocl_ldexpf + rf[idx++] = std::scalbn(fp[0], ip[0]); + // CHECK: __spirv_ocl_ldexpf + rf[idx++] = std::scalbnf(fp[1], ip[0]); + // CHECK: __spirv_ocl_ldexpd + rd[idx++] = std::scalbn(dp[0], ip[0]); + // CHECK: __spirv_ocl_ldexpd + rd[idx++] = std::scalbn(ip[0], ip[0]); + + // CHECK: 
__spirv_ocl_ilogbf + ri[idx++] = std::ilogb(fp[0]); + // CHECK: __spirv_ocl_ilogbf + ri[idx++] = std::ilogbf(fp[1]); + // CHECK: __spirv_ocl_ilogbd + ri[idx++] = std::ilogb(dp[0]); + // CHECK: __spirv_ocl_ilogbd + ri[idx++] = std::ilogb(ip[0]); + + // CHECK: __spirv_ocl_logbf + rf[idx++] = std::logb(fp[0]); + // CHECK: __spirv_ocl_logbf + rf[idx++] = std::logbf(fp[1]); + // CHECK: __spirv_ocl_logbd + rd[idx++] = std::logb(dp[0]); + // CHECK: __spirv_ocl_logbd + rd[idx++] = std::logb(ip[0]); + + // CHECK: __spirv_ocl_nextafterf + rf[idx++] = std::nextafter(fp[0], fp[1]); + // CHECK: __spirv_ocl_nextafterf + rf[idx++] = std::nextafterf(fp[2], fp[1]); + // CHECK: __spirv_ocl_nextafterd + rd[idx++] = std::nextafter(dp[0], dp[1]); + // CHECK: __spirv_ocl_nextafterd + rd[idx++] = std::nextafter(ip[0], fp[1]); + + // CHECK: __spirv_ocl_copysignf + rf[idx++] = std::copysign(fp[0], fp[1]); + // CHECK: __spirv_ocl_copysignf + rf[idx++] = std::copysignf(fp[2], fp[1]); + // CHECK: __spirv_ocl_copysignd + rd[idx++] = std::copysign(dp[0], dp[1]); + // CHECK: __spirv_ocl_copysignd + rd[idx++] = std::copysign(ip[0], fp[1]); +} From 6ac4ba1eafcb16dca635869748a2a4c62cb35f2a Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 1 Jul 2025 17:28:57 +0100 Subject: [PATCH 29/40] [SYCL] Fix formatting --- .../sycl/stl_wrappers/cmath-fallback.h | 20 +++++++++---------- .../math-builtins/cmath-fallback.cpp | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index c4940d23acd80..e7f4719ea2c04 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -423,20 +423,20 @@ using ::erf; using ::erfc; using ::erfcf; using ::erff; -using ::tgamma; -using ::tgammaf; using ::lgamma; using ::lgammaf; +using ::tgamma; +using ::tgammaf; // Nearest integer floating-point operations using ::ceil; using ::ceilf; using ::floor; 
using ::floorf; -using ::trunc; -using ::truncf; using ::round; using ::roundf; +using ::trunc; +using ::truncf; // using ::lround; // using ::llround; using ::rint; @@ -452,20 +452,20 @@ using ::nearbyintf; // Floating-point manipulation functions using ::frexp; using ::frexpf; +using ::ilogb; +using ::ilogbf; using ::ldexp; using ::ldexpf; +using ::logb; +using ::logbf; using ::modf; using ::modff; +using ::nextafter; +using ::nextafterf; using ::scalbln; using ::scalblnf; using ::scalbn; using ::scalbnf; -using ::ilogb; -using ::ilogbf; -using ::logb; -using ::logbf; -using ::nextafter; -using ::nextafterf; // using ::nextforward // using ::nextforwardf using ::copysign; diff --git a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp index 1dfd97302cd1d..ae3c77e852c18 100644 --- a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp +++ b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp @@ -4,8 +4,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm -fsycl-device-only %s -o - | FileCheck %s -#include #include +#include // CHECK-LABEL: entry SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, From d9713d53e253729f575e0fe14f9d9c9e65c4beee Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 2 Jul 2025 15:51:08 +0100 Subject: [PATCH 30/40] [SYCL] Fixup promotion overloads --- .../sycl/stl_wrappers/cmath-fallback.h | 102 ++++++++++++------ .../math-builtins/cmath-fallback.cpp | 24 +++++ 2 files changed, 91 insertions(+), 35 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index e7f4719ea2c04..6527f453d2fda 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -19,6 +19,54 @@ #define __DPCPP_SYCL_DEVICE_C \ extern "C" __attribute__((sycl_device_only, always_inline)) +// Promotion 
templates: the C++ standard library provides overloads that allow +// arguments of math functions to be promoted. Any floating-point argument is +// allowed to accept any integer type, which should then be promoted to double. +// When multiple floating point arguments are available passing arguments with +// different precision should promote to the larger type. The template helpers +// below provide the machinery to define these promoting overloads. +template struct __dpcpp_promote { +private: + // Integer types are promoted to double. + template + static typename std::enable_if::value, double>::type + test(); + + // Floating point types are used as-is. + template + static typename std::enable_if::value, U>::type + test(); + +public: + // We rely on dummy templated methods and decltype to select the right type + // based on the input T. + typedef decltype(test()) type; +}; + +// With a single parameter we only need to promote integers. +template +using __dpcpp_promote_1 = std::enable_if::value, double>; + +// With two or three parameters we need to promote integers and possibly +// floating point types. We rely on operator+ with decltype to deduce the +// overall promotion type. This is only needed if at least one of the parameters +// is an integer, or if there are multiple different floating point types.
+template +using __dpcpp_promote_2 = + std::enable_if::value || std::is_integral::value || + std::is_integral::value, + decltype(typename __dpcpp_promote::type(0) + + typename __dpcpp_promote::type(0))>; + +template +using __dpcpp_promote_3 = + std::enable_if::value && std::is_same::value) || + std::is_integral::value || + std::is_integral::value || std::is_integral::value, + decltype(typename __dpcpp_promote::type(0) + + typename __dpcpp_promote::type(0) + + typename __dpcpp_promote::type(0))>; + // For each math built-in we need to define float and double overloads, an // extern "C" float variant with the 'f' suffix, and a version that promotes to // double if any floating-point parameter passed is an integer. @@ -35,9 +83,7 @@ __DPCPP_SYCL_DEVICE float NAME(float x) { return __spirv_ocl_##NAME(x); } \ __DPCPP_SYCL_DEVICE double NAME(double x) { return __spirv_ocl_##NAME(x); } \ template \ - __DPCPP_SYCL_DEVICE \ - typename std::enable_if::value, double>::type \ - NAME(T x) { \ + __DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type NAME(T x) { \ return __spirv_ocl_##NAME((double)x); \ } @@ -52,10 +98,9 @@ return __spirv_ocl_##NAME(x, y); \ } \ template \ - __DPCPP_SYCL_DEVICE typename std::enable_if< \ - std::is_integral::value || std::is_integral::value, double>::type \ - NAME(T x, U y) { \ - return __spirv_ocl_##NAME((double)x, (double)y); \ + __DPCPP_SYCL_DEVICE __dpcpp_promote_2::type NAME(T x, U y) { \ + typedef typename __dpcpp_promote_2::type type; \ + return __spirv_ocl_##NAME((type)x, (type)y); \ } /// @@ -82,9 +127,7 @@ __DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } __DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } template -__DPCPP_SYCL_DEVICE - typename std::enable_if::value, double>::type - fabs(T x) { +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type fabs(T x) { return x < 0 ? 
-x : x; } @@ -101,10 +144,10 @@ __DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { return __spirv_ocl_remquo(x, y, q); } template -__DPCPP_SYCL_DEVICE typename std::enable_if< - std::is_integral::value || std::is_integral::value, double>::type -remquo(T x, U y, int *q) { - return __spirv_ocl_remquo((double)x, (double)y, q); +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_2::type remquo(T x, U y, + int *q) { + typedef typename __dpcpp_promote_2::type type; + return __spirv_ocl_remquo((type)x, (type)y, q); } __DPCPP_SYCL_DEVICE_C float fmaf(float x, float y, float z) { @@ -117,12 +160,10 @@ __DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { return __spirv_ocl_fma(x, y, z); } template -__DPCPP_SYCL_DEVICE typename std::enable_if::value || - std::is_integral::value || - std::is_integral::value, - double>::type -fma(T x, U y, V z) { - return __spirv_ocl_fma((double)x, (double)y, (double)z); +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_3::type fma(T x, U y, + V z) { + typedef typename __dpcpp_promote_3::type type; + return __spirv_ocl_fma((type)x, (type)y, (type)z); } __DPCPP_SPIRV_MAP_BINARY(fmax); @@ -221,9 +262,7 @@ __DPCPP_SYCL_DEVICE double frexp(double x, int *exp) { return __spirv_ocl_frexp(x, exp); } template -__DPCPP_SYCL_DEVICE - typename std::enable_if::value, double>::type - frexp(T x, int *exp) { +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type frexp(T x, int *exp) { return __spirv_ocl_frexp((double)x, exp); } @@ -237,9 +276,7 @@ __DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } template -__DPCPP_SYCL_DEVICE - typename std::enable_if::value, double>::type - ldexp(T x, int exp) { +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type ldexp(T x, int exp) { return __spirv_ocl_ldexp((double)x, exp); } @@ -254,9 +291,8 @@ __DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { } // modf only supports integer x when the intpart is double template -__DPCPP_SYCL_DEVICE - typename 
std::enable_if::value, double>::type - modf(T x, double *intpart) { +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type modf(T x, + double *intpart) { return __spirv_ocl_modf((double)x, intpart); } @@ -270,9 +306,7 @@ __DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } template -__DPCPP_SYCL_DEVICE - typename std::enable_if::value, double>::type - scalbn(T x, int exp) { +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type scalbn(T x, int exp) { return __spirv_ocl_ldexp((double)x, exp); } @@ -286,9 +320,7 @@ __DPCPP_SYCL_DEVICE double scalbln(double x, long exp) { return __spirv_ocl_ldexp(x, (int)exp); } template -__DPCPP_SYCL_DEVICE - typename std::enable_if::value, double>::type - scalbln(T x, long exp) { +__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type scalbln(T x, long exp) { return __spirv_ocl_ldexp((double)x, (int)exp); } diff --git a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp index ae3c77e852c18..e6b691ffccc12 100644 --- a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp +++ b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp @@ -20,6 +20,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::fmod(dp[0], dp[1]); // CHECK: __spirv_ocl_fmoddd rd[idx++] = std::fmod(fp[0], ip[1]); + // CHECK: __spirv_ocl_fmoddd + rd[idx++] = std::fmod(fp[0], dp[1]); // CHECK: __spirv_ocl_remainderff rf[idx++] = std::remainder(fp[0], fp[1]); @@ -29,6 +31,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::remainder(dp[0], dp[1]); // CHECK: __spirv_ocl_remainderdd rd[idx++] = std::remainder(fp[0], ip[1]); + // CHECK: __spirv_ocl_remainderdd + rd[idx++] = std::remainder(fp[0], dp[1]); // CHECK: __spirv_ocl_remquoff rf[idx++] = std::remquo(fp[0], fp[1], ip); @@ -38,6 +42,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = 
std::remquo(dp[0], dp[1], ip); // CHECK: __spirv_ocl_remquodd rd[idx++] = std::remquo(fp[0], ip[1], ip); + // CHECK: __spirv_ocl_remquodd + rd[idx++] = std::remquo(fp[0], dp[1], ip); // CHECK: __spirv_ocl_fmaff rf[idx++] = std::fma(fp[0], fp[1], fp[2]); @@ -47,6 +53,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::fma(dp[0], dp[1], dp[2]); // CHECK: __spirv_ocl_fmadd rd[idx++] = std::fma(fp[0], ip[1], fp[2]); + // CHECK: __spirv_ocl_fmadd + rd[idx++] = std::fma(fp[0], dp[1], fp[2]); // CHECK: __spirv_ocl_fmaxff rf[idx++] = std::fmax(fp[0], fp[1]); @@ -56,6 +64,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::fmax(dp[0], dp[1]); // CHECK: __spirv_ocl_fmaxdd rd[idx++] = std::fmax(fp[0], ip[1]); + // CHECK: __spirv_ocl_fmaxdd + rd[idx++] = std::fmax(fp[0], dp[1]); // CHECK: __spirv_ocl_fminff rf[idx++] = std::fmin(fp[0], fp[1]); @@ -65,6 +75,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::fmin(dp[0], dp[1]); // CHECK: __spirv_ocl_fmindd rd[idx++] = std::fmin(fp[0], ip[1]); + // CHECK: __spirv_ocl_fmindd + rd[idx++] = std::fmin(fp[0], dp[1]); // CHECK: __spirv_ocl_fdimff rf[idx++] = std::fdim(fp[0], fp[1]); @@ -74,6 +86,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::fdim(dp[0], dp[1]); // CHECK: __spirv_ocl_fdimdd rd[idx++] = std::fdim(fp[0], ip[1]); + // CHECK: __spirv_ocl_fdimdd + rd[idx++] = std::fdim(fp[0], dp[1]); // CHECK: __spirv_ocl_expf rf[idx++] = std::exp(fp[0]); @@ -146,6 +160,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::pow(dp[0], dp[1]); // CHECK: __spirv_ocl_powdd rd[idx++] = std::pow(ip[0], fp[1]); + // CHECK: __spirv_ocl_powdd + rd[idx++] = std::pow(dp[0], fp[1]); // CHECK: __spirv_ocl_sqrtf rf[idx++] = std::sqrt(fp[0]); @@ -173,6 +189,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::hypot(dp[0], dp[1]); // 
CHECK: __spirv_ocl_hypotdd rd[idx++] = std::hypot(ip[0], fp[1]); + // CHECK: __spirv_ocl_hypotdd + rd[idx++] = std::hypot(dp[0], fp[1]); // CHECK: __spirv_ocl_sinf rf[idx++] = std::sin(fp[0]); @@ -236,6 +254,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::atan2(dp[0], dp[1]); // CHECK: __spirv_ocl_atan2dd rd[idx++] = std::atan2(ip[0], fp[1]); + // CHECK: __spirv_ocl_atan2dd + rd[idx++] = std::atan2(dp[0], fp[1]); // CHECK: __spirv_ocl_sinhf rf[idx++] = std::sinh(fp[0]); @@ -434,6 +454,8 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::nextafter(dp[0], dp[1]); // CHECK: __spirv_ocl_nextafterd rd[idx++] = std::nextafter(ip[0], fp[1]); + // CHECK: __spirv_ocl_nextafterd + rd[idx++] = std::nextafter(dp[0], fp[1]); // CHECK: __spirv_ocl_copysignf rf[idx++] = std::copysign(fp[0], fp[1]); @@ -443,4 +465,6 @@ SYCL_EXTERNAL void entry(float *fp, double *dp, int *ip, long *lp, rd[idx++] = std::copysign(dp[0], dp[1]); // CHECK: __spirv_ocl_copysignd rd[idx++] = std::copysign(ip[0], fp[1]); + // CHECK: __spirv_ocl_copysignd + rd[idx++] = std::copysign(dp[0], fp[1]); } From 56d0e33517f3e31146e9f2b69d250f52fad09671 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 2 Jul 2025 15:55:20 +0100 Subject: [PATCH 31/40] [SYCL] Skip sycl.hpp in lit test --- .../test/check_device_code/math-builtins/cmath-fallback.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp index e6b691ffccc12..713569258b387 100644 --- a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp +++ b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp @@ -5,11 +5,11 @@ // RUN: %clangxx -fsycl -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm -fsycl-device-only %s -o - | FileCheck %s #include -#include // CHECK-LABEL: entry -SYCL_EXTERNAL void entry(float *fp, double 
*dp, int *ip, long *lp, - long long *llp, float *rf, double *rd, int *ri) { +__attribute__((sycl_device)) void entry(float *fp, double *dp, int *ip, + long *lp, long long *llp, float *rf, + double *rd, int *ri) { int idx = 0; // CHECK: __spirv_ocl_fmodff From 459ed03f9494438f89163c98369a34288e9c871b Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Wed, 2 Jul 2025 16:56:51 +0100 Subject: [PATCH 32/40] [SYCL] Fixup SFINAE --- sycl/include/sycl/stl_wrappers/cmath-fallback.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 6527f453d2fda..54633f91ea21d 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -25,7 +25,9 @@ // When multiple floating point arguments are available passing arguments with // different precision should promote to the larger type. The template helpers // below provide the machinery to define these promoting overloads. -template struct __dpcpp_promote { +template ::value || + std::is_floating_point::value)> +struct __dpcpp_promote { private: // Integer types are promoted to double. template @@ -43,6 +45,9 @@ template struct __dpcpp_promote { typedef decltype(test()) type; }; +// Variant without ::type to allow SFINAE for non promotable types. +template struct __dpcpp_promote {}; + // With a single paramter we only need to promote integers. 
template using __dpcpp_promote_1 = std::enable_if::value, double>; From a84f1e319054c41822ff2feaa75e780613e6863e Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Thu, 3 Jul 2025 11:52:40 +0100 Subject: [PATCH 33/40] [SYCL] Minor cleanups --- clang/lib/AST/Decl.cpp | 6 ++++-- clang/test/CodeGenSYCL/sycl-device-only.cpp | 14 +++++++++----- .../include/sycl/stl_wrappers/cmath-fallback.h | 18 ++++++++++-------- .../math-builtins/cmath-fallback.cpp | 11 +++++++++++ 4 files changed, 34 insertions(+), 15 deletions(-) diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 5182a0f342015..5a5cf72091a88 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3729,8 +3729,10 @@ unsigned FunctionDecl::getBuiltinID(bool ConsiderWrapperFunctions) const { !(BuiltinID == Builtin::BIprintf || BuiltinID == Builtin::BImalloc)) return 0; - if (Context.getLangOpts().isSYCL() && hasAttr() && - BuiltinID != Builtin::BIprintf) { + // SYCL doesn't have a device-side standard library. SYCLDeviceOnlyAttr may + // be used to provide device-side definitions of standard functions, so + // anything with that attribute shouldn't be treated as a builtin. + if (Context.getLangOpts().isSYCL() && hasAttr()) { return 0; } diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp index f9a97cb803fae..6196f5b6744bb 100644 --- a/clang/test/CodeGenSYCL/sycl-device-only.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -21,7 +21,7 @@ __attribute__((sycl_device)) int bar(int b) { return foo(b); } __attribute__((sycl_device_only)) int fooswap(int a) { return a + 20; } int fooswap(int a) { return a + 10; } -// Use a `sycl_device` function as entry point +// Use a `sycl_device` function as entry point. __attribute__((sycl_device)) int barswap(int b) { return fooswap(b); } // Verify that in extern C the attribute enables mangling. 
@@ -32,11 +32,12 @@ extern "C" { int fooc(int a) { return a + 10; } __attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } -// Use a `sycl_device` function as entry point +// Use a `sycl_device` function as entry point. __attribute__((sycl_device)) int barc(int b) { return fooc(b); } } -// Check that both attributes can work together +// Verify that both attributes can work together. +// // CHECK-LABEL: _Z3fooai // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 @@ -45,9 +46,12 @@ __attribute__((sycl_device_only, sycl_device)) int fooa(int a) { return a + 20; } -// Use a `sycl_device` function as entry point +// Use a `sycl_device` function as entry point. __attribute__((sycl_device)) int bara(int b) { return fooa(b); } +// Verify that the order of declaration doesn't change the behavior when using +// both attributes. +// // CHECK-LABEL: _Z3fooaswapi // CHECKH: %add = add nsw i32 %0, 10 // CHECKD: %add = add nsw i32 %0, 20 @@ -56,5 +60,5 @@ __attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { } int fooaswap(int a) { return a + 10; } -// Use a `sycl_device` function as entry point +// Use a `sycl_device` function as entry point. __attribute__((sycl_device)) int baraswap(int b) { return fooaswap(b); } diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/cmath-fallback.h index 54633f91ea21d..abf1a16aae8b8 100644 --- a/sycl/include/sycl/stl_wrappers/cmath-fallback.h +++ b/sycl/include/sycl/stl_wrappers/cmath-fallback.h @@ -45,7 +45,7 @@ struct __dpcpp_promote { typedef decltype(test()) type; }; -// Variant without ::type to allow SFINAE for non promotable types. +// Variant without ::type to allow SFINAE for non-promotable types. template struct __dpcpp_promote {}; // With a single paramter we only need to promote integers. 
@@ -73,11 +73,11 @@ using __dpcpp_promote_3 = typename __dpcpp_promote::type(0))>; // For each math built-in we need to define float and double overloads, an -// extern "C" float variant with the 'f' suffix, and a version that promotes to -// double if any floating-point parameter passed is an integer. +// extern "C" float variant with the 'f' suffix, and a version that promotes +// integers or mixed precision floating-point parameters. // -// TODO: Consider targets that don't have double support -// TODO: Enable long double support where possible +// TODO: Consider targets that don't have double support. +// TODO: Enable long double support where possible. // // The following two macros provide an easy way to define these overloads for // basic built-ins with one or two floating-point parameters. @@ -109,7 +109,7 @@ using __dpcpp_promote_3 = } /// -// FIXME: Move this to a cstdlib fallback header +// FIXME: Move this to a cstdlib fallback header. __DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } __DPCPP_SYCL_DEVICE ldiv_t ldiv(long x, long y) { return {x / y, x % y}; } @@ -294,7 +294,7 @@ __DPCPP_SYCL_DEVICE float modf(float x, float *intpart) { __DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { return __spirv_ocl_modf(x, intpart); } -// modf only supports integer x when the intpart is double +// modf only supports integer x when the intpart is double. template __DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type modf(T x, double *intpart) { @@ -332,9 +332,11 @@ __DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type scalbln(T x, long exp) { __DPCPP_SYCL_DEVICE_C int ilogbf(float x) { return __spirv_ocl_ilogb(x); } __DPCPP_SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } __DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } +// ilogb needs a special template since its signature doesn't include the +// promoted type anywhere, so it needs to be specialized differently. 
template ::value, bool>::type = true> -__DPCPP_SYCL_DEVICE double ilogb(T x) { +__DPCPP_SYCL_DEVICE int ilogb(T x) { return __spirv_ocl_ilogb((double)x); } diff --git a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp index 713569258b387..1da771bfc1861 100644 --- a/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp +++ b/sycl/test/check_device_code/math-builtins/cmath-fallback.cpp @@ -10,8 +10,19 @@ __attribute__((sycl_device)) void entry(float *fp, double *dp, int *ip, long *lp, long long *llp, float *rf, double *rd, int *ri) { + // Use an incrementing index to prevent the compiler from optimizing some + // calls that would store to the same address. int idx = 0; + // For each supported standard math built-in, we test that the following + // overloads are properly mapped to __spirv_ built-ins: + // + // * Float only. + // * Float only with 'f' suffix. + // * Double only. + // * Integer promotion. + // * Mixed floating point promotion (when applicable). 
+ // // CHECK: __spirv_ocl_fmodff rf[idx++] = std::fmod(fp[0], fp[1]); // CHECK: __spirv_ocl_fmodff From 2bdd6a52549cba8a048eb7d64b093ec611c88a83 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 11 Jul 2025 13:54:05 +0100 Subject: [PATCH 34/40] [SYCL] Rename cmath header --- .../{cmath-fallback.h => __sycl_cmath_wrapper_impl.hpp} | 0 sycl/include/sycl/stl_wrappers/cmath | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename sycl/include/sycl/stl_wrappers/{cmath-fallback.h => __sycl_cmath_wrapper_impl.hpp} (100%) diff --git a/sycl/include/sycl/stl_wrappers/cmath-fallback.h b/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp similarity index 100% rename from sycl/include/sycl/stl_wrappers/cmath-fallback.h rename to sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index eeb8261523d1a..14e64c29c9c0a 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -25,7 +25,7 @@ // *** *** #if defined(__NVPTX__) || defined(__AMDGCN__) -#include "cmath-fallback.h" +#include "__sycl_cmath_wrapper_impl.hpp" #endif #include From ea6a3c5164b8e79fb53c4c7eefba74c423aee54f Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Fri, 11 Jul 2025 13:57:29 +0100 Subject: [PATCH 35/40] [SYCL] Switch attribute from GNU to Clang --- clang/include/clang/Basic/Attr.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 6a10a02d877a8..c44c9cf61e8b1 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1608,7 +1608,7 @@ def SYCLDevice : InheritableAttr { } def SYCLDeviceOnly : InheritableAttr { - let Spellings = [GNU<"sycl_device_only">]; + let Spellings = [Clang<"sycl_device_only">]; let Subjects = SubjectList<[Function]>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = 
[SYCLDeviceOnlyDocs]; From fba70423b14081d24df8fc134422e77037e52c08 Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Mon, 14 Jul 2025 07:29:34 +0100 Subject: [PATCH 36/40] [SYCL] Update copyright header --- sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp b/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp index abf1a16aae8b8..06741b2a1ea55 100644 --- a/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp +++ b/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp @@ -1,4 +1,4 @@ -//==------------- cmath-fallback.h -----------------------------------------==// +//==------------- __sycl_cmath_wrapper_impl.hpp ----------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. From 5d13b1dbdcf62dd414fce10264a74da558a58ef4 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 14 Jul 2025 10:54:48 -0700 Subject: [PATCH 37/40] Mark __sycl_cmath_wrapper_impl.hpp as expected to fail self-contained headers test. The header is supposed to be moved to the clang headers soon and it can be included only after cmath. @npmiller, there are still some issues we might need to address. For instance, std::is_floating_point, std::enable_if and other type traits are used without including the corresponding header from the standard library. It seems that cmath indirectly includes type_traits, but we probably should include it explicitly. There are also uses of undeclared identifiers from cmath, but we expect that this header is included only from the cmath wrapper and only after this snippet: ```c++ // Include real STL header - the next one from the include search // directories. // GCC/clang support go through this path. // MSVC doesn't support "#include_next", so we have to be creative. 
// Our header is located in "stl_wrappers/complex" so it won't be picked by the // following include. MSVC's installation, on the other hand, has the layout // where the following would result in the header we want. This is obviously // hacky, but the best we can do... ``` --- sycl/test/self-contained-headers/lit.local.cfg | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/test/self-contained-headers/lit.local.cfg b/sycl/test/self-contained-headers/lit.local.cfg index 9641fdb488397..b15ee42737c2c 100644 --- a/sycl/test/self-contained-headers/lit.local.cfg +++ b/sycl/test/self-contained-headers/lit.local.cfg @@ -10,4 +10,8 @@ config.sycl_headers_xfail = [ os.path.join( "sycl", "ext", "intel", "esimd", "detail", "types_elementary.hpp" ), + # FIXME: remove this rule when the header is moved to the clang project + os.path.join( + "sycl", "stl_wrappers", "__sycl_cmath_wrapper_impl.hpp" + ), ] From ef1e40633a42c161e0dabf5bf92ef1b62e9687a1 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 14 Jul 2025 13:21:11 -0700 Subject: [PATCH 38/40] Make __sycl_cmath_wrapper_impl.hpp more upstream-able. --- .../__sycl_cmath_wrapper_impl.hpp | 255 +++++++++--------- 1 file changed, 122 insertions(+), 133 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp b/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp index 06741b2a1ea55..2e388d5259da3 100644 --- a/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp +++ b/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp @@ -15,8 +15,8 @@ #ifdef __SYCL_DEVICE_ONLY__ // The 'sycl_device_only' attribute enables device-side overloading. 
-#define __DPCPP_SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) -#define __DPCPP_SYCL_DEVICE_C \ +#define __SYCL_DEVICE __attribute__((sycl_device_only, always_inline)) +#define __SYCL_DEVICE_C \ extern "C" __attribute__((sycl_device_only, always_inline)) // Promotion templates: the C++ standard library provides overloads that allow @@ -27,7 +27,7 @@ // below provide the machinery to define these promoting overloads. template ::value || std::is_floating_point::value)> -struct __dpcpp_promote { +struct __sycl_promote { private: // Integer types are promoted to double. template @@ -46,31 +46,31 @@ struct __dpcpp_promote { }; // Variant without ::type to allow SFINAE for non-promotable types. -template struct __dpcpp_promote {}; +template struct __sycl_promote {}; // With a single paramter we only need to promote integers. template -using __dpcpp_promote_1 = std::enable_if::value, double>; +using __sycl_promote_1 = std::enable_if::value, double>; // With two or three parameters we need to promote integers and possibly // floating point types. We rely on operator+ with decltype to deduce the // overall promotion type. This is only needed if at least one of the parameter // is an integer, or if there's multiple different floating point types. 
template -using __dpcpp_promote_2 = +using __sycl_promote_2 = std::enable_if::value || std::is_integral::value || std::is_integral::value, - decltype(typename __dpcpp_promote::type(0) + - typename __dpcpp_promote::type(0))>; + decltype(typename __sycl_promote::type(0) + + typename __sycl_promote::type(0))>; template -using __dpcpp_promote_3 = +using __sycl_promote_3 = std::enable_if::value && std::is_same::value) || std::is_integral::value || std::is_integral::value || std::is_integral::value, - decltype(typename __dpcpp_promote::type(0) + - typename __dpcpp_promote::type(0) + - typename __dpcpp_promote::type(0))>; + decltype(typename __sycl_promote::type(0) + + typename __sycl_promote::type(0) + + typename __sycl_promote::type(0))>; // For each math built-in we need to define float and double overloads, an // extern "C" float variant with the 'f' suffix, and a version that promotes @@ -81,269 +81,258 @@ using __dpcpp_promote_3 = // // The following two macros provide an easy way to define these overloads for // basic built-ins with one or two floating-point parameters. 
-#define __DPCPP_SPIRV_MAP_UNARY(NAME) \ - __DPCPP_SYCL_DEVICE_C float NAME##f(float x) { \ - return __spirv_ocl_##NAME(x); \ - } \ - __DPCPP_SYCL_DEVICE float NAME(float x) { return __spirv_ocl_##NAME(x); } \ - __DPCPP_SYCL_DEVICE double NAME(double x) { return __spirv_ocl_##NAME(x); } \ +#define __SYCL_SPIRV_MAP_UNARY(NAME) \ + __SYCL_DEVICE_C float NAME##f(float x) { return __spirv_ocl_##NAME(x); } \ + __SYCL_DEVICE float NAME(float x) { return __spirv_ocl_##NAME(x); } \ + __SYCL_DEVICE double NAME(double x) { return __spirv_ocl_##NAME(x); } \ template \ - __DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type NAME(T x) { \ + __SYCL_DEVICE typename __sycl_promote_1::type NAME(T x) { \ return __spirv_ocl_##NAME((double)x); \ } -#define __DPCPP_SPIRV_MAP_BINARY(NAME) \ - __DPCPP_SYCL_DEVICE_C float NAME##f(float x, float y) { \ +#define __SYCL_SPIRV_MAP_BINARY(NAME) \ + __SYCL_DEVICE_C float NAME##f(float x, float y) { \ return __spirv_ocl_##NAME(x, y); \ } \ - __DPCPP_SYCL_DEVICE float NAME(float x, float y) { \ + __SYCL_DEVICE float NAME(float x, float y) { \ return __spirv_ocl_##NAME(x, y); \ } \ - __DPCPP_SYCL_DEVICE double NAME(double x, double y) { \ + __SYCL_DEVICE double NAME(double x, double y) { \ return __spirv_ocl_##NAME(x, y); \ } \ template \ - __DPCPP_SYCL_DEVICE __dpcpp_promote_2::type NAME(T x, U y) { \ - typedef typename __dpcpp_promote_2::type type; \ + __SYCL_DEVICE __sycl_promote_2::type NAME(T x, U y) { \ + typedef typename __sycl_promote_2::type type; \ return __spirv_ocl_##NAME((type)x, (type)y); \ } /// // FIXME: Move this to a cstdlib fallback header. 
-__DPCPP_SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } -__DPCPP_SYCL_DEVICE ldiv_t ldiv(long x, long y) { return {x / y, x % y}; } -__DPCPP_SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { - return {x / y, x % y}; -} +__SYCL_DEVICE div_t div(int x, int y) { return {x / y, x % y}; } +__SYCL_DEVICE ldiv_t ldiv(long x, long y) { return {x / y, x % y}; } +__SYCL_DEVICE lldiv_t ldiv(long long x, long long y) { return {x / y, x % y}; } -__DPCPP_SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } -__DPCPP_SYCL_DEVICE_C long long llabs(long long n) { return n < 0 ? -n : n; } -__DPCPP_SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } -__DPCPP_SYCL_DEVICE int abs(int n) { return n < 0 ? -n : n; } -__DPCPP_SYCL_DEVICE_C long labs(long n) { return n < 0 ? -n : n; } +__SYCL_DEVICE long long abs(long long n) { return n < 0 ? -n : n; } +__SYCL_DEVICE_C long long llabs(long long n) { return n < 0 ? -n : n; } +__SYCL_DEVICE long abs(long n) { return n < 0 ? -n : n; } +__SYCL_DEVICE int abs(int n) { return n < 0 ? -n : n; } +__SYCL_DEVICE_C long labs(long n) { return n < 0 ? -n : n; } /// Basic operations // -__DPCPP_SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } -__DPCPP_SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } -__DPCPP_SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } -__DPCPP_SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } -__DPCPP_SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } +__SYCL_DEVICE float abs(float x) { return x < 0 ? -x : x; } +__SYCL_DEVICE double abs(double x) { return x < 0 ? -x : x; } +__SYCL_DEVICE float fabs(float x) { return x < 0 ? -x : x; } +__SYCL_DEVICE_C float fabsf(float x) { return x < 0 ? -x : x; } +__SYCL_DEVICE double fabs(double x) { return x < 0 ? -x : x; } template -__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type fabs(T x) { +__SYCL_DEVICE typename __sycl_promote_1::type fabs(T x) { return x < 0 ? 
-x : x; } -__DPCPP_SPIRV_MAP_BINARY(fmod); -__DPCPP_SPIRV_MAP_BINARY(remainder); +__SYCL_SPIRV_MAP_BINARY(fmod); +__SYCL_SPIRV_MAP_BINARY(remainder); -__DPCPP_SYCL_DEVICE_C float remquof(float x, float y, int *q) { +__SYCL_DEVICE_C float remquof(float x, float y, int *q) { return __spirv_ocl_remquo(x, y, q); } -__DPCPP_SYCL_DEVICE float remquo(float x, float y, int *q) { +__SYCL_DEVICE float remquo(float x, float y, int *q) { return __spirv_ocl_remquo(x, y, q); } -__DPCPP_SYCL_DEVICE double remquo(double x, double y, int *q) { +__SYCL_DEVICE double remquo(double x, double y, int *q) { return __spirv_ocl_remquo(x, y, q); } template -__DPCPP_SYCL_DEVICE typename __dpcpp_promote_2::type remquo(T x, U y, - int *q) { - typedef typename __dpcpp_promote_2::type type; +__SYCL_DEVICE typename __sycl_promote_2::type remquo(T x, U y, int *q) { + typedef typename __sycl_promote_2::type type; return __spirv_ocl_remquo((type)x, (type)y, q); } -__DPCPP_SYCL_DEVICE_C float fmaf(float x, float y, float z) { +__SYCL_DEVICE_C float fmaf(float x, float y, float z) { return __spirv_ocl_fma(x, y, z); } -__DPCPP_SYCL_DEVICE float fma(float x, float y, float z) { +__SYCL_DEVICE float fma(float x, float y, float z) { return __spirv_ocl_fma(x, y, z); } -__DPCPP_SYCL_DEVICE double fma(double x, double y, double z) { +__SYCL_DEVICE double fma(double x, double y, double z) { return __spirv_ocl_fma(x, y, z); } template -__DPCPP_SYCL_DEVICE typename __dpcpp_promote_3::type fma(T x, U y, - V z) { - typedef typename __dpcpp_promote_3::type type; +__SYCL_DEVICE typename __sycl_promote_3::type fma(T x, U y, V z) { + typedef typename __sycl_promote_3::type type; return __spirv_ocl_fma((type)x, (type)y, (type)z); } -__DPCPP_SPIRV_MAP_BINARY(fmax); -__DPCPP_SPIRV_MAP_BINARY(fmin); -__DPCPP_SPIRV_MAP_BINARY(fdim); +__SYCL_SPIRV_MAP_BINARY(fmax); +__SYCL_SPIRV_MAP_BINARY(fmin); +__SYCL_SPIRV_MAP_BINARY(fdim); // unsupported: nan /// Exponential functions // -__DPCPP_SPIRV_MAP_UNARY(exp); 
-__DPCPP_SPIRV_MAP_UNARY(exp2); -__DPCPP_SPIRV_MAP_UNARY(expm1); -__DPCPP_SPIRV_MAP_UNARY(log); -__DPCPP_SPIRV_MAP_UNARY(log10); -__DPCPP_SPIRV_MAP_UNARY(log2); -__DPCPP_SPIRV_MAP_UNARY(log1p); +__SYCL_SPIRV_MAP_UNARY(exp); +__SYCL_SPIRV_MAP_UNARY(exp2); +__SYCL_SPIRV_MAP_UNARY(expm1); +__SYCL_SPIRV_MAP_UNARY(log); +__SYCL_SPIRV_MAP_UNARY(log10); +__SYCL_SPIRV_MAP_UNARY(log2); +__SYCL_SPIRV_MAP_UNARY(log1p); /// Power functions // -__DPCPP_SPIRV_MAP_BINARY(pow); -__DPCPP_SPIRV_MAP_UNARY(sqrt); -__DPCPP_SPIRV_MAP_UNARY(cbrt); -__DPCPP_SPIRV_MAP_BINARY(hypot); +__SYCL_SPIRV_MAP_BINARY(pow); +__SYCL_SPIRV_MAP_UNARY(sqrt); +__SYCL_SPIRV_MAP_UNARY(cbrt); +__SYCL_SPIRV_MAP_BINARY(hypot); /// Trigonometric functions // -__DPCPP_SPIRV_MAP_UNARY(sin); -__DPCPP_SPIRV_MAP_UNARY(cos); -__DPCPP_SPIRV_MAP_UNARY(tan); -__DPCPP_SPIRV_MAP_UNARY(asin); -__DPCPP_SPIRV_MAP_UNARY(acos); -__DPCPP_SPIRV_MAP_UNARY(atan); -__DPCPP_SPIRV_MAP_BINARY(atan2); +__SYCL_SPIRV_MAP_UNARY(sin); +__SYCL_SPIRV_MAP_UNARY(cos); +__SYCL_SPIRV_MAP_UNARY(tan); +__SYCL_SPIRV_MAP_UNARY(asin); +__SYCL_SPIRV_MAP_UNARY(acos); +__SYCL_SPIRV_MAP_UNARY(atan); +__SYCL_SPIRV_MAP_BINARY(atan2); /// Hyperbolic functions // -__DPCPP_SPIRV_MAP_UNARY(sinh); -__DPCPP_SPIRV_MAP_UNARY(cosh); -__DPCPP_SPIRV_MAP_UNARY(tanh); -__DPCPP_SPIRV_MAP_UNARY(asinh); -__DPCPP_SPIRV_MAP_UNARY(acosh); -__DPCPP_SPIRV_MAP_UNARY(atanh); +__SYCL_SPIRV_MAP_UNARY(sinh); +__SYCL_SPIRV_MAP_UNARY(cosh); +__SYCL_SPIRV_MAP_UNARY(tanh); +__SYCL_SPIRV_MAP_UNARY(asinh); +__SYCL_SPIRV_MAP_UNARY(acosh); +__SYCL_SPIRV_MAP_UNARY(atanh); /// Error and gamma functions // -__DPCPP_SPIRV_MAP_UNARY(erf); -__DPCPP_SPIRV_MAP_UNARY(erfc); -__DPCPP_SPIRV_MAP_UNARY(tgamma); -__DPCPP_SPIRV_MAP_UNARY(lgamma); +__SYCL_SPIRV_MAP_UNARY(erf); +__SYCL_SPIRV_MAP_UNARY(erfc); +__SYCL_SPIRV_MAP_UNARY(tgamma); +__SYCL_SPIRV_MAP_UNARY(lgamma); /// Nearest integer floating-point operations // -__DPCPP_SPIRV_MAP_UNARY(ceil); -__DPCPP_SPIRV_MAP_UNARY(floor); 
-__DPCPP_SPIRV_MAP_UNARY(trunc); -__DPCPP_SPIRV_MAP_UNARY(round); +__SYCL_SPIRV_MAP_UNARY(ceil); +__SYCL_SPIRV_MAP_UNARY(floor); +__SYCL_SPIRV_MAP_UNARY(trunc); +__SYCL_SPIRV_MAP_UNARY(round); // unsupported: lround, llround (no spirv mapping) -__DPCPP_SPIRV_MAP_UNARY(rint); +__SYCL_SPIRV_MAP_UNARY(rint); // unsupported: lrint, llrint (no spirv mapping) // unsupported (partially, no spirv mapping): nearbyint #if defined(__NVPTX__) extern "C" SYCL_EXTERNAL float __nv_nearbyintf(float); extern "C" SYCL_EXTERNAL double __nv_nearbyint(double); -__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { return __nv_nearbyintf(x); } -__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __nv_nearbyintf(x); } -__DPCPP_SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } +__SYCL_DEVICE_C float nearbyintf(float x) { return __nv_nearbyintf(x); } +__SYCL_DEVICE float nearbyint(float x) { return __nv_nearbyintf(x); } +__SYCL_DEVICE double nearbyint(double x) { return __nv_nearbyintf(x); } #elif defined(__AMDGCN__) extern "C" SYCL_EXTERNAL float __ocml_nearbyint_f32(float); extern "C" SYCL_EXTERNAL double __ocml_nearbyint_f64(double); -__DPCPP_SYCL_DEVICE_C float nearbyintf(float x) { - return __ocml_nearbyint_f32(x); -} -__DPCPP_SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } -__DPCPP_SYCL_DEVICE double nearbyint(double x) { - return __ocml_nearbyint_f64(x); -} +__SYCL_DEVICE_C float nearbyintf(float x) { return __ocml_nearbyint_f32(x); } +__SYCL_DEVICE float nearbyint(float x) { return __ocml_nearbyint_f32(x); } +__SYCL_DEVICE double nearbyint(double x) { return __ocml_nearbyint_f64(x); } #endif /// Floating-point manipulation functions // -__DPCPP_SYCL_DEVICE_C float frexpf(float x, int *exp) { +__SYCL_DEVICE_C float frexpf(float x, int *exp) { return __spirv_ocl_frexp(x, exp); } -__DPCPP_SYCL_DEVICE float frexp(float x, int *exp) { +__SYCL_DEVICE float frexp(float x, int *exp) { return __spirv_ocl_frexp(x, exp); } -__DPCPP_SYCL_DEVICE double 
frexp(double x, int *exp) { +__SYCL_DEVICE double frexp(double x, int *exp) { return __spirv_ocl_frexp(x, exp); } template -__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type frexp(T x, int *exp) { +__SYCL_DEVICE typename __sycl_promote_1::type frexp(T x, int *exp) { return __spirv_ocl_frexp((double)x, exp); } -__DPCPP_SYCL_DEVICE_C float ldexpf(float x, int exp) { +__SYCL_DEVICE_C float ldexpf(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } -__DPCPP_SYCL_DEVICE float ldexp(float x, int exp) { +__SYCL_DEVICE float ldexp(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } -__DPCPP_SYCL_DEVICE double ldexp(double x, int exp) { +__SYCL_DEVICE double ldexp(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } template -__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type ldexp(T x, int exp) { +__SYCL_DEVICE typename __sycl_promote_1::type ldexp(T x, int exp) { return __spirv_ocl_ldexp((double)x, exp); } -__DPCPP_SYCL_DEVICE_C float modff(float x, float *intpart) { +__SYCL_DEVICE_C float modff(float x, float *intpart) { return __spirv_ocl_modf(x, intpart); } -__DPCPP_SYCL_DEVICE float modf(float x, float *intpart) { +__SYCL_DEVICE float modf(float x, float *intpart) { return __spirv_ocl_modf(x, intpart); } -__DPCPP_SYCL_DEVICE double modf(double x, double *intpart) { +__SYCL_DEVICE double modf(double x, double *intpart) { return __spirv_ocl_modf(x, intpart); } // modf only supports integer x when the intpart is double. 
template -__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type modf(T x, - double *intpart) { +__SYCL_DEVICE typename __sycl_promote_1::type modf(T x, double *intpart) { return __spirv_ocl_modf((double)x, intpart); } -__DPCPP_SYCL_DEVICE_C float scalbnf(float x, int exp) { +__SYCL_DEVICE_C float scalbnf(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } -__DPCPP_SYCL_DEVICE float scalbn(float x, int exp) { +__SYCL_DEVICE float scalbn(float x, int exp) { return __spirv_ocl_ldexp(x, exp); } -__DPCPP_SYCL_DEVICE double scalbn(double x, int exp) { +__SYCL_DEVICE double scalbn(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } template -__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type scalbn(T x, int exp) { +__SYCL_DEVICE typename __sycl_promote_1::type scalbn(T x, int exp) { return __spirv_ocl_ldexp((double)x, exp); } -__DPCPP_SYCL_DEVICE_C float scalblnf(float x, long exp) { +__SYCL_DEVICE_C float scalblnf(float x, long exp) { return __spirv_ocl_ldexp(x, (int)exp); } -__DPCPP_SYCL_DEVICE float scalbln(float x, long exp) { +__SYCL_DEVICE float scalbln(float x, long exp) { return __spirv_ocl_ldexp(x, (int)exp); } -__DPCPP_SYCL_DEVICE double scalbln(double x, long exp) { +__SYCL_DEVICE double scalbln(double x, long exp) { return __spirv_ocl_ldexp(x, (int)exp); } template -__DPCPP_SYCL_DEVICE typename __dpcpp_promote_1::type scalbln(T x, long exp) { +__SYCL_DEVICE typename __sycl_promote_1::type scalbln(T x, long exp) { return __spirv_ocl_ldexp((double)x, (int)exp); } -__DPCPP_SYCL_DEVICE_C int ilogbf(float x) { return __spirv_ocl_ilogb(x); } -__DPCPP_SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } -__DPCPP_SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } +__SYCL_DEVICE_C int ilogbf(float x) { return __spirv_ocl_ilogb(x); } +__SYCL_DEVICE int ilogb(float x) { return __spirv_ocl_ilogb(x); } +__SYCL_DEVICE int ilogb(double x) { return __spirv_ocl_ilogb(x); } // ilogb needs a special template since its signature doesn't include 
the // promoted type anywhere, so it needs to be specialized differently. template ::value, bool>::type = true> -__DPCPP_SYCL_DEVICE int ilogb(T x) { +__SYCL_DEVICE int ilogb(T x) { return __spirv_ocl_ilogb((double)x); } -__DPCPP_SPIRV_MAP_UNARY(logb); -__DPCPP_SPIRV_MAP_BINARY(nextafter); +__SYCL_SPIRV_MAP_UNARY(logb); +__SYCL_SPIRV_MAP_BINARY(nextafter); // unsupported: nextforward -__DPCPP_SPIRV_MAP_BINARY(copysign); +__SYCL_SPIRV_MAP_BINARY(copysign); /// Classification and comparison // @@ -533,9 +522,9 @@ _GLIBCXX_END_NAMESPACE_VERSION } // namespace std #endif -#undef __DPCPP_SPIRV_MAP_BINARY -#undef __DPCPP_SPIRV_MAP_UNARY -#undef __DPCPP_SYCL_DEVICE_C -#undef __DPCPP_SYCL_DEVICE +#undef __SYCL_SPIRV_MAP_BINARY +#undef __SYCL_SPIRV_MAP_UNARY +#undef __SYCL_DEVICE_C +#undef __SYCL_DEVICE #endif #endif From be69cdd36314417dc662bceca8f0d13fe40372f3 Mon Sep 17 00:00:00 2001 From: Alexey Bader Date: Mon, 14 Jul 2025 14:55:41 -0700 Subject: [PATCH 39/40] Update the header guard macro name. --- sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp b/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp index 2e388d5259da3..31bdaf236d9b6 100644 --- a/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp +++ b/sycl/include/sycl/stl_wrappers/__sycl_cmath_wrapper_impl.hpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef __CMATH_FALLBACK_H__ -#define __CMATH_FALLBACK_H__ +#ifndef __SYCL_CMATH_WRAPPER_IMPL_HPP__ +#define __SYCL_CMATH_WRAPPER_IMPL_HPP__ // This header defines device-side overloads of functions based on // their equivalent __spirv_ built-ins. 
From 6d52c978c940da7dd605d487173abedef904d3cf Mon Sep 17 00:00:00 2001 From: Nicolas Miller Date: Tue, 15 Jul 2025 13:40:38 +0100 Subject: [PATCH 40/40] [SYCL] Make sycl_device and sycl_device_only incompatible --- clang/include/clang/Basic/Attr.td | 1 + clang/include/clang/Basic/AttrDocs.td | 4 +-- clang/lib/CodeGen/CodeGenModule.cpp | 12 -------- clang/lib/Sema/SemaDecl.cpp | 16 +++++++++++ clang/lib/Sema/SemaOverload.cpp | 17 ++++++++--- clang/test/CodeGenSYCL/sycl-device-only.cpp | 27 ------------------ clang/test/SemaSYCL/sycl-device-only.cpp | 31 +++++++++++++++++++++ sycl/include/sycl/stl_wrappers/cmath | 3 +- 8 files changed, 65 insertions(+), 46 deletions(-) create mode 100644 clang/test/SemaSYCL/sycl-device-only.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index c44c9cf61e8b1..bae6b08b0a2b5 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1613,6 +1613,7 @@ def SYCLDeviceOnly : InheritableAttr { let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Documentation = [SYCLDeviceOnlyDocs]; } +def : MutualExclusions<[SYCLDevice, SYCLDeviceOnly]>; def SYCLGlobalVar : InheritableAttr { let Spellings = [GNU<"sycl_global_var">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 000a0e522e8cc..4f4dc846abc3e 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4527,8 +4527,8 @@ is only available for the device. It allows functions marked with it to overload existing functions without the attribute, in which case the overload with the attribute will be used on the device side and the overload without will be used on the host side. Note: as opposed to ``sycl_device`` this does -not mark the function as being exported, both attributes can be used together -if needed. +not mark the function as being exported. The two attributes are incompatible +and can't be used together. 
}]; } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 4a077f3fbf5de..af3ea664ee4c7 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4429,18 +4429,6 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) { // symbol it may be a SYCLDeviceOnlyAttr case. if (DDI != DeferredDecls.end()) { auto *PreviousGlobal = cast(DDI->second.getDecl()); - - // If the host declaration was already processed and the device only - // declaration is also a sycl external declaration, remove the host - // variant and skip. The device only variant will be generated later - // as it's marked sycl external. - if (!PreviousGlobal->hasAttr() && - Global->hasAttr() && - Global->hasAttr()) { - DeferredDecls.erase(DDI); - return; - } - // If the host declaration was already processed, replace it with the // device only declaration. if (!PreviousGlobal->hasAttr() && diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 32c800c247728..ed6d10b191542 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1486,6 +1486,17 @@ void Sema::ActOnExitFunctionContext() { static bool AllowOverloadingOfFunction(const LookupResult &Previous, ASTContext &Context, const FunctionDecl *New) { + // SYCLDeviceOnlyAttr allows device side overloads of SYCL function, but it + // is incompatible with SYCLDeviceAttr, so don't allow overloads when both + // attributes are present. 
+ if (Context.getLangOpts().isSYCL() && + Previous.getResultKind() == LookupResultKind::Found && + ((New->hasAttr() && + Previous.getFoundDecl()->hasAttr()) || + (New->hasAttr() && + Previous.getFoundDecl()->hasAttr()))) + return false; + if (Context.getLangOpts().CPlusPlus || New->hasAttr()) return true; @@ -3702,6 +3713,11 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD, Scope *S, return true; } + // Never merge SYCLDeviceOnlyAttr functions in their host variant + if (getLangOpts().isSYCL() && + Old->hasAttr() != New->hasAttr()) + return false; + diag::kind PrevDiag; SourceLocation OldLocation; std::tie(PrevDiag, OldLocation) = diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 91c9106be81c1..deaeb8ca14337 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1630,11 +1630,20 @@ static bool IsOverloadOrOverrideImpl(Sema &SemaRef, FunctionDecl *New, } // Allow overloads with SYCLDeviceOnlyAttr - if (SemaRef.getLangOpts().isSYCL()) { - if (Old->hasAttr() != - New->hasAttr()) { - return true; + if (SemaRef.getLangOpts().isSYCL() && (Old->hasAttr() != + New->hasAttr())) { + // SYCLDeviceOnlyAttr and SYCLDeviceAttr functions can't overload + if (((New->hasAttr() && + Old->hasAttr()) || + (New->hasAttr() && + Old->hasAttr()))) { + SemaRef.Diag(New->getLocation(), diag::err_redefinition) + << New->getDeclName(); + SemaRef.notePreviousDefinition(Old, New->getLocation()); + return false; } + + return true; } // The signatures match; this is not an overload. diff --git a/clang/test/CodeGenSYCL/sycl-device-only.cpp b/clang/test/CodeGenSYCL/sycl-device-only.cpp index 6196f5b6744bb..4462aaa273b27 100644 --- a/clang/test/CodeGenSYCL/sycl-device-only.cpp +++ b/clang/test/CodeGenSYCL/sycl-device-only.cpp @@ -35,30 +35,3 @@ __attribute__((sycl_device_only)) int fooc(int a) { return a + 20; } // Use a `sycl_device` function as entry point. 
__attribute__((sycl_device)) int barc(int b) { return fooc(b); } } - -// Verify that both attributes can work together. -// -// CHECK-LABEL: _Z3fooai -// CHECKH: %add = add nsw i32 %0, 10 -// CHECKD: %add = add nsw i32 %0, 20 -int fooa(int a) { return a + 10; } -__attribute__((sycl_device_only, sycl_device)) int fooa(int a) { - return a + 20; -} - -// Use a `sycl_device` function as entry point. -__attribute__((sycl_device)) int bara(int b) { return fooa(b); } - -// Verify that the order of declaration doesn't change the behavior when using -// both attributes. -// -// CHECK-LABEL: _Z3fooaswapi -// CHECKH: %add = add nsw i32 %0, 10 -// CHECKD: %add = add nsw i32 %0, 20 -__attribute__((sycl_device_only, sycl_device)) int fooaswap(int a) { - return a + 20; -} -int fooaswap(int a) { return a + 10; } - -// Use a `sycl_device` function as entry point. -__attribute__((sycl_device)) int baraswap(int b) { return fooaswap(b); } diff --git a/clang/test/SemaSYCL/sycl-device-only.cpp b/clang/test/SemaSYCL/sycl-device-only.cpp new file mode 100644 index 0000000000000..bc50dc827000c --- /dev/null +++ b/clang/test/SemaSYCL/sycl-device-only.cpp @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -fsycl-is-device -fsyntax-only -verify %s +// RUN: %clang_cc1 -fsycl-is-host -fsyntax-only -verify %s + +// Semantic tests for sycl_device_only attribute + +// Valid uses +int foook(int x) { + return x + 10; +} + +__attribute__((sycl_device_only)) int foook(int x) { + return x + 20; +} + +// Conflicting attributes +// expected-note@+1 {{conflicting attribute is here}} +__attribute__((sycl_device_only, sycl_device)) // expected-error {{'sycl_device' and 'sycl_device_only' attributes are not compatible}} +int fooconflict(int x) { + return x + 20; +} + +// Bad overload +__attribute__((sycl_device)) +int foobad(int x) { // expected-note {{previous definition is here}} + return x + 10; +} + +__attribute__((sycl_device_only)) +int foobad(int x) { // expected-error {{redefinition of 'foobad'}} + return x + 20; 
+} diff --git a/sycl/include/sycl/stl_wrappers/cmath b/sycl/include/sycl/stl_wrappers/cmath index 14e64c29c9c0a..1ef59326fbbf9 100644 --- a/sycl/include/sycl/stl_wrappers/cmath +++ b/sycl/include/sycl/stl_wrappers/cmath @@ -26,7 +26,7 @@ #if defined(__NVPTX__) || defined(__AMDGCN__) #include "__sycl_cmath_wrapper_impl.hpp" -#endif +#else #include @@ -209,3 +209,4 @@ extern __DPCPP_SYCL_EXTERNAL float _hypotf(float x, float y); } #endif #endif // __SYCL_DEVICE_ONLY__ +#endif