From bc98cc4e38dee698a090ca7b116c87bdf26e09af Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 18 Dec 2019 00:01:15 +0300 Subject: [PATCH] [SYCL] Fix issue with half and -fsycl-unnamed-lambda When `-fsycl-unnamed-lambda` is present, mapping from SYCL Kernel function to a corresponding OpenCL kernel name is done via `__unique_stable_name` built-in. It is used by device compiler to generate integration header and it is used by host compiler to find kernels information in there. The problem is that we might get different results for the same SYCL Kernel function when compiling for host and device: the issue appears if kernel uses `half` data type which is represented as: - `cl::sycl::detail::half_impl::half` on host - `_Float16` on device Actually, similar issue exists even without `-fsycl-unnamed-lambda`, but for that case we have a work-around in form of `#define _Float16 cl::sycl::detail::half_impl::half` in `kernel_desc.hpp` to turn device half representation into a host one. The same trick doesn't apply here and the problem is fixed by doing the following: - for `UniqueStableMangler`, we mangle `cl::sycl::detail::half_impl::half` in the same way as `_Float16`, i.e. `FD16_` - for `UniqueStableMandlger`, `cl::sycl::detail::half_impl::half` is marked as non-substitutable to avoid other differences in mangled name Signed-off-by: Alexey Sachkov --- clang/lib/AST/ItaniumMangle.cpp | 71 +++++++++++++++++++ .../CodeGenSYCL/half-with-unnamed-lambda.cpp | 68 ++++++++++++++++++ .../regression/fp16-with-unnamed-lambda.cpp | 43 +++++++++++ 3 files changed, 182 insertions(+) create mode 100644 clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp create mode 100644 sycl/test/regression/fp16-with-unnamed-lambda.cpp diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index c0dcc69df613c..664541465e296 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2445,6 +2445,67 @@ static bool isTypeSubstitutable(Qualifiers Quals, const Type *Ty, return true; } +namespace { +struct DeclContextDesc { + Decl::Kind DeclKind; + StringRef Name; +}; +} // namespace + +// For Scopes argument, the only supported Decl::Kind values are: +// - Namespace +// - CXXRecord +// - ClassTemplateSpecialization +static bool matchQualifiedTypeName(const QualType &Ty, + ArrayRef Scopes) { + // The idea: check the declaration context chain starting from the type + // itself. At each step check the context is of expected kind + // (namespace) and name. + const CXXRecordDecl *RecTy = Ty->getAsCXXRecordDecl(); + + if (!RecTy) + return false; // only classes/structs supported + const auto *Ctx = dyn_cast(RecTy); + + for (const auto &Scope : llvm::reverse(Scopes)) { + Decl::Kind DK = Ctx->getDeclKind(); + StringRef Name = ""; + + if (DK != Scope.DeclKind) + return false; + + switch (DK) { + case Decl::Kind::ClassTemplateSpecialization: + // ClassTemplateSpecializationDecl inherits from CXXRecordDecl + case Decl::Kind::CXXRecord: + Name = cast(Ctx)->getName(); + break; + case Decl::Kind::Namespace: + Name = cast(Ctx)->getName(); + break; + default: + return false; + } + if (Name != Scope.Name) + return false; + Ctx = Ctx->getParent(); + } + return Ctx->isTranslationUnit(); +} + +static bool isSYCLHostHalfType(const Type *Ty) { + // FIXME: this is not really portable, since the bunch of namespace below + // is not specified by the SYCL standard and highly depends on particular + // implementation + static const std::array Scopes = { + DeclContextDesc{Decl::Kind::Namespace, "cl"}, + DeclContextDesc{Decl::Kind::Namespace, "sycl"}, + DeclContextDesc{Decl::Kind::Namespace, "detail"}, + DeclContextDesc{Decl::Kind::Namespace, "half_impl"}, + DeclContextDesc{Decl::Kind::CXXRecord, "half"}}; + return matchQualifiedTypeName(QualType(Ty, 0), Scopes); +} + void CXXNameMangler::mangleType(QualType T) { // If our type is instantiation-dependent but not dependent, we mangle // it as it was written in the source, removing any top-level sugar. @@ -2504,6 +2565,11 @@ void CXXNameMangler::mangleType(QualType T) { bool isSubstitutable = isTypeSubstitutable(quals, ty, Context.getASTContext()); + if (Context.isUniqueNameMangler() && isSYCLHostHalfType(ty)) { + // Set isSubstitutable to false for cl::sycl::detail::half_impl::half + // to achieve the same mangling for other components + isSubstitutable = false; + } if (isSubstitutable && mangleSubstitution(T)) return; @@ -2980,6 +3046,11 @@ void CXXNameMangler::mangleType(const RecordType *T) { mangleType(static_cast(T)); } void CXXNameMangler::mangleType(const TagType *T) { + if (Context.isUniqueNameMangler() && isSYCLHostHalfType(T)) { + // Mangle cl::sycl::detail::half_imple::half as _Float16 + mangleType(Context.getASTContext().Float16Ty); + return; + } mangleName(T->getDecl()); } diff --git a/clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp b/clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp new file mode 100644 index 0000000000000..f5b757f750d3e --- /dev/null +++ b/clang/test/CodeGenSYCL/half-with-unnamed-lambda.cpp @@ -0,0 +1,68 @@ +// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -emit-llvm %s -o %t1.bc +// RUN: llvm-dis %t1.bc -o - | FileCheck %s +// RUN: %clangxx -fsycl-device-only -fsycl-unnamed-lambda -emit-llvm %s -DUSE_WRAPPER=1 -o %t2.bc +// RUN: llvm-dis %t2.bc -o - | FileCheck %s + +// Mangling of kernel lambda must be the same for both versions of half +// CHECK: __unique_stable_name{{.*}} = private unnamed_addr constant [52 x i8] c"_ZTSN2cl4sycl6bufferINS0_4pairIDF16_NS0_5dummyEEEEE\00" + +// Helper function to get string returned by __unique_stable_name in LLVM IR +template +void print() { + auto temp = __unique_stable_name(T); +} + +// Helper function to get "print" emitted in device code +template +__attribute__((sycl_kernel)) void helper(F f) { + print(); + f(); +} + +// Half wrapper, as it defined in SYCL headers +namespace cl { +namespace sycl { +namespace detail { +namespace half_impl { +class half { +public: + half operator=(int) {return *this;} +}; +} // namespace half_impl +} // namespace detail +} // namespace sycl +} // namespace cl + +#ifndef USE_WRAPPER +using half = _Float16; +#else +using half = cl::sycl::detail::half_impl::half; +#endif + +// A few more fake data types to complicate the mangling +namespace cl { +namespace sycl { +struct dummy { + int a; +}; +template +struct pair { + T1 a; + T2 b; +}; +template +class buffer { +public: + T &operator[](int) const { return value; } + mutable T value; +}; +} // namespace sycl +} // namespace cl + +int main() { + cl::sycl::buffer> B1; + + helper([](){}); + + return 0; +} diff --git a/sycl/test/regression/fp16-with-unnamed-lambda.cpp b/sycl/test/regression/fp16-with-unnamed-lambda.cpp new file mode 100644 index 0000000000000..ead806dcce68a --- /dev/null +++ b/sycl/test/regression/fp16-with-unnamed-lambda.cpp @@ -0,0 +1,43 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +#include + +#include + +int main() { + auto AsyncHandler = [](cl::sycl::exception_list EL) { + for (std::exception_ptr const &P : EL) { + try { + std::rethrow_exception(P); + } catch (std::exception const &E) { + std::cerr << "Caught async SYCL exception: " << E.what() << std::endl; + } + } + }; + + cl::sycl::queue Q(AsyncHandler); + + cl::sycl::device D = Q.get_device(); + if (!D.has_extension("cl_khr_fp16")) + return 0; // Skip the test if halfs are not supported + + cl::sycl::buffer Buf(1); + + Q.submit([&](cl::sycl::handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.single_task([=]() { + Acc[0] = 1; + }); + }); + + Q.wait_and_throw(); + + auto Acc = Buf.get_access(); + if (1 != Acc[0]) { + std::cerr << "Incorrect result, got: " << Acc[0] + << ", expected: 1" << std::endl; + return 1; + } + + return 0; +}