From 12ad80fe2affbd2df59a2794c92e98d25454c254 Mon Sep 17 00:00:00 2001 From: Mariya Podchishchaeva Date: Fri, 31 May 2019 08:44:10 +0300 Subject: [PATCH] [SYCL] Remove _Float16 from integration header The main problem that half type is defined as _Float16 on the device and as manually implemented type on host so compiler could add _Float16 to the integration header and it will produce errors in host compilation. Signed-off-by: Mariya Podchishchaeva --- sycl/include/CL/sycl/detail/kernel_desc.hpp | 14 ++++++++++++++ sycl/test/sub_group/load_store.cpp | 13 +++---------- sycl/test/sub_group/shuffle.cpp | 8 ++------ 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp index c887fea8d8446..f5cecb8dfaa2a 100644 --- a/sycl/include/CL/sycl/detail/kernel_desc.hpp +++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp @@ -15,6 +15,20 @@ namespace cl { namespace sycl { namespace detail { +namespace half_impl { + +class half; +// Half type is defined as _Float16 on device and as manually implemented half +// type on host. Integration header is generated by device compiler so it sees +// half type as _Float16 and it will add _Float16 to integration header if it +// is used in kernel name template parameters. To avoid errors in host +// compilation we remove _Float16 from integration header using following macro. +#ifndef __SYCL_DEVICE_ONLY__ +#define _Float16 cl::sycl::detail::half_impl::half +#endif + +} // namespace half_impl + // kernel parameter kinds enum class kernel_param_kind_t { kind_accessor, diff --git a/sycl/test/sub_group/load_store.cpp b/sycl/test/sub_group/load_store.cpp index e89677cacbf15..19cb8ef10183c 100644 --- a/sycl/test/sub_group/load_store.cpp +++ b/sycl/test/sub_group/load_store.cpp @@ -16,10 +16,6 @@ template class sycl_subgr; using namespace cl::sycl; -// TODO remove this workaround when integration header will support correct -// half generation -struct wa_half; -typedef half aligned_half __attribute__((aligned(16))); template void check(queue &Queue) { const int G = 1024, L = 64; @@ -34,12 +30,10 @@ template void check(queue &Queue) { acc[i] += 0.1; // Check that floating point types are not casted to int } } - using TT = typename std::conditional::value, - wa_half, T>::type; Queue.submit([&](handler &cgh) { auto acc = syclbuf.template get_access(cgh); auto sgsizeacc = sgsizebuf.get_access(cgh); - cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { + cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { intel::sub_group SG = NdItem.get_sub_group(); if (SG.get_group_id().get(0) % N == 0) { size_t WGSGoffset = @@ -103,12 +97,10 @@ template void check(queue &Queue) { } } - using TT = typename std::conditional::value, - wa_half, T>::type; Queue.submit([&](handler &cgh) { auto acc = syclbuf.template get_access(cgh); auto sgsizeacc = sgsizebuf.get_access(cgh); - cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { + cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { intel::sub_group SG = NdItem.get_sub_group(); if (NdItem.get_global_id(0) == 0) sgsizeacc[0] = SG.get_max_local_range()[0]; @@ -180,6 +172,7 @@ int main() { check(Queue); check(Queue); if (Queue.get_device().has_extension("cl_khr_fp16")) { + typedef half aligned_half __attribute__((aligned(16))); check(Queue); check(Queue); check(Queue); diff --git a/sycl/test/sub_group/shuffle.cpp b/sycl/test/sub_group/shuffle.cpp index 66b74767710e9..9e1437a4a3753 100644 --- a/sycl/test/sub_group/shuffle.cpp +++ b/sycl/test/sub_group/shuffle.cpp @@ -33,8 +33,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) { buffer> buf_down(G); buffer> buf_xor(G); buffer sgsizebuf(1); - using TT = typename std::conditional::value, wa_half, - T>::type; Queue.submit([&](handler &cgh) { auto acc2 = buf2.template get_access(cgh); auto acc2_up = buf2_up.template get_access(cgh); @@ -48,7 +46,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) { auto acc_xor = buf_xor.template get_access(cgh); auto sgsizeacc = sgsizebuf.get_access(cgh); - cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { + cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { intel::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0); @@ -141,8 +139,6 @@ template void check(queue &Queue, size_t G = 240, size_t L = 60) { buffer buf_down(G); buffer buf_xor(G); buffer sgsizebuf(1); - using TT = typename std::conditional::value, wa_half, - T>::type; Queue.submit([&](handler &cgh) { auto acc2 = buf2.template get_access(cgh); auto acc2_up = buf2_up.template get_access(cgh); @@ -156,7 +152,7 @@ template void check(queue &Queue, size_t G = 240, size_t L = 60) { auto acc_xor = buf_xor.template get_access(cgh); auto sgsizeacc = sgsizebuf.get_access(cgh); - cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { + cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { intel::sub_group SG = NdItem.get_sub_group(); uint32_t wggid = NdItem.get_global_id(0); uint32_t sgid = SG.get_group_id().get(0);