diff --git a/SYCL/Basic/bit_cast/bit_cast.cpp b/SYCL/Basic/bit_cast/bit_cast.cpp index d6b8ce6002..dfaf77ba1c 100644 --- a/SYCL/Basic/bit_cast/bit_cast.cpp +++ b/SYCL/Basic/bit_cast/bit_cast.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -13,11 +13,11 @@ constexpr sycl::access::mode sycl_write = sycl::access::mode::write; template class BitCastKernel; -template To doBitCast(const From &ValueToConvert) { +template +To doBitCast(sycl::queue Queue, const From &ValueToConvert) { std::vector Vec(1); { sycl::buffer Buf(Vec.data(), 1); - sycl::queue Queue; Queue.submit([&](sycl::handler &cgh) { auto acc = Buf.template get_access(cgh); cgh.single_task>([=]() { @@ -28,8 +28,10 @@ template To doBitCast(const From &ValueToConvert) { return Vec[0]; } -template int test(const From &Value) { - auto ValueConvertedTwoTimes = doBitCast(doBitCast(Value)); +template +int test(sycl::queue Queue, const From &Value) { + auto ValueConvertedTwoTimes = + doBitCast(Queue, doBitCast(Queue, Value)); bool isOriginalValueEqualsToConvertedTwoTimes = false; if (std::is_integral::value) { isOriginalValueEqualsToConvertedTwoTimes = Value == ValueConvertedTwoTimes; @@ -54,31 +56,34 @@ template int test(const From &Value) { } int main() { + sycl::queue Queue; int ReturnCode = 0; - std::cout << "sycl::half to unsigned short ...\n"; - ReturnCode += test(sycl::half(1.0f)); + if (Queue.get_device().has(sycl::aspect::fp16)) { + std::cout << "sycl::half to unsigned short ...\n"; + ReturnCode += test(Queue, sycl::half(1.0f)); - std::cout << "unsigned short to sycl::half ...\n"; - ReturnCode += test(static_cast(16384)); + std::cout << "unsigned short to sycl::half ...\n"; + ReturnCode += test(Queue, static_cast(16384)); - std::cout << "sycl::half to short ...\n"; - ReturnCode += test(sycl::half(1.0f)); + std::cout << "sycl::half to short ...\n"; + ReturnCode += test(Queue, sycl::half(1.0f)); - std::cout << "short to sycl::half ...\n"; - ReturnCode += test(static_cast(16384)); + std::cout << "short to sycl::half ...\n"; + ReturnCode += test(Queue, static_cast(16384)); + } std::cout << "int to float ...\n"; - ReturnCode += test(static_cast(2)); + ReturnCode += test(Queue, static_cast(2)); std::cout << "float to int ...\n"; - ReturnCode += test(static_cast(-2.4f)); + ReturnCode += test(Queue, static_cast(-2.4f)); std::cout << "unsigned int to float ...\n"; - ReturnCode += test(static_cast(6)); + ReturnCode += test(Queue, static_cast(6)); std::cout << "float to unsigned int ...\n"; - ReturnCode += test(static_cast(-2.4f)); + ReturnCode += test(Queue, static_cast(-2.4f)); return ReturnCode; } diff --git a/SYCL/Basic/built-ins.cpp b/SYCL/Basic/built-ins.cpp index 06ffb9e240..e0f0be0d98 100644 --- a/SYCL/Basic/built-ins.cpp +++ b/SYCL/Basic/built-ins.cpp @@ -3,6 +3,11 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out +// RUN: %CPU_RUN_PLACEHOLDER %t_nonvar.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t_nonvar.out %ACC_CHECK_PLACEHOLDER + // CUDA does not support printf. // UNSUPPORTED: cuda // @@ -28,15 +33,25 @@ static const CONSTANT char format[] = "Hello, World! %d %f\n"; int main() { s::queue q{}; +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + if (!q.get_device().has(sycl::aspect::fp64)) { + std::cout + << "Test without __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ defined is " + "skipped because the device did not have fp64." + << std::endl; + return 0; + } +#endif + // Test printf q.submit([&](s::handler &CGH) { CGH.single_task([=]() { - s::ext::oneapi::experimental::printf(format, 123, 1.23); + s::ext::oneapi::experimental::printf(format, 123, 1.23f); // CHECK: {{(Hello, World! 123 1.23)?}} }); }).wait(); - s::ext::oneapi::experimental::printf(format, 321, 3.21); + s::ext::oneapi::experimental::printf(format, 321, 3.21f); // CHECK: {{(Hello, World! 123 1.23)?}} // Test common @@ -47,7 +62,7 @@ int main() { auto AccMin = BufMin.get_access(cgh); auto AccMax = BufMax.get_access(cgh); cgh.single_task([=]() { - AccMax[0] = s::max(s::cl_float2{0.5f, 2.5}, s::cl_float2{2.3f, 2.3}); + AccMax[0] = s::max(s::cl_float2{0.5f, 2.5f}, s::cl_float2{2.3f, 2.3f}); AccMin[0] = s::min(s::cl_float{0.5f}, s::cl_float{2.3f}); }); }); diff --git a/SYCL/Basic/half_builtins.cpp b/SYCL/Basic/half_builtins.cpp index 1ef9cb2da9..34fab08c89 100644 --- a/SYCL/Basic/half_builtins.cpp +++ b/SYCL/Basic/half_builtins.cpp @@ -165,6 +165,14 @@ template bool check(vec a, vec b) { int main() { queue q; + + if (!q.get_device().has(sycl::aspect::fp16)) { + std::cout + << "Test was skipped because the selected device does not support fp16" + << std::endl; + return 0; + } + float16 a, b, c, d; for (int i = 0; i < SZ_max; i++) { a[i] = i / (float)SZ_max; diff --git a/SYCL/Basic/scalar_vec_access.cpp b/SYCL/Basic/scalar_vec_access.cpp index b945db3964..fc12c7bb86 100644 --- a/SYCL/Basic/scalar_vec_access.cpp +++ b/SYCL/Basic/scalar_vec_access.cpp @@ -29,7 +29,7 @@ int main() { // Test that it is possible to get a reference to single element of the // vector type. This behavior could possibly change in the future, this // test is necessary to track that. - float4_t my_float4 = {0.0, 1.0, 2.0, 3.0}; + float4_t my_float4 = {0.0f, 1.0f, 2.0f, 3.0f}; float f[4]; for (int i = 0; i < 4; ++i) { f[i] = reinterpret_cast(&my_float4)[i]; @@ -40,14 +40,14 @@ int main() { } // Test that there is no template resolution error - sycl::float4 a = {1.0, 2.0, 3.0, 4.0}; + sycl::float4 a = {1.0f, 2.0f, 3.0f, 4.0f}; out << sycl::native::recip(a.x()) << sycl::endl; }); }); Q.wait(); // Test that there is no ambiguity in overload resolution. - sycl::float4 a = {1.0, 2.0, 3.0, 4.0}; + sycl::float4 a = {1.0f, 2.0f, 3.0f, 4.0f}; std::cout << a.x() << std::endl; return 0; diff --git a/SYCL/Basic/stream/stream.cpp b/SYCL/Basic/stream/stream.cpp index ebdd3c9458..2c286a22a0 100644 --- a/SYCL/Basic/stream/stream.cpp +++ b/SYCL/Basic/stream/stream.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER // RUN: %GPU_RUN_ON_LINUX_PLACEHOLDER %t.out %GPU_CHECK_ON_LINUX_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER @@ -97,26 +97,14 @@ int main() { // CHECK-NEXT: -12345678901245 // CHECK-NEXT: 12345678901245 - // Floating point types + // Floats Out << 33.4f << endl; - Out << 5.2 << endl; Out << -33.4f << endl; - Out << -5.2 << endl; - Out << 0.0003 << endl; - Out << -1.0 / 0.0 << endl; - Out << 1.0 / 0.0 << endl; - Out << sycl::sqrt(-1.0) << endl; Out << -1.0f / 0.0f << endl; Out << 1.0f / 0.0f << endl; Out << sycl::sqrt(-1.0f) << endl; // CHECK-NEXT: 33.4 - // CHECK-NEXT: 5.2 // CHECK-NEXT: -33.4 - // CHECK-NEXT: -5.2 - // CHECK-NEXT: 0.0003 - // CHECK-NEXT: -inf - // CHECK-NEXT: inf - // CHECK-NEXT: nan // CHECK-NEXT: -inf // CHECK-NEXT: inf // CHECK-NEXT: nan @@ -205,6 +193,33 @@ int main() { }); Queue.wait(); + if (Queue.get_device().has(sycl::aspect::fp64)) { + Queue.submit([&](handler &CGH) { + stream Out(1024, 80, CGH); + CGH.single_task([=]() { + // Double + Out << 5.2 << endl; + Out << -5.2 << endl; + Out << 0.0003 << endl; + Out << -1.0 / 0.0 << endl; + Out << 1.0 / 0.0 << endl; + Out << sycl::sqrt(-1.0) << endl; + }); + }); + Queue.wait(); + } else { + // Repeat skipped message same number of times as the number of skipped + // output lines. + for (size_t I = 0; I < 6; ++I) + std::cout << "Skipped double test." << std::endl; + } + // CHECK-NEXT: {{(5.2|Skipped double test.)}} + // CHECK-NEXT: {{(-5.2|Skipped double test.)}} + // CHECK-NEXT: {{(0.0003|Skipped double test.)}} + // CHECK-NEXT: {{(-inf|Skipped double test.)}} + // CHECK-NEXT: {{(inf|Skipped double test.)}} + // CHECK-NEXT: {{(nan|Skipped double test.)}} + // Stream in parallel_for Queue.submit([&](handler &CGH) { stream Out(1024, 80, CGH); diff --git a/SYCL/Basic/vector_operators.cpp b/SYCL/Basic/vector_operators.cpp index c52f7592b9..63a1af6dda 100644 --- a/SYCL/Basic/vector_operators.cpp +++ b/SYCL/Basic/vector_operators.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -38,6 +38,7 @@ template void check_vector_size() { } int main() { + s::queue Queue; /* Separate checks for NumElements=1 edge case */ @@ -46,7 +47,6 @@ int main() { vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -67,7 +67,6 @@ int main() { vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -94,7 +93,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -109,12 +107,11 @@ int main() { } // Operator <, cl_double - { + if (Queue.get_device().has(sycl::aspect::fp64)) { using res_vec_type = s::vec; res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -134,7 +131,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -149,12 +145,11 @@ int main() { } // Operator <=, cl_half - { + if (Queue.get_device().has(sycl::aspect::fp16)) { using res_vec_type = s::vec; res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -176,7 +171,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -196,7 +190,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -216,7 +209,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -236,7 +228,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -257,7 +248,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { diff --git a/SYCL/Complex/sycl_complex_stream_test.cpp b/SYCL/Complex/sycl_complex_stream_test.cpp index 7878f55984..9aa10c6db6 100644 --- a/SYCL/Complex/sycl_complex_stream_test.cpp +++ b/SYCL/Complex/sycl_complex_stream_test.cpp @@ -55,27 +55,37 @@ template struct test_istream_operator { } }; -int main() { - sycl::queue Q; - +template bool test_common(sycl::queue Q) { bool test_passes = true; - - test_passes &= - test_valid_types(Q, cmplx(1.5, -1.0)); test_passes &= - test_valid_types(Q, cmplx(INFINITY, INFINITY)); + test_valid_types(Q, cmplx(1.5, -1.0)); + test_passes &= test_valid_types( + Q, cmplx(INFINITY, INFINITY)); test_passes &= - test_valid_types(Q, cmplx(NAN, NAN)); + test_valid_types(Q, cmplx(NAN, NAN)); - test_passes &= test_valid_types(cmplx(1.5, -1.0)); + test_passes &= test_valid_types(cmplx(1.5, -1.0)); test_passes &= - test_valid_types(cmplx(INFINITY, INFINITY)); - test_passes &= test_valid_types(cmplx(NAN, NAN)); + test_valid_types(cmplx(INFINITY, INFINITY)); + test_passes &= test_valid_types(cmplx(NAN, NAN)); - test_passes &= test_valid_types(cmplx(1.5, -1.0)); + test_passes &= test_valid_types(cmplx(1.5, -1.0)); test_passes &= - test_valid_types(cmplx(INFINITY, INFINITY)); - test_passes &= test_valid_types(cmplx(NAN, NAN)); + test_valid_types(cmplx(INFINITY, INFINITY)); + test_passes &= test_valid_types(cmplx(NAN, NAN)); + return test_passes; +} + +int main() { + sycl::queue Q; + + bool test_passes = true; + + test_passes &= test_common(Q); + if (Q.get_device().has(sycl::aspect::fp16)) + test_passes &= test_common(Q); + if (Q.get_device().has(sycl::aspect::fp64)) + test_passes &= test_common(Q); if (!test_passes) std::cerr << "Stream operator with complex test fails\n"; diff --git a/SYCL/DeviceLib/built-ins/printf.cpp b/SYCL/DeviceLib/built-ins/printf.cpp index 5afc03f565..36aee3b8f2 100644 --- a/SYCL/DeviceLib/built-ins/printf.cpp +++ b/SYCL/DeviceLib/built-ins/printf.cpp @@ -2,10 +2,15 @@ // HIP doesn't support printf. // CUDA doesn't support vector format specifiers ("%v"). // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER +// +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out +// RUN: %CPU_RUN_PLACEHOLDER %t_nonvar.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t_nonvar.out %ACC_CHECK_PLACEHOLDER #include @@ -34,9 +39,8 @@ static const CONSTANT char format_vec[] = "%d,%d,%d,%d\n"; const CONSTANT char format_hello_world_2[] = "%lu: Hello, World!\n"; int main() { + queue Queue(default_selector_v); { - queue Queue(default_selector_v); - Queue.submit([&](handler &CGH) { CGH.single_task([=]() { // String @@ -54,17 +58,6 @@ int main() { // CHECK: 123 // CHECK-NEXT: -123 - // Floating point types - { - // You can declare format string in non-global scope, but in this case - // static keyword is required - static const CONSTANT char format[] = "%.1f\n"; - ext::oneapi::experimental::printf(format, 33.4f); - ext::oneapi::experimental::printf(format, -33.4f); - } - // CHECK-NEXT: 33.4 - // CHECK-NEXT: -33.4 - // Vectors sycl::vec v4{5, 6, 7, 8}; #if defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__) @@ -107,8 +100,38 @@ int main() { Queue.wait(); } +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + // Currently printf will promote floating point values to doubles. + // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ changes the behavior to not use + // a variadic function, so if it is defined it will not promote the floating + // point arguments. + if (Queue.get_device().has(sycl::aspect::fp64)) +#endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + { + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + // Floating point types + { + // You can declare format string in non-global scope, but in this case + // static keyword is required + static const CONSTANT char format[] = "%.1f\n"; + ext::oneapi::experimental::printf(format, 33.4f); + ext::oneapi::experimental::printf(format, -33.4f); + } + }); + }); + Queue.wait(); + } +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + else { + std::cout << "Skipped floating point test." << std::endl; + std::cout << "Skipped floating point test." << std::endl; + } +#endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + // CHECK-NEXT: {{(33.4|Skipped floating point test.)}} + // CHECK-NEXT: {{(-33.4|Skipped floating point test.)}} + { - queue Queue(default_selector_v); // printf in parallel_for Queue.submit([&](handler &CGH) { CGH.parallel_for(range<1>(10), [=](id<1> i) { diff --git a/SYCL/DeviceLib/built-ins/scalar_relational.cpp b/SYCL/DeviceLib/built-ins/scalar_relational.cpp index 3e920f70c6..c9af02b703 100644 --- a/SYCL/DeviceLib/built-ins/scalar_relational.cpp +++ b/SYCL/DeviceLib/built-ins/scalar_relational.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -11,12 +11,13 @@ namespace s = sycl; int main() { + s::queue myQueue; + // isequal-float { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -32,7 +33,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -48,7 +48,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -64,7 +63,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -80,7 +78,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -96,7 +93,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -112,7 +108,6 @@ int main() { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -128,7 +123,6 @@ int main() { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -143,7 +137,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -158,7 +151,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -173,7 +165,6 @@ int main() { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -184,11 +175,10 @@ int main() { } // isnormal-double - { + if (myQueue.get_device().has(sycl::aspect::fp64)) { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -203,7 +193,6 @@ int main() { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -219,7 +208,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -235,7 +223,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -250,7 +237,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -264,7 +250,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -279,7 +264,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -294,7 +278,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -309,7 +292,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -324,7 +306,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -339,7 +320,6 @@ int main() { s::cl_float r{0.0f}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -356,7 +336,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -373,7 +352,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -390,7 +368,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { diff --git a/SYCL/DeviceLib/built-ins/vector_geometric.cpp b/SYCL/DeviceLib/built-ins/vector_geometric.cpp index 4ee7735d5b..fc939e3cbb 100644 --- a/SYCL/DeviceLib/built-ins/vector_geometric.cpp +++ b/SYCL/DeviceLib/built-ins/vector_geometric.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -15,12 +15,13 @@ template bool isEqualTo(T x, T y, T epsilon = 0.005) { } int main() { + s::queue myQueue; + // dot { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -41,7 +42,6 @@ int main() { s::cl_float4 r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -74,11 +74,10 @@ int main() { } // cross (double) - { + if (myQueue.get_device().has(sycl::aspect::fp64)) { s::cl_double4 r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -115,7 +114,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -139,7 +137,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -158,7 +155,6 @@ int main() { s::cl_float2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -181,7 +177,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -205,7 +200,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -224,7 +218,6 @@ int main() { s::cl_float2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { diff --git a/SYCL/DeviceLib/built-ins/vector_relational.cpp b/SYCL/DeviceLib/built-ins/vector_relational.cpp index fc7571b1ae..7c6d20ba20 100644 --- a/SYCL/DeviceLib/built-ins/vector_relational.cpp +++ b/SYCL/DeviceLib/built-ins/vector_relational.cpp @@ -522,9 +522,9 @@ int main() { myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::bitselect(s::cl_float4{112.112, 12.12, 0, 0.0}, - s::cl_float4{34.34, 23.23, 1, 0.0}, - s::cl_float4{3.3, 6.6, 1, 0.0}); + AccR[0] = s::bitselect(s::cl_float4{112.112f, 12.12f, 0, 0.0f}, + s::cl_float4{34.34f, 23.23f, 1, 0.0f}, + s::cl_float4{3.3f, 6.6f, 1, 0.0f}); }); // Using NAN/INFINITY as any float produced consistent results // between host and device. }); diff --git a/SYCL/DeviceLib/std_complex_math_test.cpp b/SYCL/DeviceLib/std_complex_math_test.cpp index f130190e15..5adcd2ef0a 100644 --- a/SYCL/DeviceLib/std_complex_math_test.cpp +++ b/SYCL/DeviceLib/std_complex_math_test.cpp @@ -196,6 +196,18 @@ void device_complex_test_2(s::queue &deviceQueue) { #endif int main() { s::queue deviceQueue; + +#ifdef _WIN32 + // std::complex math on Windows uses doubles internally so fp64 is required to + // run this test. + if (!deviceQueue.get_device().has(s::aspect::fp64)) { + std::cout << "Skipping test as device does not support fp64 which is " + "required for math operations on std::complex on Windows." + << std::endl; + return 0; + } +#endif + device_complex_test_1(deviceQueue); #ifndef _WIN32 device_complex_test_2(deviceQueue); diff --git a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp index 755dc2566e..d98eb0160d 100644 --- a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp +++ b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp @@ -264,27 +264,35 @@ int main(void) { bool passed = true; using BinOp = esimd_test::BinaryOp; + bool SupportsDouble = dev.has(aspect::fp64); + bool SupportsHalf = dev.has(aspect::fp16); + auto arith_ops = esimd_test::ArithBinaryOpsNoDiv; passed &= test(arith_ops, q); passed &= test(arith_ops, q, 0.000001f); - if (dev.has(aspect::fp64)) + if (SupportsDouble) passed &= test(arith_ops, q, 1e-15); passed &= test(arith_ops, q, 0.000001f); - passed &= test(arith_ops, q, 1); - passed &= test(arith_ops, q, 1); - if (dev.has(aspect::fp64)) + if (SupportsHalf) + passed &= test(arith_ops, q, 1); + if (SupportsHalf) + passed &= test(arith_ops, q, 1); + if (SupportsDouble && SupportsHalf) passed &= test(arith_ops, q); passed &= test(arith_ops, q); #ifdef USE_BF16 passed &= test(arith_ops, q); - passed &= test(arith_ops, q, 0.03); + if (SupportsHalf) + passed &= test(arith_ops, q, 0.03); #endif // USE_BF16 #ifdef USE_TF32 passed &= test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 0.000001f); - passed &= test(arith_ops, q, 0.000001f); + if (SupportsHalf) + passed &= + test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 0.000001f); @@ -294,27 +302,31 @@ int main(void) { passed &= test(div_op, q); passed &= test(div_op, q, 0.000001f); #ifndef WA_BUG - if (dev.has(aspect::fp64)) + if (SupportsDouble) passed &= test(div_op, q); #endif // WA_BUG passed &= test(div_op, q, 0.000001f); - passed &= test(div_op, q, 1); - passed &= test(div_op, q, 1); + if (SupportsHalf) + passed &= test(div_op, q, 1); + if (SupportsHalf) + passed &= test(div_op, q, 1); #ifndef WA_BUG - if (dev.has(aspect::fp64)) + if (SupportsDouble && SupportsHalf) passed &= test(div_op, q); #endif // WA_BUG passed &= test(div_op, q); #ifdef USE_BF16 passed &= test(div_op, q); - passed &= test(div_op, q, 0.03); + if (SupportsHalf) + passed &= test(div_op, q, 0.03); #endif // USE_BF16 #ifdef USE_TF32 passed &= test(div_op, q, 0.000001f); passed &= test(div_op, q, 0.000001f); passed &= test(div_op, q, 0.000001f); - passed &= test(div_op, q, 0.000001f); + if (SupportsHalf) + passed &= test(div_op, q, 0.000001f); passed &= test(div_op, q, 0.000001f); passed &= test(div_op, q, 0.000001f); @@ -351,24 +363,28 @@ int main(void) { auto cmp_ops = esimd_test::CmpOps; passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); - if (dev.has(aspect::fp64)) + if (SupportsDouble) passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); - passed &= test(cmp_ops, q, 1); - passed &= test(cmp_ops, q, 1); - if (dev.has(aspect::fp64)) + if (SupportsHalf) + passed &= test(cmp_ops, q, 1); + if (SupportsHalf) + passed &= test(cmp_ops, q, 1); + if (SupportsDouble) passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); #ifdef USE_BF16 passed &= test(cmp_ops, q); - passed &= test(cmp_ops, q); + if (SupportsHalf) + passed &= test(cmp_ops, q); #endif // USE_BF16 #ifdef USE_TF32 passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); - passed &= test(cmp_ops, q); + if (SupportsHalf) + passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); #endif // USE_TF32 diff --git a/SYCL/ESIMD/api/saturation_smoke.cpp b/SYCL/ESIMD/api/saturation_smoke.cpp index 005cb1497e..3d2ab818d4 100644 --- a/SYCL/ESIMD/api/saturation_smoke.cpp +++ b/SYCL/ESIMD/api/saturation_smoke.cpp @@ -185,10 +185,13 @@ int main(int argc, char **argv) { std::cout << "Running on " << dev.get_info() << "\n"; const bool doublesSupported = dev.has(sycl::aspect::fp64); + const bool halfsSupported = dev.has(sycl::aspect::fp16); bool passed = true; - passed &= test(q); - passed &= test(q); + if (halfsSupported) + passed &= test(q); + if (halfsSupported) + passed &= test(q); passed &= test(q); if (doublesSupported) passed &= test(q); @@ -207,7 +210,8 @@ int main(int argc, char **argv) { passed &= test(q); passed &= test(q); - passed &= test(q); + if (halfsSupported) + passed &= test(q); if (doublesSupported) passed &= test(q); diff --git a/SYCL/ESIMD/api/simd_memory_access.cpp b/SYCL/ESIMD/api/simd_memory_access.cpp index 4912d52ca9..2e637c7f2f 100644 --- a/SYCL/ESIMD/api/simd_memory_access.cpp +++ b/SYCL/ESIMD/api/simd_memory_access.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // // The test checks functionality of the memory access APIs which are members of @@ -155,6 +155,7 @@ int main(int argc, char **argv) { auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; + const bool halfsSupported = dev.has(sycl::aspect::fp16); bool passed = true; passed &= test(q, size); @@ -165,7 +166,8 @@ int main(int argc, char **argv) { passed &= test(q, size); passed &= test(q, size); passed &= test(q, size); - passed &= test(q, size); + if (halfsSupported) + passed &= test(q, size); passed &= test(q, size); passed &= test(q, size); @@ -175,7 +177,8 @@ int main(int argc, char **argv) { passed &= test(q, size); passed &= test(q, size); passed &= test(q, size); - passed &= test(q, size); + if (halfsSupported) + passed &= test(q, size); std::cout << (passed ? "=== Test passed\n" : "=== Test FAILED\n"); return passed ? 0 : 1; diff --git a/SYCL/ESIMD/api/simd_view_copy_move_assign.cpp b/SYCL/ESIMD/api/simd_view_copy_move_assign.cpp index 811605fb65..11ea98577a 100644 --- a/SYCL/ESIMD/api/simd_view_copy_move_assign.cpp +++ b/SYCL/ESIMD/api/simd_view_copy_move_assign.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // This test checks the behavior of simd_view constructors @@ -164,7 +164,8 @@ int main(void) { bool passed = true; passed &= testT(q); passed &= testT(q); - passed &= testT(q); + if (dev.has(sycl::aspect::fp16)) + passed &= testT(q); return passed ? 0 : 1; } diff --git a/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp b/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp index b2d326b690..3c8e53d032 100644 --- a/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp +++ b/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp @@ -22,7 +22,8 @@ int main(int argc, char **argv) { << "\n"; bool passed = true; - passed &= test(q); + if (dev.has(sycl::aspect::fp16)) + passed &= test(q); passed &= test(q); if (dev.has(sycl::aspect::fp64)) passed &= test(q); diff --git a/SYCL/ESIMD/api/slm_gather_scatter.cpp b/SYCL/ESIMD/api/slm_gather_scatter.cpp index 5e3f1960d1..ceefb08f55 100644 --- a/SYCL/ESIMD/api/slm_gather_scatter.cpp +++ b/SYCL/ESIMD/api/slm_gather_scatter.cpp @@ -1,6 +1,6 @@ // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // // The test checks functionality of the slm_gather/slm_scatter ESIMD APIs. @@ -139,8 +139,10 @@ int main(void) { passed &= test(q); passed &= test(q); - passed &= test(q); - passed &= test(q); + if (dev.has(aspect::fp16)) { + passed &= test(q); + passed &= test(q); + } return passed ? 0 : 1; } diff --git a/SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp b/SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp index 3935790500..76451e6d30 100644 --- a/SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp +++ b/SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // // The test checks functionality of the slm gather/scatter ESIMD intrinsics. @@ -463,8 +463,10 @@ int main(int argc, char **argv) { passed &= test(q); passed &= test(q); passed &= test_vl1(q); - passed &= test_vl1(q); - passed &= test(q); + if (dev.has(aspect::fp16)) { + passed &= test_vl1(q); + passed &= test(q); + } std::cout << (!passed ? "TEST FAILED\n" : "TEST Passed\n"); return passed ? 0 : 1; diff --git a/SYCL/ESIMD/api/svm_gather_scatter.cpp b/SYCL/ESIMD/api/svm_gather_scatter.cpp index 7d149b3fc0..09cf3e9d30 100644 --- a/SYCL/ESIMD/api/svm_gather_scatter.cpp +++ b/SYCL/ESIMD/api/svm_gather_scatter.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu && !gpu-intel-pvc // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // Regression test for SVM gather/scatter API. @@ -104,12 +104,14 @@ int main(void) { Pass &= test(Q); Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); + if (Dev.has(aspect::fp16)) { + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + } Pass &= test(Q); Pass &= test(Q); diff --git a/SYCL/ESIMD/api/unary_ops_heavy.cpp b/SYCL/ESIMD/api/unary_ops_heavy.cpp index df14cecc0b..71206d0ed5 100644 --- a/SYCL/ESIMD/api/unary_ops_heavy.cpp +++ b/SYCL/ESIMD/api/unary_ops_heavy.cpp @@ -157,6 +157,8 @@ int main(void) { auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; + const bool doublesSupported = dev.has(sycl::aspect::fp64); + const bool halfsSupported = dev.has(sycl::aspect::fp16); bool passed = true; using UnOp = esimd_test::UnaryOp; @@ -171,10 +173,12 @@ int main(void) { passed &= test(mod_ops, q); passed &= test(mod_ops, q); passed &= test(mod_ops, q); - passed &= test(mod_ops, q); - passed &= test(mod_ops, q); + if (halfsSupported) + passed &= test(mod_ops, q); + if (halfsSupported) + passed &= test(mod_ops, q); passed &= test(mod_ops, q); - if (dev.has(aspect::fp64)) + if (doublesSupported) passed &= test(mod_ops, q); auto signed_ops = esimd_test::OpSeq{}; @@ -182,9 +186,10 @@ int main(void) { passed &= test(signed_ops, q); passed &= test(signed_ops, q); passed &= test(signed_ops, q); - passed &= test(signed_ops, q); + if (halfsSupported) + passed &= test(signed_ops, q); passed &= test(signed_ops, q); - if (dev.has(aspect::fp64)) + if (doublesSupported) passed &= test(signed_ops, q); #ifdef USE_BF16 diff --git a/SYCL/ESIMD/printf.cpp b/SYCL/ESIMD/printf.cpp index 4a65166ec5..0714852fe6 100644 --- a/SYCL/ESIMD/printf.cpp +++ b/SYCL/ESIMD/printf.cpp @@ -10,9 +10,17 @@ // UNSUPPORTED: cuda || hip // CUDA and HIP don't support printf. // -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // +// clang-format off +// Test using __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ is disabled until ESIMD +// supports it. +// FIXME: enable that test +// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out +// RUNx: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER +// clang-format on +// //===----------------------------------------------------------------------===// // // The test checks that ESIMD kernels support printf functionality. @@ -44,10 +52,8 @@ static const CONSTANT char format_hello_world[] = "Hello, World!\n"; const CONSTANT char format_int[] = "%d\n"; int main() { + queue Queue(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); { - queue Queue(esimd_test::ESIMDSelector, - esimd_test::createExceptionHandler()); - Queue.submit([&](handler &CGH) { CGH.single_task([=]() SYCL_ESIMD_KERNEL { // String @@ -60,17 +66,6 @@ int main() { // CHECK-NEXT: 123 // CHECK-NEXT: -123 - // Floating point types - { - // You can declare format string in non-global scope, but in this case - // static keyword is required - static const CONSTANT char format[] = "%f\n"; - oneapi::experimental::printf(format, 33.4f); - oneapi::experimental::printf(format, -33.4f); - } - // CHECK-NEXT: 33.4 - // CHECK-NEXT: -33.4 - // String types { static CONSTANT const char str_arg[] = "foo"; @@ -83,22 +78,59 @@ int main() { Queue.wait(); } +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + // Currently printf will promote floating point values to doubles. + // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ changes the behavior to not use + // a variadic function, so if it is defined it will not promote the floating + // point arguments. + if (Queue.get_device().has(sycl::aspect::fp64)) +#endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + { + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + // Floating point types + { + // You can declare format string in non-global scope, but in this case + // static keyword is required + static const CONSTANT char format[] = "%.1f\n"; + ext::oneapi::experimental::printf(format, 33.4f); + ext::oneapi::experimental::printf(format, -33.4f); + } + }); + }); + Queue.wait(); + } +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + else { + std::cout << "Skipped floating point test." << std::endl; + std::cout << "Skipped floating point test." << std::endl; + } +#endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + // CHECK-NEXT: {{(33.4|Skipped floating point test.)}} + // CHECK-NEXT: {{(-33.4|Skipped floating point test.)}} + { - queue Q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); // printf in parallel_for constexpr int SIMD_SIZE = 16; constexpr int WORK_SIZE = SIMD_SIZE; - int *Mem = malloc_shared(WORK_SIZE * SIMD_SIZE, Q); + int *Mem = malloc_shared(WORK_SIZE * SIMD_SIZE, Queue); for (int I = 0; I < WORK_SIZE * SIMD_SIZE; I++) Mem[I] = I; std::cout << "Start parallel_for:" << std::endl; - Q.parallel_for(range<1>(WORK_SIZE), [=](id<1> i) SYCL_ESIMD_KERNEL { - static const CONSTANT char STR_LU_D[] = "Thread-id: %d, Value: %d\n"; - ext::intel::esimd::simd Vec(Mem + i * SIMD_SIZE); - // cast to uint64_t to be sure that we pass 64-bit unsigned value - oneapi::experimental::printf(STR_LU_D, (uint64_t)i[0], (int)Vec[i]); - }).wait(); - free(Mem, Q); + Queue + .parallel_for(range<1>(WORK_SIZE), + [=](id<1> i) SYCL_ESIMD_KERNEL { + static const CONSTANT char STR_LU_D[] = + "Thread-id: %d, Value: %d\n"; + ext::intel::esimd::simd Vec( + Mem + i * SIMD_SIZE); + // cast to uint64_t to be sure that we pass 64-bit + // unsigned value + oneapi::experimental::printf(STR_LU_D, (uint64_t)i[0], + (int)Vec[i]); + }) + .wait(); + free(Mem, Queue); // CHECK-LABEL: Start parallel_for // CHECK-DAG: Thread-id: 0, Value: 0 // CHECK-DAG: Thread-id: 1, Value: 17 diff --git a/SYCL/ESIMD/regression/half_conversion_test.cpp b/SYCL/ESIMD/regression/half_conversion_test.cpp index 7be3dce9d1..7a661fa5fc 100644 --- a/SYCL/ESIMD/regression/half_conversion_test.cpp +++ b/SYCL/ESIMD/regression/half_conversion_test.cpp @@ -1,6 +1,6 @@ // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out //==- half_conversion_test.cpp - Test for half conversion under ESIMD_EMULATOR // backend -==/ @@ -81,7 +81,11 @@ int main(int argc, char *argv[]) { std::cout << "\n===================" << std::endl; passed &= test(q, 1); std::cout << "\n===================" << std::endl; - passed &= test(q, 1); + if (q.get_device().has(sycl::aspect::fp16)) + passed &= test(q, 1); + else + std::cout << "Half case skipped as the device does not support fp16." + << std::endl; std::cout << "\n===================" << std::endl; passed &= test(q, 1); std::cout << "\n===================" << std::endl; diff --git a/SYCL/ESIMD/regression/store_zero_const.cpp b/SYCL/ESIMD/regression/store_zero_const.cpp index dcb5896137..a6a27527f1 100644 --- a/SYCL/ESIMD/regression/store_zero_const.cpp +++ b/SYCL/ESIMD/regression/store_zero_const.cpp @@ -91,7 +91,8 @@ int main(void) { Passed &= test(Q); Passed &= test(Q); Passed &= test(Q); - Passed &= test(Q); + if (Q.get_device().has(aspect::fp64)) + Passed &= test(Q); std::cout << (Passed ? "Passed\n" : "FAILED\n"); return Passed ? 0 : 1; diff --git a/SYCL/ESIMD/vadd_half.cpp b/SYCL/ESIMD/vadd_half.cpp index 1482aae505..596feccb81 100644 --- a/SYCL/ESIMD/vadd_half.cpp +++ b/SYCL/ESIMD/vadd_half.cpp @@ -59,6 +59,13 @@ int main(int argc, char **argv) { auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; + if (!dev.has(sycl::aspect::fp16)) { + std::cout << "Test was skipped becasue the selected device does not " + "support sycl::aspect::fp16" + << std::endl; + return 0; + } + TstT *A = malloc_shared(Size, q); SrcT *B = malloc_shared(Size, q); using DstT = __ESIMD_DNS::computation_type_t; diff --git a/SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp b/SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp index 6c230fa4c8..a8ace052d9 100644 --- a/SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp +++ b/SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -84,7 +84,7 @@ int main() { std::fill(output.begin(), output.end(), std::complex(0, 0)); test(q, input, output); } - { + if (q.get_device().has(sycl::aspect::fp64)) { std::array, N> input; std::array, 3> output; for (int i = 0; i < N; ++i) { diff --git a/SYCL/GroupAlgorithm/broadcast.cpp b/SYCL/GroupAlgorithm/broadcast.cpp index 9d80740d17..a929bd2f4f 100644 --- a/SYCL/GroupAlgorithm/broadcast.cpp +++ b/SYCL/GroupAlgorithm/broadcast.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -84,7 +84,7 @@ int main() { std::fill(output.begin(), output.end(), std::complex(0, 0)); test(q, input, output); } - { + if (q.get_device().has(sycl::aspect::fp64)) { std::array, N> input; std::array, 3> output; for (int i = 0; i < N; ++i) { diff --git a/SYCL/Regression/local-arg-align.cpp b/SYCL/Regression/local-arg-align.cpp index f064a73c3b..ecd4fea37e 100644 --- a/SYCL/Regression/local-arg-align.cpp +++ b/SYCL/Regression/local-arg-align.cpp @@ -24,15 +24,15 @@ int main(int argc, char *argv[]) { buffer res(2); q.submit([&](sycl::handler &h) { - // Use two local buffers, one with an int and one with a double4 + // Use two local buffers, one with an int and one with a float4 local_accessor a(1, h); - local_accessor b(1, h); + local_accessor b(1, h); auto ares = res.get_access(h); // Manually capture kernel arguments to ensure an order with the int - // argument first and the double4 argument second. If the two arguments are - // simply laid out consecutively, the double4 argument will not be + // argument first and the float4 argument second. If the two arguments are + // simply laid out consecutively, the float4 argument will not be // correctly aligned. h.parallel_for(1, [a, b, ares](sycl::id<1> i) { // Get the addresses of the two local buffers @@ -52,10 +52,10 @@ int main(int argc, char *argv[]) { ret = -1; } - if (hres[1] % sizeof(double4) != 0) { + if (hres[1] % sizeof(float4) != 0) { std::cout << "Error: incorrect alignment for argument b, required alignment: " - << sizeof(double4) << ", address: " << (void *)hres[1] << std::endl; + << sizeof(float4) << ", address: " << (void *)hres[1] << std::endl; ret = -1; } diff --git a/SYCL/SubGroup/broadcast_fp16.cpp b/SYCL/SubGroup/broadcast_fp16.cpp index cde8e94477..4e2a35f660 100644 --- a/SYCL/SubGroup/broadcast_fp16.cpp +++ b/SYCL/SubGroup/broadcast_fp16.cpp @@ -13,7 +13,12 @@ int main() { queue Queue; - check(Queue); - std::cout << "Test passed." << std::endl; + if (Queue.get_device().has(sycl::aspect::fp16)) { + check(Queue); + std::cout << "Test passed." << std::endl; + } else { + std::cout << "Test skipped because device doesn't support aspect::fp16" + << std::endl; + } return 0; } diff --git a/SYCL/SubGroup/generic-shuffle.cpp b/SYCL/SubGroup/generic-shuffle.cpp index e979ddfa85..76be56ef3e 100644 --- a/SYCL/SubGroup/generic-shuffle.cpp +++ b/SYCL/SubGroup/generic-shuffle.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -225,11 +225,17 @@ int main() { check_struct>( Queue, ComplexFloatGenerator); - auto ComplexDoubleGenerator = [state = std::complex(0, 1)]() mutable { - return state += std::complex(2, 2); - }; - check_struct>( - Queue, ComplexDoubleGenerator); + if (Queue.get_device().has(sycl::aspect::fp64)) { + auto ComplexDoubleGenerator = [state = + std::complex(0, 1)]() mutable { + return state += std::complex(2, 2); + }; + check_struct>( + Queue, ComplexDoubleGenerator); + } else { + std::cout << "fp64 tests were skipped due to the device not supporting the " + "aspect."; + } std::cout << "Test passed." << std::endl; return 0; diff --git a/SYCL/SubGroup/shuffle_fp16.cpp b/SYCL/SubGroup/shuffle_fp16.cpp index da52ed3336..70224da203 100644 --- a/SYCL/SubGroup/shuffle_fp16.cpp +++ b/SYCL/SubGroup/shuffle_fp16.cpp @@ -22,7 +22,12 @@ int main() { queue Queue; - check(Queue); - std::cout << "Test passed." << std::endl; + if (Queue.get_device().has(sycl::aspect::fp16)) { + check(Queue); + std::cout << "Test passed." << std::endl; + } else { + std::cout << "Test skipped because device doesn't support aspect::fp16" + << std::endl; + } return 0; } diff --git a/SYCL/USM/copy.cpp b/SYCL/USM/copy.cpp index edc543cd28..616168ff70 100644 --- a/SYCL/USM/copy.cpp +++ b/SYCL/USM/copy.cpp @@ -20,20 +20,48 @@ template class transfer; static constexpr int N = 100; // should be even -struct test_struct { +struct test_struct_minimum { short a; int b; long c; long long d; - half e; float f; }; -bool operator==(const test_struct &lhs, const test_struct &rhs) { +bool operator==(const test_struct_minimum &lhs, + const test_struct_minimum &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.f == rhs.f; +} + +struct test_struct_all : public test_struct_minimum { + sycl::half e; + double g; +}; + +bool operator==(const test_struct_all &lhs, const test_struct_all &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g; +} + +struct test_struct_half : public test_struct_minimum { + sycl::half e; +}; + +bool operator==(const test_struct_half &lhs, const test_struct_half &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.e == rhs.e && lhs.f == rhs.f; } +struct test_struct_double : public test_struct_minimum { + double g; +}; + +bool operator==(const test_struct_double &lhs, const test_struct_double &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.f == rhs.f && lhs.g == rhs.g; +} + template T *regular(queue q, alloc kind) { return malloc(N, q, kind); } @@ -89,17 +117,34 @@ int main() { queue q; auto dev = q.get_device(); - test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242f}; + const bool DoublesSupported = dev.has(sycl::aspect::fp64); + const bool HalfsSupported = dev.has(sycl::aspect::fp16); + + test_struct_all test_obj_all{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; + test_struct_half test_obj_half{4, 42, 424, 4242, 4.2f, 4.242}; + test_struct_double test_obj_double{4, 42, 424, 4242, 4.242, 4.24242}; + test_struct_minimum test_obj_minimum{4, 42, 424, 4242, 4.242}; if (dev.has(aspect::usm_host_allocations)) { runTests(q, 4, alloc::host, alloc::host); runTests(q, 42, alloc::host, alloc::host); runTests(q, 424, alloc::host, alloc::host); runTests(q, 4242, alloc::host, alloc::host); - runTests(q, half(4.2f), alloc::host, alloc::host); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::host, alloc::host); runTests(q, 4.242f, alloc::host, alloc::host); - runTests(q, 4.24242, alloc::host, alloc::host); - runTests(q, test_obj, alloc::host, alloc::host); + if (DoublesSupported) + runTests(q, 4.24242, alloc::host, alloc::host); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::host, alloc::host); + else if (HalfsSupported) + runTests(q, test_obj_half, alloc::host, alloc::host); + else if (DoublesSupported) + runTests(q, test_obj_double, alloc::host, + alloc::host); + else + runTests(q, test_obj_minimum, alloc::host, + alloc::host); } if (dev.has(aspect::usm_shared_allocations)) { @@ -107,10 +152,22 @@ int main() { runTests(q, 42, alloc::shared, alloc::shared); runTests(q, 424, alloc::shared, alloc::shared); runTests(q, 4242, alloc::shared, alloc::shared); - runTests(q, half(4.2f), alloc::shared, alloc::shared); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::shared, alloc::shared); runTests(q, 4.242f, alloc::shared, alloc::shared); - runTests(q, 4.24242, alloc::shared, alloc::shared); - runTests(q, test_obj, alloc::shared, alloc::shared); + if (DoublesSupported) + runTests(q, 4.24242, alloc::shared, alloc::shared); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::shared, alloc::shared); + else if (HalfsSupported) + runTests(q, test_obj_half, alloc::shared, + alloc::shared); + else if (DoublesSupported) + runTests(q, test_obj_double, alloc::shared, + alloc::shared); + else + runTests(q, test_obj_minimum, alloc::shared, + alloc::shared); } if (dev.has(aspect::usm_device_allocations)) { @@ -118,10 +175,22 @@ int main() { runTests(q, 42, alloc::device, alloc::device); runTests(q, 424, alloc::device, alloc::device); runTests(q, 4242, alloc::device, alloc::device); - runTests(q, half(4.2f), alloc::device, alloc::device); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::device, alloc::device); runTests(q, 4.242f, alloc::device, alloc::device); - runTests(q, 4.24242, alloc::device, alloc::device); - runTests(q, test_obj, alloc::device, alloc::device); + if (DoublesSupported) + runTests(q, 4.24242, alloc::device, alloc::device); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::device, alloc::device); + else if (HalfsSupported) + runTests(q, test_obj_half, alloc::device, + alloc::device); + else if (DoublesSupported) + runTests(q, test_obj_double, alloc::device, + alloc::device); + else + runTests(q, test_obj_minimum, alloc::device, + alloc::device); } if (dev.has(aspect::usm_host_allocations) && @@ -130,10 +199,21 @@ int main() { runTests(q, 42, alloc::host, alloc::shared); runTests(q, 424, alloc::host, alloc::shared); runTests(q, 4242, alloc::host, alloc::shared); - runTests(q, half(4.2f), alloc::host, alloc::shared); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::host, alloc::shared); runTests(q, 4.242f, alloc::host, alloc::shared); - runTests(q, 4.24242, alloc::host, alloc::shared); - runTests(q, test_obj, alloc::host, alloc::shared); + if (DoublesSupported) + runTests(q, 4.24242, alloc::host, alloc::shared); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::host, alloc::shared); + else if (HalfsSupported) + runTests(q, test_obj_half, alloc::host, alloc::shared); + else if (DoublesSupported) + runTests(q, test_obj_double, alloc::host, + alloc::shared); + else + runTests(q, test_obj_minimum, alloc::host, + alloc::shared); } if (dev.has(aspect::usm_host_allocations) && @@ -142,10 +222,21 @@ int main() { runTests(q, 42, alloc::host, alloc::device); runTests(q, 424, alloc::host, alloc::device); runTests(q, 4242, alloc::host, alloc::device); - runTests(q, half(4.2f), alloc::host, alloc::device); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::host, alloc::device); runTests(q, 4.242f, alloc::host, alloc::device); - runTests(q, 4.24242, alloc::host, alloc::device); - runTests(q, test_obj, alloc::host, alloc::device); + if (DoublesSupported) + runTests(q, 4.24242, alloc::host, alloc::device); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::host, alloc::device); + else if (HalfsSupported) + runTests(q, test_obj_half, alloc::host, alloc::device); + else if (DoublesSupported) + runTests(q, test_obj_double, alloc::host, + alloc::device); + else + runTests(q, test_obj_minimum, alloc::host, + alloc::device); } if (dev.has(aspect::usm_shared_allocations) && @@ -154,10 +245,22 @@ int main() { runTests(q, 42, alloc::shared, alloc::device); runTests(q, 424, alloc::shared, alloc::device); runTests(q, 4242, alloc::shared, alloc::device); - runTests(q, half(4.2f), alloc::shared, alloc::device); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::shared, alloc::device); runTests(q, 4.242f, alloc::shared, alloc::device); - runTests(q, 4.24242, alloc::shared, alloc::device); - runTests(q, test_obj, alloc::shared, alloc::device); + if (DoublesSupported) + runTests(q, 4.24242, alloc::shared, alloc::device); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::shared, alloc::device); + else if (HalfsSupported) + runTests(q, test_obj_half, alloc::shared, + alloc::device); + else if (DoublesSupported) + runTests(q, test_obj_double, alloc::shared, + alloc::device); + else + runTests(q, test_obj_minimum, alloc::shared, + alloc::device); } return 0; diff --git a/SYCL/USM/fill.cpp b/SYCL/USM/fill.cpp index 49947fcc3d..a991db5968 100644 --- a/SYCL/USM/fill.cpp +++ b/SYCL/USM/fill.cpp @@ -20,21 +20,48 @@ template class usm_aligned_device_transfer; static constexpr int N = 100; -struct test_struct { +struct test_struct_minimum { short a; int b; long c; long long d; - sycl::half e; float f; +}; + +bool operator==(const test_struct_minimum &lhs, + const test_struct_minimum &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.f == rhs.f; +} + +struct test_struct_all : public test_struct_minimum { + sycl::half e; double g; }; -bool operator==(const test_struct &lhs, const test_struct &rhs) { +bool operator==(const test_struct_all &lhs, const test_struct_all &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g; } +struct test_struct_half : public test_struct_minimum { + sycl::half e; +}; + +bool operator==(const test_struct_half &lhs, const test_struct_half &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.e == rhs.e && lhs.f == rhs.f; +} + +struct test_struct_double : public test_struct_minimum { + double g; +}; + +bool operator==(const test_struct_double &lhs, const test_struct_double &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.f == rhs.f && lhs.g == rhs.g; +} + template void runHostTests(device dev, context ctxt, queue q, T val) { T *array; @@ -125,20 +152,31 @@ int main() { auto ctxt = q.get_context(); const bool DoublesSupported = dev.has(sycl::aspect::fp64); + const bool HalfsSupported = dev.has(sycl::aspect::fp16); - test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; + test_struct_all test_obj_all{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; + test_struct_half test_obj_half{4, 42, 424, 4242, 4.2f, 4.242}; + test_struct_double test_obj_double{4, 42, 424, 4242, 4.242, 4.24242}; + test_struct_minimum test_obj_minimum{4, 42, 424, 4242, 4.242}; if (dev.get_info()) { runHostTests(dev, ctxt, q, 4); runHostTests(dev, ctxt, q, 42); runHostTests(dev, ctxt, q, 424); runHostTests(dev, ctxt, q, 4242); - runHostTests(dev, ctxt, q, sycl::half(4.2f)); + if (HalfsSupported) + runHostTests(dev, ctxt, q, sycl::half(4.2f)); runHostTests(dev, ctxt, q, 4.242f); - if (DoublesSupported) { + if (DoublesSupported) runHostTests(dev, ctxt, q, 4.24242); - runHostTests(dev, ctxt, q, test_obj); - } + if (HalfsSupported && DoublesSupported) + runHostTests(dev, ctxt, q, test_obj_all); + else if (HalfsSupported) + runHostTests(dev, ctxt, q, test_obj_half); + else if (DoublesSupported) + runHostTests(dev, ctxt, q, test_obj_double); + else + runHostTests(dev, ctxt, q, test_obj_minimum); } if (dev.get_info()) { @@ -146,12 +184,19 @@ int main() { runSharedTests(dev, ctxt, q, 42); runSharedTests(dev, ctxt, q, 424); runSharedTests(dev, ctxt, q, 4242); - runSharedTests(dev, ctxt, q, sycl::half(4.2f)); + if (HalfsSupported) + runSharedTests(dev, ctxt, q, sycl::half(4.2f)); runSharedTests(dev, ctxt, q, 4.242f); - if (DoublesSupported) { + if (DoublesSupported) runSharedTests(dev, ctxt, q, 4.24242); - runSharedTests(dev, ctxt, q, test_obj); - } + if (HalfsSupported && DoublesSupported) + runSharedTests(dev, ctxt, q, test_obj_all); + else if (HalfsSupported) + runSharedTests(dev, ctxt, q, test_obj_half); + else if (DoublesSupported) + runSharedTests(dev, ctxt, q, test_obj_double); + else + runSharedTests(dev, ctxt, q, test_obj_minimum); } if (dev.get_info()) { @@ -159,12 +204,19 @@ int main() { runDeviceTests(dev, ctxt, q, 42); runDeviceTests(dev, ctxt, q, 420); runDeviceTests(dev, ctxt, q, 4242); - runDeviceTests(dev, ctxt, q, sycl::half(4.2f)); + if (HalfsSupported) + runDeviceTests(dev, ctxt, q, sycl::half(4.2f)); runDeviceTests(dev, ctxt, q, 4.242f); - if (DoublesSupported) { + if (DoublesSupported) runDeviceTests(dev, ctxt, q, 4.24242); - runDeviceTests(dev, ctxt, q, test_obj); - } + if (HalfsSupported && DoublesSupported) + runDeviceTests(dev, ctxt, q, test_obj_all); + else if (HalfsSupported) + runDeviceTests(dev, ctxt, q, test_obj_half); + else if (DoublesSupported) + runDeviceTests(dev, ctxt, q, test_obj_double); + else + runDeviceTests(dev, ctxt, q, test_obj_minimum); } return 0;