From 3c57e215bfb024ff5572f9cab999c271e0d4467b Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 27 Oct 2022 09:07:31 -0700 Subject: [PATCH 01/20] [SYCL] Prevent use of fp64 and fp16 when unsupported in more tests A number of tests currently use double and sycl::half despite them not being supported by the used device. Although it does not necessarily fail to run on those devices, they are not considered supported by the SYCL specification and should not be used in testing on those devices. This commit prevent parts of a selection of tests from running on devices that do not support the types used. Signed-off-by: Larsen, Steffen --- SYCL/Basic/bit_cast/bit_cast.cpp | 39 +++-- SYCL/Basic/half_builtins.cpp | 8 + SYCL/Basic/scalar_vec_access.cpp | 6 +- SYCL/Basic/stream/stream.cpp | 14 +- SYCL/Basic/vector_operators.cpp | 18 +-- SYCL/Complex/sycl_complex_stream_test.cpp | 38 +++-- .../DeviceLib/built-ins/scalar_relational.cpp | 29 +--- SYCL/DeviceLib/built-ins/vector_geometric.cpp | 15 +- .../DeviceLib/built-ins/vector_relational.cpp | 6 +- SYCL/Regression/local-arg-align.cpp | 12 +- SYCL/SubGroup/broadcast_fp16.cpp | 9 +- SYCL/SubGroup/generic-shuffle.cpp | 15 +- SYCL/USM/copy.cpp | 150 +++++++++++++++--- SYCL/USM/fill.cpp | 85 ++++++++-- 14 files changed, 292 insertions(+), 152 deletions(-) diff --git a/SYCL/Basic/bit_cast/bit_cast.cpp b/SYCL/Basic/bit_cast/bit_cast.cpp index d6b8ce6002..dfaf77ba1c 100644 --- a/SYCL/Basic/bit_cast/bit_cast.cpp +++ b/SYCL/Basic/bit_cast/bit_cast.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -13,11 +13,11 @@ constexpr sycl::access::mode sycl_write = sycl::access::mode::write; template class BitCastKernel; -template To doBitCast(const From &ValueToConvert) { +template +To doBitCast(sycl::queue Queue, const From &ValueToConvert) { std::vector Vec(1); { sycl::buffer Buf(Vec.data(), 1); - sycl::queue Queue; Queue.submit([&](sycl::handler &cgh) { auto acc = Buf.template get_access(cgh); cgh.single_task>([=]() { @@ -28,8 +28,10 @@ template To doBitCast(const From &ValueToConvert) { return Vec[0]; } -template int test(const From &Value) { - auto ValueConvertedTwoTimes = doBitCast(doBitCast(Value)); +template +int test(sycl::queue Queue, const From &Value) { + auto ValueConvertedTwoTimes = + doBitCast(Queue, doBitCast(Queue, Value)); bool isOriginalValueEqualsToConvertedTwoTimes = false; if (std::is_integral::value) { isOriginalValueEqualsToConvertedTwoTimes = Value == ValueConvertedTwoTimes; @@ -54,31 +56,34 @@ template int test(const From &Value) { } int main() { + sycl::queue Queue; int ReturnCode = 0; - std::cout << "sycl::half to unsigned short ...\n"; - ReturnCode += test(sycl::half(1.0f)); + if (Queue.get_device().has(sycl::aspect::fp16)) { + std::cout << "sycl::half to unsigned short ...\n"; + ReturnCode += test(Queue, sycl::half(1.0f)); - std::cout << "unsigned short to sycl::half ...\n"; - ReturnCode += test(static_cast(16384)); + std::cout << "unsigned short to sycl::half ...\n"; + ReturnCode += test(Queue, static_cast(16384)); - std::cout << "sycl::half to short ...\n"; - ReturnCode += test(sycl::half(1.0f)); + std::cout << "sycl::half to short ...\n"; + ReturnCode += test(Queue, sycl::half(1.0f)); - std::cout << "short to sycl::half ...\n"; - ReturnCode += test(static_cast(16384)); + std::cout << "short to sycl::half ...\n"; + ReturnCode += test(Queue, static_cast(16384)); + } std::cout << "int to float ...\n"; - ReturnCode += test(static_cast(2)); + ReturnCode += test(Queue, static_cast(2)); std::cout << "float to int ...\n"; - ReturnCode += test(static_cast(-2.4f)); + ReturnCode += test(Queue, static_cast(-2.4f)); std::cout << "unsigned int to float ...\n"; - ReturnCode += test(static_cast(6)); + ReturnCode += test(Queue, static_cast(6)); std::cout << "float to unsigned int ...\n"; - ReturnCode += test(static_cast(-2.4f)); + ReturnCode += test(Queue, static_cast(-2.4f)); return ReturnCode; } diff --git a/SYCL/Basic/half_builtins.cpp b/SYCL/Basic/half_builtins.cpp index 1ef9cb2da9..34fab08c89 100644 --- a/SYCL/Basic/half_builtins.cpp +++ b/SYCL/Basic/half_builtins.cpp @@ -165,6 +165,14 @@ template bool check(vec a, vec b) { int main() { queue q; + + if (!q.get_device().has(sycl::aspect::fp16)) { + std::cout + << "Test was skipped because the selected device does not support fp16" + << std::endl; + return 0; + } + float16 a, b, c, d; for (int i = 0; i < SZ_max; i++) { a[i] = i / (float)SZ_max; diff --git a/SYCL/Basic/scalar_vec_access.cpp b/SYCL/Basic/scalar_vec_access.cpp index b945db3964..fc12c7bb86 100644 --- a/SYCL/Basic/scalar_vec_access.cpp +++ b/SYCL/Basic/scalar_vec_access.cpp @@ -29,7 +29,7 @@ int main() { // Test that it is possible to get a reference to single element of the // vector type. This behavior could possibly change in the future, this // test is necessary to track that. - float4_t my_float4 = {0.0, 1.0, 2.0, 3.0}; + float4_t my_float4 = {0.0f, 1.0f, 2.0f, 3.0f}; float f[4]; for (int i = 0; i < 4; ++i) { f[i] = reinterpret_cast(&my_float4)[i]; @@ -40,14 +40,14 @@ int main() { } // Test that there is no template resolution error - sycl::float4 a = {1.0, 2.0, 3.0, 4.0}; + sycl::float4 a = {1.0f, 2.0f, 3.0f, 4.0f}; out << sycl::native::recip(a.x()) << sycl::endl; }); }); Q.wait(); // Test that there is no ambiguity in overload resolution. - sycl::float4 a = {1.0, 2.0, 3.0, 4.0}; + sycl::float4 a = {1.0f, 2.0f, 3.0f, 4.0f}; std::cout << a.x() << std::endl; return 0; diff --git a/SYCL/Basic/stream/stream.cpp b/SYCL/Basic/stream/stream.cpp index ebdd3c9458..30f12a1c2b 100644 --- a/SYCL/Basic/stream/stream.cpp +++ b/SYCL/Basic/stream/stream.cpp @@ -98,25 +98,15 @@ int main() { // CHECK-NEXT: 12345678901245 // Floating point types + // NOTE: Double-precision floating point tests cannot be done here as + // they may not be supported by the device. Out << 33.4f << endl; - Out << 5.2 << endl; Out << -33.4f << endl; - Out << -5.2 << endl; - Out << 0.0003 << endl; - Out << -1.0 / 0.0 << endl; - Out << 1.0 / 0.0 << endl; - Out << sycl::sqrt(-1.0) << endl; Out << -1.0f / 0.0f << endl; Out << 1.0f / 0.0f << endl; Out << sycl::sqrt(-1.0f) << endl; // CHECK-NEXT: 33.4 - // CHECK-NEXT: 5.2 // CHECK-NEXT: -33.4 - // CHECK-NEXT: -5.2 - // CHECK-NEXT: 0.0003 - // CHECK-NEXT: -inf - // CHECK-NEXT: inf - // CHECK-NEXT: nan // CHECK-NEXT: -inf // CHECK-NEXT: inf // CHECK-NEXT: nan diff --git a/SYCL/Basic/vector_operators.cpp b/SYCL/Basic/vector_operators.cpp index c52f7592b9..63a1af6dda 100644 --- a/SYCL/Basic/vector_operators.cpp +++ b/SYCL/Basic/vector_operators.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -38,6 +38,7 @@ template void check_vector_size() { } int main() { + s::queue Queue; /* Separate checks for NumElements=1 edge case */ @@ -46,7 +47,6 @@ int main() { vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -67,7 +67,6 @@ int main() { vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -94,7 +93,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -109,12 +107,11 @@ int main() { } // Operator <, cl_double - { + if (Queue.get_device().has(sycl::aspect::fp64)) { using res_vec_type = s::vec; res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -134,7 +131,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -149,12 +145,11 @@ int main() { } // Operator <=, cl_half - { + if (Queue.get_device().has(sycl::aspect::fp16)) { using res_vec_type = s::vec; res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -176,7 +171,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -196,7 +190,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -216,7 +209,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -236,7 +228,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { @@ -257,7 +248,6 @@ int main() { res_vec_type res; { s::buffer Buf(&res, s::range<1>(1)); - s::queue Queue; Queue.submit([&](s::handler &cgh) { auto Acc = Buf.get_access(cgh); cgh.single_task([=]() { diff --git a/SYCL/Complex/sycl_complex_stream_test.cpp b/SYCL/Complex/sycl_complex_stream_test.cpp index 7878f55984..9aa10c6db6 100644 --- a/SYCL/Complex/sycl_complex_stream_test.cpp +++ b/SYCL/Complex/sycl_complex_stream_test.cpp @@ -55,27 +55,37 @@ template struct test_istream_operator { } }; -int main() { - sycl::queue Q; - +template bool test_common(sycl::queue Q) { bool test_passes = true; - - test_passes &= - test_valid_types(Q, cmplx(1.5, -1.0)); test_passes &= - test_valid_types(Q, cmplx(INFINITY, INFINITY)); + test_valid_types(Q, cmplx(1.5, -1.0)); + test_passes &= test_valid_types( + Q, cmplx(INFINITY, INFINITY)); test_passes &= - test_valid_types(Q, cmplx(NAN, NAN)); + test_valid_types(Q, cmplx(NAN, NAN)); - test_passes &= test_valid_types(cmplx(1.5, -1.0)); + test_passes &= test_valid_types(cmplx(1.5, -1.0)); test_passes &= - test_valid_types(cmplx(INFINITY, INFINITY)); - test_passes &= test_valid_types(cmplx(NAN, NAN)); + test_valid_types(cmplx(INFINITY, INFINITY)); + test_passes &= test_valid_types(cmplx(NAN, NAN)); - test_passes &= test_valid_types(cmplx(1.5, -1.0)); + test_passes &= test_valid_types(cmplx(1.5, -1.0)); test_passes &= - test_valid_types(cmplx(INFINITY, INFINITY)); - test_passes &= test_valid_types(cmplx(NAN, NAN)); + test_valid_types(cmplx(INFINITY, INFINITY)); + test_passes &= test_valid_types(cmplx(NAN, NAN)); + return test_passes; +} + +int main() { + sycl::queue Q; + + bool test_passes = true; + + test_passes &= test_common(Q); + if (Q.get_device().has(sycl::aspect::fp16)) + test_passes &= test_common(Q); + if (Q.get_device().has(sycl::aspect::fp64)) + test_passes &= test_common(Q); if (!test_passes) std::cerr << "Stream operator with complex test fails\n"; diff --git a/SYCL/DeviceLib/built-ins/scalar_relational.cpp b/SYCL/DeviceLib/built-ins/scalar_relational.cpp index 3e920f70c6..885e02e66e 100644 --- a/SYCL/DeviceLib/built-ins/scalar_relational.cpp +++ b/SYCL/DeviceLib/built-ins/scalar_relational.cpp @@ -11,12 +11,13 @@ namespace s = sycl; int main() { + s::queue myQueue; + // isequal-float { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -32,7 +33,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -48,7 +48,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -64,7 +63,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -80,7 +78,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -96,7 +93,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -112,7 +108,6 @@ int main() { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -128,7 +123,6 @@ int main() { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -143,7 +137,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -158,7 +151,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -173,7 +165,6 @@ int main() { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -184,11 +175,10 @@ int main() { } // isnormal-double - { + if (myQueue.get_device().has(sycl::aspect::fp64)) { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -203,7 +193,6 @@ int main() { s::cl_int r{1}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -219,7 +208,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -235,7 +223,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -250,7 +237,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -264,7 +250,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -279,7 +264,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -294,7 +278,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -309,7 +292,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -324,7 +306,6 @@ int main() { s::cl_int r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task( @@ -339,7 +320,6 @@ int main() { s::cl_float r{0.0f}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -356,7 +336,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -373,7 +352,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -390,7 +368,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { diff --git a/SYCL/DeviceLib/built-ins/vector_geometric.cpp b/SYCL/DeviceLib/built-ins/vector_geometric.cpp index 4ee7735d5b..fc939e3cbb 100644 --- a/SYCL/DeviceLib/built-ins/vector_geometric.cpp +++ b/SYCL/DeviceLib/built-ins/vector_geometric.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -15,12 +15,13 @@ template bool isEqualTo(T x, T y, T epsilon = 0.005) { } int main() { + s::queue myQueue; + // dot { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -41,7 +42,6 @@ int main() { s::cl_float4 r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -74,11 +74,10 @@ int main() { } // cross (double) - { + if (myQueue.get_device().has(sycl::aspect::fp64)) { s::cl_double4 r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -115,7 +114,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -139,7 +137,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -158,7 +155,6 @@ int main() { s::cl_float2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -181,7 +177,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -205,7 +200,6 @@ int main() { s::cl_float r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { @@ -224,7 +218,6 @@ int main() { s::cl_float2 r{0}; { s::buffer BufR(&r, s::range<1>(1)); - s::queue myQueue; myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { diff --git a/SYCL/DeviceLib/built-ins/vector_relational.cpp b/SYCL/DeviceLib/built-ins/vector_relational.cpp index fc7571b1ae..7c6d20ba20 100644 --- a/SYCL/DeviceLib/built-ins/vector_relational.cpp +++ b/SYCL/DeviceLib/built-ins/vector_relational.cpp @@ -522,9 +522,9 @@ int main() { myQueue.submit([&](s::handler &cgh) { auto AccR = BufR.get_access(cgh); cgh.single_task([=]() { - AccR[0] = s::bitselect(s::cl_float4{112.112, 12.12, 0, 0.0}, - s::cl_float4{34.34, 23.23, 1, 0.0}, - s::cl_float4{3.3, 6.6, 1, 0.0}); + AccR[0] = s::bitselect(s::cl_float4{112.112f, 12.12f, 0, 0.0f}, + s::cl_float4{34.34f, 23.23f, 1, 0.0f}, + s::cl_float4{3.3f, 6.6f, 1, 0.0f}); }); // Using NAN/INFINITY as any float produced consistent results // between host and device. }); diff --git a/SYCL/Regression/local-arg-align.cpp b/SYCL/Regression/local-arg-align.cpp index f064a73c3b..ecd4fea37e 100644 --- a/SYCL/Regression/local-arg-align.cpp +++ b/SYCL/Regression/local-arg-align.cpp @@ -24,15 +24,15 @@ int main(int argc, char *argv[]) { buffer res(2); q.submit([&](sycl::handler &h) { - // Use two local buffers, one with an int and one with a double4 + // Use two local buffers, one with an int and one with a float4 local_accessor a(1, h); - local_accessor b(1, h); + local_accessor b(1, h); auto ares = res.get_access(h); // Manually capture kernel arguments to ensure an order with the int - // argument first and the double4 argument second. If the two arguments are - // simply laid out consecutively, the double4 argument will not be + // argument first and the float4 argument second. If the two arguments are + // simply laid out consecutively, the float4 argument will not be // correctly aligned. h.parallel_for(1, [a, b, ares](sycl::id<1> i) { // Get the addresses of the two local buffers @@ -52,10 +52,10 @@ int main(int argc, char *argv[]) { ret = -1; } - if (hres[1] % sizeof(double4) != 0) { + if (hres[1] % sizeof(float4) != 0) { std::cout << "Error: incorrect alignment for argument b, required alignment: " - << sizeof(double4) << ", address: " << (void *)hres[1] << std::endl; + << sizeof(float4) << ", address: " << (void *)hres[1] << std::endl; ret = -1; } diff --git a/SYCL/SubGroup/broadcast_fp16.cpp b/SYCL/SubGroup/broadcast_fp16.cpp index cde8e94477..4e2a35f660 100644 --- a/SYCL/SubGroup/broadcast_fp16.cpp +++ b/SYCL/SubGroup/broadcast_fp16.cpp @@ -13,7 +13,12 @@ int main() { queue Queue; - check(Queue); - std::cout << "Test passed." << std::endl; + if (Queue.get_device().has(sycl::aspect::fp16)) { + check(Queue); + std::cout << "Test passed." << std::endl; + } else { + std::cout << "Test skipped because device doesn't support aspect::fp16" + << std::endl; + } return 0; } diff --git a/SYCL/SubGroup/generic-shuffle.cpp b/SYCL/SubGroup/generic-shuffle.cpp index e979ddfa85..9a04729ae8 100644 --- a/SYCL/SubGroup/generic-shuffle.cpp +++ b/SYCL/SubGroup/generic-shuffle.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -225,11 +225,14 @@ int main() { check_struct>( Queue, ComplexFloatGenerator); - auto ComplexDoubleGenerator = [state = std::complex(0, 1)]() mutable { - return state += std::complex(2, 2); - }; - check_struct>( - Queue, ComplexDoubleGenerator); + if (Queue.get_device().has(sycl::aspect::fp64)) { + auto ComplexDoubleGenerator = [state = + std::complex(0, 1)]() mutable { + return state += std::complex(2, 2); + }; + check_struct>( + Queue, ComplexDoubleGenerator); + } std::cout << "Test passed." << std::endl; return 0; diff --git a/SYCL/USM/copy.cpp b/SYCL/USM/copy.cpp index edc543cd28..9886228dc3 100644 --- a/SYCL/USM/copy.cpp +++ b/SYCL/USM/copy.cpp @@ -20,20 +20,49 @@ template class transfer; static constexpr int N = 100; // should be even -struct test_struct { +struct test_struct_minimum { short a; int b; long c; long long d; - half e; float f; }; -bool operator==(const test_struct &lhs, const test_struct &rhs) { +bool operator==(const test_struct_minimum &lhs, + const test_struct_minimum &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.f == rhs.f; +} + +struct test_struct_all : public test_struct_minimum { + sycl::half e; + double g; +}; + +bool operator==(const test_struct_all &lhs, const test_struct_all &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g; +} + +struct test_struct_whalf : public test_struct_minimum { + sycl::half e; +}; + +bool operator==(const test_struct_whalf &lhs, const test_struct_whalf &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.e == rhs.e && lhs.f == rhs.f; } +struct test_struct_wdouble : public test_struct_minimum { + double g; +}; + +bool operator==(const test_struct_wdouble &lhs, + const test_struct_wdouble &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.f == rhs.f && lhs.g == rhs.g; +} + template T *regular(queue q, alloc kind) { return malloc(N, q, kind); } @@ -89,17 +118,34 @@ int main() { queue q; auto dev = q.get_device(); - test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242f}; + const bool DoublesSupported = dev.has(sycl::aspect::fp64); + const bool HalfsSupported = dev.has(sycl::aspect::fp16); + + test_struct_all test_obj_all{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; + test_struct_whalf test_obj_whalf{4, 42, 424, 4242, 4.2f, 4.242}; + test_struct_wdouble test_obj_wdouble{4, 42, 424, 4242, 4.242, 4.24242}; + test_struct_minimum test_obj_minimum{4, 42, 424, 4242, 4.242}; if (dev.has(aspect::usm_host_allocations)) { runTests(q, 4, alloc::host, alloc::host); runTests(q, 42, alloc::host, alloc::host); runTests(q, 424, alloc::host, alloc::host); runTests(q, 4242, alloc::host, alloc::host); - runTests(q, half(4.2f), alloc::host, alloc::host); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::host, alloc::host); runTests(q, 4.242f, alloc::host, alloc::host); - runTests(q, 4.24242, alloc::host, alloc::host); - runTests(q, test_obj, alloc::host, alloc::host); + if (DoublesSupported) + runTests(q, 4.24242, alloc::host, alloc::host); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::host, alloc::host); + else if (HalfsSupported) + runTests(q, test_obj_whalf, alloc::host, alloc::host); + else if (DoublesSupported) + runTests(q, test_obj_wdouble, alloc::host, + alloc::host); + else + runTests(q, test_obj_minimum, alloc::host, + alloc::host); } if (dev.has(aspect::usm_shared_allocations)) { @@ -107,10 +153,22 @@ int main() { runTests(q, 42, alloc::shared, alloc::shared); runTests(q, 424, alloc::shared, alloc::shared); runTests(q, 4242, alloc::shared, alloc::shared); - runTests(q, half(4.2f), alloc::shared, alloc::shared); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::shared, alloc::shared); runTests(q, 4.242f, alloc::shared, alloc::shared); - runTests(q, 4.24242, alloc::shared, alloc::shared); - runTests(q, test_obj, alloc::shared, alloc::shared); + if (DoublesSupported) + runTests(q, 4.24242, alloc::shared, alloc::shared); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::shared, alloc::shared); + else if (HalfsSupported) + runTests(q, test_obj_whalf, alloc::shared, + alloc::shared); + else if (DoublesSupported) + runTests(q, test_obj_wdouble, alloc::shared, + alloc::shared); + else + runTests(q, test_obj_minimum, alloc::shared, + alloc::shared); } if (dev.has(aspect::usm_device_allocations)) { @@ -118,10 +176,22 @@ int main() { runTests(q, 42, alloc::device, alloc::device); runTests(q, 424, alloc::device, alloc::device); runTests(q, 4242, alloc::device, alloc::device); - runTests(q, half(4.2f), alloc::device, alloc::device); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::device, alloc::device); runTests(q, 4.242f, alloc::device, alloc::device); - runTests(q, 4.24242, alloc::device, alloc::device); - runTests(q, test_obj, alloc::device, alloc::device); + if (DoublesSupported) + runTests(q, 4.24242, alloc::device, alloc::device); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::device, alloc::device); + else if (HalfsSupported) + runTests(q, test_obj_whalf, alloc::device, + alloc::device); + else if (DoublesSupported) + runTests(q, test_obj_wdouble, alloc::device, + alloc::device); + else + runTests(q, test_obj_minimum, alloc::device, + alloc::device); } if (dev.has(aspect::usm_host_allocations) && @@ -130,10 +200,22 @@ int main() { runTests(q, 42, alloc::host, alloc::shared); runTests(q, 424, alloc::host, alloc::shared); runTests(q, 4242, alloc::host, alloc::shared); - runTests(q, half(4.2f), alloc::host, alloc::shared); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::host, alloc::shared); runTests(q, 4.242f, alloc::host, alloc::shared); - runTests(q, 4.24242, alloc::host, alloc::shared); - runTests(q, test_obj, alloc::host, alloc::shared); + if (DoublesSupported) + runTests(q, 4.24242, alloc::host, alloc::shared); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::host, alloc::shared); + else if (HalfsSupported) + runTests(q, test_obj_whalf, alloc::host, + alloc::shared); + else if (DoublesSupported) + runTests(q, test_obj_wdouble, alloc::host, + alloc::shared); + else + runTests(q, test_obj_minimum, alloc::host, + alloc::shared); } if (dev.has(aspect::usm_host_allocations) && @@ -142,10 +224,22 @@ int main() { runTests(q, 42, alloc::host, alloc::device); runTests(q, 424, alloc::host, alloc::device); runTests(q, 4242, alloc::host, alloc::device); - runTests(q, half(4.2f), alloc::host, alloc::device); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::host, alloc::device); runTests(q, 4.242f, alloc::host, alloc::device); - runTests(q, 4.24242, alloc::host, alloc::device); - runTests(q, test_obj, alloc::host, alloc::device); + if (DoublesSupported) + runTests(q, 4.24242, alloc::host, alloc::device); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::host, alloc::device); + else if (HalfsSupported) + runTests(q, test_obj_whalf, alloc::host, + alloc::device); + else if (DoublesSupported) + runTests(q, test_obj_wdouble, alloc::host, + alloc::device); + else + runTests(q, test_obj_minimum, alloc::host, + alloc::device); } if (dev.has(aspect::usm_shared_allocations) && @@ -154,10 +248,22 @@ int main() { runTests(q, 42, alloc::shared, alloc::device); runTests(q, 424, alloc::shared, alloc::device); runTests(q, 4242, alloc::shared, alloc::device); - runTests(q, half(4.2f), alloc::shared, alloc::device); + if (HalfsSupported) + runTests(q, half(4.2f), alloc::shared, alloc::device); runTests(q, 4.242f, alloc::shared, alloc::device); - runTests(q, 4.24242, alloc::shared, alloc::device); - runTests(q, test_obj, alloc::shared, alloc::device); + if (DoublesSupported) + runTests(q, 4.24242, alloc::shared, alloc::device); + if (HalfsSupported && DoublesSupported) + runTests(q, test_obj_all, alloc::shared, alloc::device); + else if (HalfsSupported) + runTests(q, test_obj_whalf, alloc::shared, + alloc::device); + else if (DoublesSupported) + runTests(q, test_obj_wdouble, alloc::shared, + alloc::device); + else + runTests(q, test_obj_minimum, alloc::shared, + alloc::device); } return 0; diff --git a/SYCL/USM/fill.cpp b/SYCL/USM/fill.cpp index 49947fcc3d..19f7c8c187 100644 --- a/SYCL/USM/fill.cpp +++ b/SYCL/USM/fill.cpp @@ -20,21 +20,49 @@ template class usm_aligned_device_transfer; static constexpr int N = 100; -struct test_struct { +struct test_struct_minimum { short a; int b; long c; long long d; - sycl::half e; float f; +}; + +bool operator==(const test_struct_minimum &lhs, + const test_struct_minimum &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.f == rhs.f; +} + +struct test_struct_all : public test_struct_minimum { + sycl::half e; double g; }; -bool operator==(const test_struct &lhs, const test_struct &rhs) { +bool operator==(const test_struct_all &lhs, const test_struct_all &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g; } +struct test_struct_whalf : public test_struct_minimum { + sycl::half e; +}; + +bool operator==(const test_struct_whalf &lhs, const test_struct_whalf &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.e == rhs.e && lhs.f == rhs.f; +} + +struct test_struct_wdouble : public test_struct_minimum { + double g; +}; + +bool operator==(const test_struct_wdouble &lhs, + const test_struct_wdouble &rhs) { + return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && + lhs.f == rhs.f && lhs.g == rhs.g; +} + template void runHostTests(device dev, context ctxt, queue q, T val) { T *array; @@ -125,20 +153,31 @@ int main() { auto ctxt = q.get_context(); const bool DoublesSupported = dev.has(sycl::aspect::fp64); + const bool HalfsSupported = dev.has(sycl::aspect::fp16); - test_struct test_obj{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; + test_struct_all test_obj_all{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; + test_struct_whalf test_obj_whalf{4, 42, 424, 4242, 4.2f, 4.242}; + test_struct_wdouble test_obj_wdouble{4, 42, 424, 4242, 4.242, 4.24242}; + test_struct_minimum test_obj_minimum{4, 42, 424, 4242, 4.242}; if (dev.get_info()) { runHostTests(dev, ctxt, q, 4); runHostTests(dev, ctxt, q, 42); runHostTests(dev, ctxt, q, 424); runHostTests(dev, ctxt, q, 4242); - runHostTests(dev, ctxt, q, sycl::half(4.2f)); + if (HalfsSupported) + runHostTests(dev, ctxt, q, sycl::half(4.2f)); runHostTests(dev, ctxt, q, 4.242f); - if (DoublesSupported) { + if (DoublesSupported) runHostTests(dev, ctxt, q, 4.24242); - runHostTests(dev, ctxt, q, test_obj); - } + if (HalfsSupported && DoublesSupported) + runHostTests(dev, ctxt, q, test_obj_all); + else if (HalfsSupported) + runHostTests(dev, ctxt, q, test_obj_whalf); + else if (DoublesSupported) + runHostTests(dev, ctxt, q, test_obj_wdouble); + else + runHostTests(dev, ctxt, q, test_obj_minimum); } if (dev.get_info()) { @@ -146,12 +185,19 @@ int main() { runSharedTests(dev, ctxt, q, 42); runSharedTests(dev, ctxt, q, 424); runSharedTests(dev, ctxt, q, 4242); - runSharedTests(dev, ctxt, q, sycl::half(4.2f)); + if (HalfsSupported) + runSharedTests(dev, ctxt, q, sycl::half(4.2f)); runSharedTests(dev, ctxt, q, 4.242f); - if (DoublesSupported) { + if (DoublesSupported) runSharedTests(dev, ctxt, q, 4.24242); - runSharedTests(dev, ctxt, q, test_obj); - } + if (HalfsSupported && DoublesSupported) + runSharedTests(dev, ctxt, q, test_obj_all); + else if (HalfsSupported) + runSharedTests(dev, ctxt, q, test_obj_whalf); + else if (DoublesSupported) + runSharedTests(dev, ctxt, q, test_obj_wdouble); + else + runSharedTests(dev, ctxt, q, test_obj_minimum); } if (dev.get_info()) { @@ -159,12 +205,19 @@ int main() { runDeviceTests(dev, ctxt, q, 42); runDeviceTests(dev, ctxt, q, 420); runDeviceTests(dev, ctxt, q, 4242); - runDeviceTests(dev, ctxt, q, sycl::half(4.2f)); + if (HalfsSupported) + runDeviceTests(dev, ctxt, q, sycl::half(4.2f)); runDeviceTests(dev, ctxt, q, 4.242f); - if (DoublesSupported) { + if (DoublesSupported) runDeviceTests(dev, ctxt, q, 4.24242); - runDeviceTests(dev, ctxt, q, test_obj); - } + if (HalfsSupported && DoublesSupported) + runDeviceTests(dev, ctxt, q, test_obj_all); + else if (HalfsSupported) + runDeviceTests(dev, ctxt, q, test_obj_whalf); + else if (DoublesSupported) + runDeviceTests(dev, ctxt, q, test_obj_wdouble); + else + runDeviceTests(dev, ctxt, q, test_obj_minimum); } return 0; From f2961311985c3cf6a04a0af9ca9b02601d0638b1 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 27 Oct 2022 09:59:31 -0700 Subject: [PATCH 02/20] Add printing when skipping part of generic-shuffle Signed-off-by: Larsen, Steffen --- SYCL/SubGroup/generic-shuffle.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/SYCL/SubGroup/generic-shuffle.cpp b/SYCL/SubGroup/generic-shuffle.cpp index 9a04729ae8..76be56ef3e 100644 --- a/SYCL/SubGroup/generic-shuffle.cpp +++ b/SYCL/SubGroup/generic-shuffle.cpp @@ -232,6 +232,9 @@ int main() { }; check_struct>( Queue, ComplexDoubleGenerator); + } else { + std::cout << "fp64 tests were skipped due to the device not supporting the " + "aspect."; } std::cout << "Test passed." << std::endl; From e8d795a50ca09ae317231f658826ecea22667109 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 31 Oct 2022 06:40:56 -0700 Subject: [PATCH 03/20] Add split to scalar_relational Signed-off-by: Larsen, Steffen --- SYCL/DeviceLib/built-ins/scalar_relational.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYCL/DeviceLib/built-ins/scalar_relational.cpp b/SYCL/DeviceLib/built-ins/scalar_relational.cpp index 885e02e66e..c9af02b703 100644 --- a/SYCL/DeviceLib/built-ins/scalar_relational.cpp +++ b/SYCL/DeviceLib/built-ins/scalar_relational.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out From b1567fc8c4e1e78eec3f1c79a914fe65ac7851e1 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 31 Oct 2022 06:44:44 -0700 Subject: [PATCH 04/20] Fix use of double literals in built-ins Signed-off-by: Larsen, Steffen --- SYCL/Basic/built-ins.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/Basic/built-ins.cpp b/SYCL/Basic/built-ins.cpp index 06ffb9e240..ce86d0eb92 100644 --- a/SYCL/Basic/built-ins.cpp +++ b/SYCL/Basic/built-ins.cpp @@ -31,12 +31,12 @@ int main() { // Test printf q.submit([&](s::handler &CGH) { CGH.single_task([=]() { - s::ext::oneapi::experimental::printf(format, 123, 1.23); + s::ext::oneapi::experimental::printf(format, 123, 1.23f); // CHECK: {{(Hello, World! 123 1.23)?}} }); }).wait(); - s::ext::oneapi::experimental::printf(format, 321, 3.21); + s::ext::oneapi::experimental::printf(format, 321, 3.21f); // CHECK: {{(Hello, World! 123 1.23)?}} // Test common @@ -47,7 +47,7 @@ int main() { auto AccMin = BufMin.get_access(cgh); auto AccMax = BufMax.get_access(cgh); cgh.single_task([=]() { - AccMax[0] = s::max(s::cl_float2{0.5f, 2.5}, s::cl_float2{2.3f, 2.3}); + AccMax[0] = s::max(s::cl_float2{0.5f, 2.5f}, s::cl_float2{2.3f, 2.3f}); AccMin[0] = s::min(s::cl_float{0.5f}, s::cl_float{2.3f}); }); }); From 7bb4bc877aaff43d4198ba6eb23129b6c1676bed Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Wed, 2 Nov 2022 07:14:20 -0700 Subject: [PATCH 05/20] Fix fp64 requirements in use of printf Signed-off-by: Larsen, Steffen --- SYCL/Basic/built-ins.cpp | 15 ++++++++ SYCL/DeviceLib/built-ins/printf.cpp | 55 ++++++++++++++++++++--------- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/SYCL/Basic/built-ins.cpp b/SYCL/Basic/built-ins.cpp index ce86d0eb92..e0f0be0d98 100644 --- a/SYCL/Basic/built-ins.cpp +++ b/SYCL/Basic/built-ins.cpp @@ -3,6 +3,11 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out +// RUN: %CPU_RUN_PLACEHOLDER %t_nonvar.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t_nonvar.out %ACC_CHECK_PLACEHOLDER + // CUDA does not support printf. // UNSUPPORTED: cuda // @@ -28,6 +33,16 @@ static const CONSTANT char format[] = "Hello, World! %d %f\n"; int main() { s::queue q{}; +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + if (!q.get_device().has(sycl::aspect::fp64)) { + std::cout + << "Test without __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ defined is " + "skipped because the device did not have fp64." + << std::endl; + return 0; + } +#endif + // Test printf q.submit([&](s::handler &CGH) { CGH.single_task([=]() { diff --git a/SYCL/DeviceLib/built-ins/printf.cpp b/SYCL/DeviceLib/built-ins/printf.cpp index 5afc03f565..1fa9f925ea 100644 --- a/SYCL/DeviceLib/built-ins/printf.cpp +++ b/SYCL/DeviceLib/built-ins/printf.cpp @@ -2,10 +2,15 @@ // HIP doesn't support printf. // CUDA doesn't support vector format specifiers ("%v"). // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER +// +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out +// RUN: %CPU_RUN_PLACEHOLDER %t_nonvar.out %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECK,CHECK-NONVAR +// RUN: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK,CHECK-NONVAR +// RUN: %ACC_RUN_PLACEHOLDER %t_nonvar.out %ACC_CHECK_PLACEHOLDER --check-prefixes=CHECK,CHECK-NONVAR #include @@ -34,9 +39,8 @@ static const CONSTANT char format_vec[] = "%d,%d,%d,%d\n"; const CONSTANT char format_hello_world_2[] = "%lu: Hello, World!\n"; int main() { + queue Queue(default_selector_v); { - queue Queue(default_selector_v); - Queue.submit([&](handler &CGH) { CGH.single_task([=]() { // String @@ -54,17 +58,6 @@ int main() { // CHECK: 123 // CHECK-NEXT: -123 - // Floating point types - { - // You can declare format string in non-global scope, but in this case - // static keyword is required - static const CONSTANT char format[] = "%.1f\n"; - ext::oneapi::experimental::printf(format, 33.4f); - ext::oneapi::experimental::printf(format, -33.4f); - } - // CHECK-NEXT: 33.4 - // CHECK-NEXT: -33.4 - // Vectors sycl::vec v4{5, 6, 7, 8}; #if defined(__SYCL_DEVICE_ONLY__) && defined(__SPIR__) @@ -107,8 +100,38 @@ int main() { Queue.wait(); } +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + // Currently printf will promote floating point values to doubles. + // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ changes the behavior to not use + // a variadic function, so if it is defined it will not promote the floating + // point arguments. + if (Queue.get_device().has(sycl::aspect::fp64)) +#endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + { + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + // Floating point types + { + // You can declare format string in non-global scope, but in this case + // static keyword is required + static const CONSTANT char format[] = "%.1f\n"; + ext::oneapi::experimental::printf(format, 33.4f); + ext::oneapi::experimental::printf(format, -33.4f); + } + }); + }); + Queue.wait(); + } +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + else { + std::cout << "Skipped floating point test." << std::endl; + std::cout << "Skipped floating point test." << std::endl; + } +#endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + // CHECK-NONVAR: {{(33.4|Skipped floating point test.)}} + // CHECK-NONVAR-NEXT: {{(-33.4|Skipped floating point test.)}} + { - queue Queue(default_selector_v); // printf in parallel_for Queue.submit([&](handler &CGH) { CGH.parallel_for(range<1>(10), [=](id<1> i) { @@ -118,7 +141,7 @@ int main() { }); }); Queue.wait(); - // CHECK-NEXT: {{[0-9]+}}: Hello, World! + // CHECK: {{[0-9]+}}: Hello, World! // CHECK-NEXT: {{[0-9]+}}: Hello, World! // CHECK-NEXT: {{[0-9]+}}: Hello, World! // CHECK-NEXT: {{[0-9]+}}: Hello, World! From 7fea540866dc0541174ad914bbb6597ffac12894 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 05:55:10 -0700 Subject: [PATCH 06/20] Adjust ESIMD printf and change store_zero_const requirement Signed-off-by: Larsen, Steffen --- SYCL/DeviceLib/built-ins/printf.cpp | 12 +++--- SYCL/ESIMD/printf.cpp | 49 +++++++++++++++------- SYCL/ESIMD/regression/store_zero_const.cpp | 3 +- 3 files changed, 43 insertions(+), 21 deletions(-) diff --git a/SYCL/DeviceLib/built-ins/printf.cpp b/SYCL/DeviceLib/built-ins/printf.cpp index 1fa9f925ea..36aee3b8f2 100644 --- a/SYCL/DeviceLib/built-ins/printf.cpp +++ b/SYCL/DeviceLib/built-ins/printf.cpp @@ -8,9 +8,9 @@ // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER // // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out -// RUN: %CPU_RUN_PLACEHOLDER %t_nonvar.out %CPU_CHECK_PLACEHOLDER --check-prefixes=CHECK,CHECK-NONVAR -// RUN: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER --check-prefixes=CHECK,CHECK-NONVAR -// RUN: %ACC_RUN_PLACEHOLDER %t_nonvar.out %ACC_CHECK_PLACEHOLDER --check-prefixes=CHECK,CHECK-NONVAR +// RUN: %CPU_RUN_PLACEHOLDER %t_nonvar.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t_nonvar.out %ACC_CHECK_PLACEHOLDER #include @@ -128,8 +128,8 @@ int main() { std::cout << "Skipped floating point test." << std::endl; } #endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ - // CHECK-NONVAR: {{(33.4|Skipped floating point test.)}} - // CHECK-NONVAR-NEXT: {{(-33.4|Skipped floating point test.)}} + // CHECK-NEXT: {{(33.4|Skipped floating point test.)}} + // CHECK-NEXT: {{(-33.4|Skipped floating point test.)}} { // printf in parallel_for @@ -141,7 +141,7 @@ int main() { }); }); Queue.wait(); - // CHECK: {{[0-9]+}}: Hello, World! + // CHECK-NEXT: {{[0-9]+}}: Hello, World! // CHECK-NEXT: {{[0-9]+}}: Hello, World! // CHECK-NEXT: {{[0-9]+}}: Hello, World! // CHECK-NEXT: {{[0-9]+}}: Hello, World! diff --git a/SYCL/ESIMD/printf.cpp b/SYCL/ESIMD/printf.cpp index 4a65166ec5..c843473d1a 100644 --- a/SYCL/ESIMD/printf.cpp +++ b/SYCL/ESIMD/printf.cpp @@ -13,6 +13,9 @@ // RUN: %clangxx -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out +// RUN: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER +// //===----------------------------------------------------------------------===// // // The test checks that ESIMD kernels support printf functionality. @@ -44,9 +47,8 @@ static const CONSTANT char format_hello_world[] = "Hello, World!\n"; const CONSTANT char format_int[] = "%d\n"; int main() { + queue Queue(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); { - queue Queue(esimd_test::ESIMDSelector, - esimd_test::createExceptionHandler()); Queue.submit([&](handler &CGH) { CGH.single_task([=]() SYCL_ESIMD_KERNEL { @@ -60,17 +62,6 @@ int main() { // CHECK-NEXT: 123 // CHECK-NEXT: -123 - // Floating point types - { - // You can declare format string in non-global scope, but in this case - // static keyword is required - static const CONSTANT char format[] = "%f\n"; - oneapi::experimental::printf(format, 33.4f); - oneapi::experimental::printf(format, -33.4f); - } - // CHECK-NEXT: 33.4 - // CHECK-NEXT: -33.4 - // String types { static CONSTANT const char str_arg[] = "foo"; @@ -83,8 +74,38 @@ int main() { Queue.wait(); } +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + // Currently printf will promote floating point values to doubles. + // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ changes the behavior to not use + // a variadic function, so if it is defined it will not promote the floating + // point arguments. + if (Queue.get_device().has(sycl::aspect::fp64)) +#endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + { + Queue.submit([&](handler &CGH) { + CGH.single_task([=]() { + // Floating point types + { + // You can declare format string in non-global scope, but in this case + // static keyword is required + static const CONSTANT char format[] = "%.1f\n"; + ext::oneapi::experimental::printf(format, 33.4f); + ext::oneapi::experimental::printf(format, -33.4f); + } + }); + }); + Queue.wait(); + } +#ifndef __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + else { + std::cout << "Skipped floating point test." << std::endl; + std::cout << "Skipped floating point test." << std::endl; + } +#endif // __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ + // CHECK-NEXT: {{(33.4|Skipped floating point test.)}} + // CHECK-NEXT: {{(-33.4|Skipped floating point test.)}} + { - queue Q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); // printf in parallel_for constexpr int SIMD_SIZE = 16; constexpr int WORK_SIZE = SIMD_SIZE; diff --git a/SYCL/ESIMD/regression/store_zero_const.cpp b/SYCL/ESIMD/regression/store_zero_const.cpp index dcb5896137..a6a27527f1 100644 --- a/SYCL/ESIMD/regression/store_zero_const.cpp +++ b/SYCL/ESIMD/regression/store_zero_const.cpp @@ -91,7 +91,8 @@ int main(void) { Passed &= test(Q); Passed &= test(Q); Passed &= test(Q); - Passed &= test(Q); + if (Q.get_device().has(aspect::fp64)) + Passed &= test(Q); std::cout << (Passed ? "Passed\n" : "FAILED\n"); return Passed ? 0 : 1; From 55a534bb625306a665f0198daa49e5d445f94356 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 06:25:37 -0700 Subject: [PATCH 07/20] Fix ESIMD printf Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/printf.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/SYCL/ESIMD/printf.cpp b/SYCL/ESIMD/printf.cpp index c843473d1a..a0e0257b7b 100644 --- a/SYCL/ESIMD/printf.cpp +++ b/SYCL/ESIMD/printf.cpp @@ -49,7 +49,6 @@ const CONSTANT char format_int[] = "%d\n"; int main() { queue Queue(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); { - Queue.submit([&](handler &CGH) { CGH.single_task([=]() SYCL_ESIMD_KERNEL { // String @@ -109,17 +108,17 @@ int main() { // printf in parallel_for constexpr int SIMD_SIZE = 16; constexpr int WORK_SIZE = SIMD_SIZE; - int *Mem = malloc_shared(WORK_SIZE * SIMD_SIZE, Q); + int *Mem = malloc_shared(WORK_SIZE * SIMD_SIZE, Queue); for (int I = 0; I < WORK_SIZE * SIMD_SIZE; I++) Mem[I] = I; std::cout << "Start parallel_for:" << std::endl; - Q.parallel_for(range<1>(WORK_SIZE), [=](id<1> i) SYCL_ESIMD_KERNEL { + Queue.parallel_for(range<1>(WORK_SIZE), [=](id<1> i) SYCL_ESIMD_KERNEL { static const CONSTANT char STR_LU_D[] = "Thread-id: %d, Value: %d\n"; ext::intel::esimd::simd Vec(Mem + i * SIMD_SIZE); // cast to uint64_t to be sure that we pass 64-bit unsigned value oneapi::experimental::printf(STR_LU_D, (uint64_t)i[0], (int)Vec[i]); }).wait(); - free(Mem, Q); + free(Mem, Queue); // CHECK-LABEL: Start parallel_for // CHECK-DAG: Thread-id: 0, Value: 0 // CHECK-DAG: Thread-id: 1, Value: 17 From 2ce6b8b8706b99cf9bc974a5545cfe64cd412c6d Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 06:34:44 -0700 Subject: [PATCH 08/20] Fix formatting Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/printf.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/SYCL/ESIMD/printf.cpp b/SYCL/ESIMD/printf.cpp index a0e0257b7b..78df91f968 100644 --- a/SYCL/ESIMD/printf.cpp +++ b/SYCL/ESIMD/printf.cpp @@ -112,12 +112,19 @@ int main() { for (int I = 0; I < WORK_SIZE * SIMD_SIZE; I++) Mem[I] = I; std::cout << "Start parallel_for:" << std::endl; - Queue.parallel_for(range<1>(WORK_SIZE), [=](id<1> i) SYCL_ESIMD_KERNEL { - static const CONSTANT char STR_LU_D[] = "Thread-id: %d, Value: %d\n"; - ext::intel::esimd::simd Vec(Mem + i * SIMD_SIZE); - // cast to uint64_t to be sure that we pass 64-bit unsigned value - oneapi::experimental::printf(STR_LU_D, (uint64_t)i[0], (int)Vec[i]); - }).wait(); + Queue + .parallel_for(range<1>(WORK_SIZE), + [=](id<1> i) SYCL_ESIMD_KERNEL { + static const CONSTANT char STR_LU_D[] = + "Thread-id: %d, Value: %d\n"; + ext::intel::esimd::simd Vec( + Mem + i * SIMD_SIZE); + // cast to uint64_t to be sure that we pass 64-bit + // unsigned value + oneapi::experimental::printf(STR_LU_D, (uint64_t)i[0], + (int)Vec[i]); + }) + .wait(); free(Mem, Queue); // CHECK-LABEL: Start parallel_for // CHECK-DAG: Thread-id: 0, Value: 0 From ab56c0c372b9fe93e4ff0bedfca62df5d9e3cdcc Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 06:48:40 -0700 Subject: [PATCH 09/20] Split per-kernel in ESIMD printf Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/printf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYCL/ESIMD/printf.cpp b/SYCL/ESIMD/printf.cpp index 78df91f968..2c3410a7c7 100644 --- a/SYCL/ESIMD/printf.cpp +++ b/SYCL/ESIMD/printf.cpp @@ -10,7 +10,7 @@ // UNSUPPORTED: cuda || hip // CUDA and HIP don't support printf. // -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out From 26ac82d6552e1bd3dc31bbd84253cc86415eae09 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 07:01:26 -0700 Subject: [PATCH 10/20] Disable non-variadic printf case for ESIMD temporarily Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/printf.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/SYCL/ESIMD/printf.cpp b/SYCL/ESIMD/printf.cpp index 2c3410a7c7..3220718e0b 100644 --- a/SYCL/ESIMD/printf.cpp +++ b/SYCL/ESIMD/printf.cpp @@ -13,8 +13,10 @@ // RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out -// RUN: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER +// Test using __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ is disabled until ESIMD +// supports it. +// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out +// RUNx: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER // //===----------------------------------------------------------------------===// // From f224ad469919d6ca782873c762cf5e237280b9ad Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 07:17:56 -0700 Subject: [PATCH 11/20] Disable formatting on disabled runs Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/printf.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/SYCL/ESIMD/printf.cpp b/SYCL/ESIMD/printf.cpp index 3220718e0b..68bd01c3ef 100644 --- a/SYCL/ESIMD/printf.cpp +++ b/SYCL/ESIMD/printf.cpp @@ -13,10 +13,12 @@ // RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER // +// clang-format off // Test using __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ is disabled until ESIMD // supports it. // RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out // RUNx: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER +// clang-format on // //===----------------------------------------------------------------------===// // From 285e2aa7e754be42e257cd1138c2e91f6b220ad6 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 07:36:55 -0700 Subject: [PATCH 12/20] rename *_wdouble and *_whalf Signed-off-by: Larsen, Steffen --- SYCL/USM/copy.cpp | 57 ++++++++++++++++++++++------------------------- SYCL/USM/fill.cpp | 25 ++++++++++----------- 2 files changed, 39 insertions(+), 43 deletions(-) diff --git a/SYCL/USM/copy.cpp b/SYCL/USM/copy.cpp index 9886228dc3..616168ff70 100644 --- a/SYCL/USM/copy.cpp +++ b/SYCL/USM/copy.cpp @@ -44,21 +44,20 @@ bool operator==(const test_struct_all &lhs, const test_struct_all &rhs) { lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g; } -struct test_struct_whalf : public test_struct_minimum { +struct test_struct_half : public test_struct_minimum { sycl::half e; }; -bool operator==(const test_struct_whalf &lhs, const test_struct_whalf &rhs) { +bool operator==(const test_struct_half &lhs, const test_struct_half &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.e == rhs.e && lhs.f == rhs.f; } -struct test_struct_wdouble : public test_struct_minimum { +struct test_struct_double : public test_struct_minimum { double g; }; -bool operator==(const test_struct_wdouble &lhs, - const test_struct_wdouble &rhs) { +bool operator==(const test_struct_double &lhs, const test_struct_double &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.f == rhs.f && lhs.g == rhs.g; } @@ -122,8 +121,8 @@ int main() { const bool HalfsSupported = dev.has(sycl::aspect::fp16); test_struct_all test_obj_all{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; - test_struct_whalf test_obj_whalf{4, 42, 424, 4242, 4.2f, 4.242}; - test_struct_wdouble test_obj_wdouble{4, 42, 424, 4242, 4.242, 4.24242}; + test_struct_half test_obj_half{4, 42, 424, 4242, 4.2f, 4.242}; + test_struct_double test_obj_double{4, 42, 424, 4242, 4.242, 4.24242}; test_struct_minimum test_obj_minimum{4, 42, 424, 4242, 4.242}; if (dev.has(aspect::usm_host_allocations)) { @@ -139,10 +138,10 @@ int main() { if (HalfsSupported && DoublesSupported) runTests(q, test_obj_all, alloc::host, alloc::host); else if (HalfsSupported) - runTests(q, test_obj_whalf, alloc::host, alloc::host); + runTests(q, test_obj_half, alloc::host, alloc::host); else if (DoublesSupported) - runTests(q, test_obj_wdouble, alloc::host, - alloc::host); + runTests(q, test_obj_double, alloc::host, + alloc::host); else runTests(q, test_obj_minimum, alloc::host, alloc::host); @@ -161,11 +160,11 @@ int main() { if (HalfsSupported && DoublesSupported) runTests(q, test_obj_all, alloc::shared, alloc::shared); else if (HalfsSupported) - runTests(q, test_obj_whalf, alloc::shared, - alloc::shared); + runTests(q, test_obj_half, alloc::shared, + alloc::shared); else if (DoublesSupported) - runTests(q, test_obj_wdouble, alloc::shared, - alloc::shared); + runTests(q, test_obj_double, alloc::shared, + alloc::shared); else runTests(q, test_obj_minimum, alloc::shared, alloc::shared); @@ -184,11 +183,11 @@ int main() { if (HalfsSupported && DoublesSupported) runTests(q, test_obj_all, alloc::device, alloc::device); else if (HalfsSupported) - runTests(q, test_obj_whalf, alloc::device, - alloc::device); + runTests(q, test_obj_half, alloc::device, + alloc::device); else if (DoublesSupported) - runTests(q, test_obj_wdouble, alloc::device, - alloc::device); + runTests(q, test_obj_double, alloc::device, + alloc::device); else runTests(q, test_obj_minimum, alloc::device, alloc::device); @@ -208,11 +207,10 @@ int main() { if (HalfsSupported && DoublesSupported) runTests(q, test_obj_all, alloc::host, alloc::shared); else if (HalfsSupported) - runTests(q, test_obj_whalf, alloc::host, - alloc::shared); + runTests(q, test_obj_half, alloc::host, alloc::shared); else if (DoublesSupported) - runTests(q, test_obj_wdouble, alloc::host, - alloc::shared); + runTests(q, test_obj_double, alloc::host, + alloc::shared); else runTests(q, test_obj_minimum, alloc::host, alloc::shared); @@ -232,11 +230,10 @@ int main() { if (HalfsSupported && DoublesSupported) runTests(q, test_obj_all, alloc::host, alloc::device); else if (HalfsSupported) - runTests(q, test_obj_whalf, alloc::host, - alloc::device); + runTests(q, test_obj_half, alloc::host, alloc::device); else if (DoublesSupported) - runTests(q, test_obj_wdouble, alloc::host, - alloc::device); + runTests(q, test_obj_double, alloc::host, + alloc::device); else runTests(q, test_obj_minimum, alloc::host, alloc::device); @@ -256,11 +253,11 @@ int main() { if (HalfsSupported && DoublesSupported) runTests(q, test_obj_all, alloc::shared, alloc::device); else if (HalfsSupported) - runTests(q, test_obj_whalf, alloc::shared, - alloc::device); + runTests(q, test_obj_half, alloc::shared, + alloc::device); else if (DoublesSupported) - runTests(q, test_obj_wdouble, alloc::shared, - alloc::device); + runTests(q, test_obj_double, alloc::shared, + alloc::device); else runTests(q, test_obj_minimum, alloc::shared, alloc::device); diff --git a/SYCL/USM/fill.cpp b/SYCL/USM/fill.cpp index 19f7c8c187..a991db5968 100644 --- a/SYCL/USM/fill.cpp +++ b/SYCL/USM/fill.cpp @@ -44,21 +44,20 @@ bool operator==(const test_struct_all &lhs, const test_struct_all &rhs) { lhs.e == rhs.e && lhs.f == rhs.f && lhs.g == rhs.g; } -struct test_struct_whalf : public test_struct_minimum { +struct test_struct_half : public test_struct_minimum { sycl::half e; }; -bool operator==(const test_struct_whalf &lhs, const test_struct_whalf &rhs) { +bool operator==(const test_struct_half &lhs, const test_struct_half &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.e == rhs.e && lhs.f == rhs.f; } -struct test_struct_wdouble : public test_struct_minimum { +struct test_struct_double : public test_struct_minimum { double g; }; -bool operator==(const test_struct_wdouble &lhs, - const test_struct_wdouble &rhs) { +bool operator==(const test_struct_double &lhs, const test_struct_double &rhs) { return lhs.a == rhs.a && lhs.b == rhs.b && lhs.c == rhs.c && lhs.d == rhs.d && lhs.f == rhs.f && lhs.g == rhs.g; } @@ -156,8 +155,8 @@ int main() { const bool HalfsSupported = dev.has(sycl::aspect::fp16); test_struct_all test_obj_all{4, 42, 424, 4242, 4.2f, 4.242, 4.24242}; - test_struct_whalf test_obj_whalf{4, 42, 424, 4242, 4.2f, 4.242}; - test_struct_wdouble test_obj_wdouble{4, 42, 424, 4242, 4.242, 4.24242}; + test_struct_half test_obj_half{4, 42, 424, 4242, 4.2f, 4.242}; + test_struct_double test_obj_double{4, 42, 424, 4242, 4.242, 4.24242}; test_struct_minimum test_obj_minimum{4, 42, 424, 4242, 4.242}; if (dev.get_info()) { @@ -173,9 +172,9 @@ int main() { if (HalfsSupported && DoublesSupported) runHostTests(dev, ctxt, q, test_obj_all); else if (HalfsSupported) - runHostTests(dev, ctxt, q, test_obj_whalf); + runHostTests(dev, ctxt, q, test_obj_half); else if (DoublesSupported) - runHostTests(dev, ctxt, q, test_obj_wdouble); + runHostTests(dev, ctxt, q, test_obj_double); else runHostTests(dev, ctxt, q, test_obj_minimum); } @@ -193,9 +192,9 @@ int main() { if (HalfsSupported && DoublesSupported) runSharedTests(dev, ctxt, q, test_obj_all); else if (HalfsSupported) - runSharedTests(dev, ctxt, q, test_obj_whalf); + runSharedTests(dev, ctxt, q, test_obj_half); else if (DoublesSupported) - runSharedTests(dev, ctxt, q, test_obj_wdouble); + runSharedTests(dev, ctxt, q, test_obj_double); else runSharedTests(dev, ctxt, q, test_obj_minimum); } @@ -213,9 +212,9 @@ int main() { if (HalfsSupported && DoublesSupported) runDeviceTests(dev, ctxt, q, test_obj_all); else if (HalfsSupported) - runDeviceTests(dev, ctxt, q, test_obj_whalf); + runDeviceTests(dev, ctxt, q, test_obj_half); else if (DoublesSupported) - runDeviceTests(dev, ctxt, q, test_obj_wdouble); + runDeviceTests(dev, ctxt, q, test_obj_double); else runDeviceTests(dev, ctxt, q, test_obj_minimum); } From 57a44f52c254129d6e7ef63340a947af05c6ba90 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 09:36:31 -0700 Subject: [PATCH 13/20] Add group_broadcast and broadcast changes Signed-off-by: Larsen, Steffen --- SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp | 4 ++-- SYCL/GroupAlgorithm/broadcast.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp b/SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp index 6c230fa4c8..a8ace052d9 100644 --- a/SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp +++ b/SYCL/GroupAlgorithm/SYCL2020/group_broadcast.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -84,7 +84,7 @@ int main() { std::fill(output.begin(), output.end(), std::complex(0, 0)); test(q, input, output); } - { + if (q.get_device().has(sycl::aspect::fp64)) { std::array, N> input; std::array, 3> output; for (int i = 0; i < N; ++i) { diff --git a/SYCL/GroupAlgorithm/broadcast.cpp b/SYCL/GroupAlgorithm/broadcast.cpp index 9d80740d17..a929bd2f4f 100644 --- a/SYCL/GroupAlgorithm/broadcast.cpp +++ b/SYCL/GroupAlgorithm/broadcast.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out @@ -84,7 +84,7 @@ int main() { std::fill(output.begin(), output.end(), std::complex(0, 0)); test(q, input, output); } - { + if (q.get_device().has(sycl::aspect::fp64)) { std::array, N> input; std::array, 3> output; for (int i = 0; i < N; ++i) { From f800714da49f983028c9665eff6b1b237a8f8299 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 10:49:35 -0700 Subject: [PATCH 14/20] Re-add fp64 stream case Signed-off-by: Larsen, Steffen --- SYCL/Basic/stream/stream.cpp | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/SYCL/Basic/stream/stream.cpp b/SYCL/Basic/stream/stream.cpp index 30f12a1c2b..2c286a22a0 100644 --- a/SYCL/Basic/stream/stream.cpp +++ b/SYCL/Basic/stream/stream.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER // RUN: %GPU_RUN_ON_LINUX_PLACEHOLDER %t.out %GPU_CHECK_ON_LINUX_PLACEHOLDER // RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER @@ -97,9 +97,7 @@ int main() { // CHECK-NEXT: -12345678901245 // CHECK-NEXT: 12345678901245 - // Floating point types - // NOTE: Double-precision floating point tests cannot be done here as - // they may not be supported by the device. + // Floats Out << 33.4f << endl; Out << -33.4f << endl; Out << -1.0f / 0.0f << endl; @@ -195,6 +193,33 @@ int main() { }); Queue.wait(); + if (Queue.get_device().has(sycl::aspect::fp64)) { + Queue.submit([&](handler &CGH) { + stream Out(1024, 80, CGH); + CGH.single_task([=]() { + // Double + Out << 5.2 << endl; + Out << -5.2 << endl; + Out << 0.0003 << endl; + Out << -1.0 / 0.0 << endl; + Out << 1.0 / 0.0 << endl; + Out << sycl::sqrt(-1.0) << endl; + }); + }); + Queue.wait(); + } else { + // Repeat skipped message same number of times as the number of skipped + // output lines. + for (size_t I = 0; I < 6; ++I) + std::cout << "Skipped double test." << std::endl; + } + // CHECK-NEXT: {{(5.2|Skipped double test.)}} + // CHECK-NEXT: {{(-5.2|Skipped double test.)}} + // CHECK-NEXT: {{(0.0003|Skipped double test.)}} + // CHECK-NEXT: {{(-inf|Skipped double test.)}} + // CHECK-NEXT: {{(inf|Skipped double test.)}} + // CHECK-NEXT: {{(nan|Skipped double test.)}} + // Stream in parallel_for Queue.submit([&](handler &CGH) { stream Out(1024, 80, CGH); From 38e623a57e95c34f056c50b12ca20874c5d1eb3f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 11:46:44 -0700 Subject: [PATCH 15/20] Add skip for shuffle_fp16 Signed-off-by: Larsen, Steffen --- SYCL/SubGroup/shuffle_fp16.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/SYCL/SubGroup/shuffle_fp16.cpp b/SYCL/SubGroup/shuffle_fp16.cpp index da52ed3336..70224da203 100644 --- a/SYCL/SubGroup/shuffle_fp16.cpp +++ b/SYCL/SubGroup/shuffle_fp16.cpp @@ -22,7 +22,12 @@ int main() { queue Queue; - check(Queue); - std::cout << "Test passed." << std::endl; + if (Queue.get_device().has(sycl::aspect::fp16)) { + check(Queue); + std::cout << "Test passed." << std::endl; + } else { + std::cout << "Test skipped because device doesn't support aspect::fp16" + << std::endl; + } return 0; } From 62674a76522c41dfbdcc8acff45012bce9d233de Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 12:05:10 -0700 Subject: [PATCH 16/20] Add fp16 check to ESIMD tests Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp | 52 ++++++++++++------- SYCL/ESIMD/api/saturation_smoke.cpp | 10 ++-- SYCL/ESIMD/api/simd_memory_access.cpp | 9 ++-- SYCL/ESIMD/api/simd_view_copy_move_assign.cpp | 5 +- SYCL/ESIMD/api/simd_view_select_2d_fp.cpp | 3 +- SYCL/ESIMD/api/slm_gather_scatter.cpp | 8 +-- SYCL/ESIMD/api/svm_gather_scatter.cpp | 16 +++--- SYCL/ESIMD/api/unary_ops_heavy.cpp | 15 ++++-- .../ESIMD/regression/half_conversion_test.cpp | 15 +++++- SYCL/ESIMD/vadd_half.cpp | 7 +++ 10 files changed, 96 insertions(+), 44 deletions(-) diff --git a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp index 755dc2566e..d98eb0160d 100644 --- a/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp +++ b/SYCL/ESIMD/api/bin_and_cmp_ops_heavy.cpp @@ -264,27 +264,35 @@ int main(void) { bool passed = true; using BinOp = esimd_test::BinaryOp; + bool SupportsDouble = dev.has(aspect::fp64); + bool SupportsHalf = dev.has(aspect::fp16); + auto arith_ops = esimd_test::ArithBinaryOpsNoDiv; passed &= test(arith_ops, q); passed &= test(arith_ops, q, 0.000001f); - if (dev.has(aspect::fp64)) + if (SupportsDouble) passed &= test(arith_ops, q, 1e-15); passed &= test(arith_ops, q, 0.000001f); - passed &= test(arith_ops, q, 1); - passed &= test(arith_ops, q, 1); - if (dev.has(aspect::fp64)) + if (SupportsHalf) + passed &= test(arith_ops, q, 1); + if (SupportsHalf) + passed &= test(arith_ops, q, 1); + if (SupportsDouble && SupportsHalf) passed &= test(arith_ops, q); passed &= test(arith_ops, q); #ifdef USE_BF16 passed &= test(arith_ops, q); - passed &= test(arith_ops, q, 0.03); + if (SupportsHalf) + passed &= test(arith_ops, q, 0.03); #endif // USE_BF16 #ifdef USE_TF32 passed &= test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 0.000001f); - passed &= test(arith_ops, q, 0.000001f); + if (SupportsHalf) + passed &= + test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 0.000001f); passed &= test(arith_ops, q, 0.000001f); @@ -294,27 +302,31 @@ int main(void) { passed &= test(div_op, q); passed &= test(div_op, q, 0.000001f); #ifndef WA_BUG - if (dev.has(aspect::fp64)) + if (SupportsDouble) passed &= test(div_op, q); #endif // WA_BUG passed &= test(div_op, q, 0.000001f); - passed &= test(div_op, q, 1); - passed &= test(div_op, q, 1); + if (SupportsHalf) + passed &= test(div_op, q, 1); + if (SupportsHalf) + passed &= test(div_op, q, 1); #ifndef WA_BUG - if (dev.has(aspect::fp64)) + if (SupportsDouble && SupportsHalf) passed &= test(div_op, q); #endif // WA_BUG passed &= test(div_op, q); #ifdef USE_BF16 passed &= test(div_op, q); - passed &= test(div_op, q, 0.03); + if (SupportsHalf) + passed &= test(div_op, q, 0.03); #endif // USE_BF16 #ifdef USE_TF32 passed &= test(div_op, q, 0.000001f); passed &= test(div_op, q, 0.000001f); passed &= test(div_op, q, 0.000001f); - passed &= test(div_op, q, 0.000001f); + if (SupportsHalf) + passed &= test(div_op, q, 0.000001f); passed &= test(div_op, q, 0.000001f); passed &= test(div_op, q, 0.000001f); @@ -351,24 +363,28 @@ int main(void) { auto cmp_ops = esimd_test::CmpOps; passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); - if (dev.has(aspect::fp64)) + if (SupportsDouble) passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); - passed &= test(cmp_ops, q, 1); - passed &= test(cmp_ops, q, 1); - if (dev.has(aspect::fp64)) + if (SupportsHalf) + passed &= test(cmp_ops, q, 1); + if (SupportsHalf) + passed &= test(cmp_ops, q, 1); + if (SupportsDouble) passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); #ifdef USE_BF16 passed &= test(cmp_ops, q); - passed &= test(cmp_ops, q); + if (SupportsHalf) + passed &= test(cmp_ops, q); #endif // USE_BF16 #ifdef USE_TF32 passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); - passed &= test(cmp_ops, q); + if (SupportsHalf) + passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); passed &= test(cmp_ops, q); #endif // USE_TF32 diff --git a/SYCL/ESIMD/api/saturation_smoke.cpp b/SYCL/ESIMD/api/saturation_smoke.cpp index 005cb1497e..3d2ab818d4 100644 --- a/SYCL/ESIMD/api/saturation_smoke.cpp +++ b/SYCL/ESIMD/api/saturation_smoke.cpp @@ -185,10 +185,13 @@ int main(int argc, char **argv) { std::cout << "Running on " << dev.get_info() << "\n"; const bool doublesSupported = dev.has(sycl::aspect::fp64); + const bool halfsSupported = dev.has(sycl::aspect::fp16); bool passed = true; - passed &= test(q); - passed &= test(q); + if (halfsSupported) + passed &= test(q); + if (halfsSupported) + passed &= test(q); passed &= test(q); if (doublesSupported) passed &= test(q); @@ -207,7 +210,8 @@ int main(int argc, char **argv) { passed &= test(q); passed &= test(q); - passed &= test(q); + if (halfsSupported) + passed &= test(q); if (doublesSupported) passed &= test(q); diff --git a/SYCL/ESIMD/api/simd_memory_access.cpp b/SYCL/ESIMD/api/simd_memory_access.cpp index 4912d52ca9..2e637c7f2f 100644 --- a/SYCL/ESIMD/api/simd_memory_access.cpp +++ b/SYCL/ESIMD/api/simd_memory_access.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // // The test checks functionality of the memory access APIs which are members of @@ -155,6 +155,7 @@ int main(int argc, char **argv) { auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; + const bool halfsSupported = dev.has(sycl::aspect::fp16); bool passed = true; passed &= test(q, size); @@ -165,7 +166,8 @@ int main(int argc, char **argv) { passed &= test(q, size); passed &= test(q, size); passed &= test(q, size); - passed &= test(q, size); + if (halfsSupported) + passed &= test(q, size); passed &= test(q, size); passed &= test(q, size); @@ -175,7 +177,8 @@ int main(int argc, char **argv) { passed &= test(q, size); passed &= test(q, size); passed &= test(q, size); - passed &= test(q, size); + if (halfsSupported) + passed &= test(q, size); std::cout << (passed ? "=== Test passed\n" : "=== Test FAILED\n"); return passed ? 0 : 1; diff --git a/SYCL/ESIMD/api/simd_view_copy_move_assign.cpp b/SYCL/ESIMD/api/simd_view_copy_move_assign.cpp index 811605fb65..11ea98577a 100644 --- a/SYCL/ESIMD/api/simd_view_copy_move_assign.cpp +++ b/SYCL/ESIMD/api/simd_view_copy_move_assign.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // This test checks the behavior of simd_view constructors @@ -164,7 +164,8 @@ int main(void) { bool passed = true; passed &= testT(q); passed &= testT(q); - passed &= testT(q); + if (dev.has(sycl::aspect::fp16)) + passed &= testT(q); return passed ? 0 : 1; } diff --git a/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp b/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp index b2d326b690..3c8e53d032 100644 --- a/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp +++ b/SYCL/ESIMD/api/simd_view_select_2d_fp.cpp @@ -22,7 +22,8 @@ int main(int argc, char **argv) { << "\n"; bool passed = true; - passed &= test(q); + if (dev.has(sycl::aspect::fp16)) + passed &= test(q); passed &= test(q); if (dev.has(sycl::aspect::fp64)) passed &= test(q); diff --git a/SYCL/ESIMD/api/slm_gather_scatter.cpp b/SYCL/ESIMD/api/slm_gather_scatter.cpp index 5e3f1960d1..ceefb08f55 100644 --- a/SYCL/ESIMD/api/slm_gather_scatter.cpp +++ b/SYCL/ESIMD/api/slm_gather_scatter.cpp @@ -1,6 +1,6 @@ // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // // The test checks functionality of the slm_gather/slm_scatter ESIMD APIs. @@ -139,8 +139,10 @@ int main(void) { passed &= test(q); passed &= test(q); - passed &= test(q); - passed &= test(q); + if (dev.has(aspect::fp16)) { + passed &= test(q); + passed &= test(q); + } return passed ? 0 : 1; } diff --git a/SYCL/ESIMD/api/svm_gather_scatter.cpp b/SYCL/ESIMD/api/svm_gather_scatter.cpp index 7d149b3fc0..13a2375259 100644 --- a/SYCL/ESIMD/api/svm_gather_scatter.cpp +++ b/SYCL/ESIMD/api/svm_gather_scatter.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu && !gpu-intel-pvc // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // Regression test for SVM gather/scatter API. @@ -104,12 +104,14 @@ int main(void) { Pass &= test(Q); Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); - Pass &= test(Q); + if (dev.has(aspect::fp16)) { + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + Pass &= test(Q); + } Pass &= test(Q); Pass &= test(Q); diff --git a/SYCL/ESIMD/api/unary_ops_heavy.cpp b/SYCL/ESIMD/api/unary_ops_heavy.cpp index df14cecc0b..adb851684a 100644 --- a/SYCL/ESIMD/api/unary_ops_heavy.cpp +++ b/SYCL/ESIMD/api/unary_ops_heavy.cpp @@ -157,6 +157,8 @@ int main(void) { auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; + const bool doublesSupported = dev.has(sycl::aspect::fp64); + const bool halfsSupported = dev.has(sycl::aspect::fp16); bool passed = true; using UnOp = esimd_test::UnaryOp; @@ -171,10 +173,12 @@ int main(void) { passed &= test(mod_ops, q); passed &= test(mod_ops, q); passed &= test(mod_ops, q); - passed &= test(mod_ops, q); - passed &= test(mod_ops, q); + if (halfsSupported) + passed &= test(mod_ops, q); + if (halfsSupported) + passed &= test(mod_ops, q); passed &= test(mod_ops, q); - if (dev.has(aspect::fp64)) + if (ddoublesSupported) passed &= test(mod_ops, q); auto signed_ops = esimd_test::OpSeq{}; @@ -182,9 +186,10 @@ int main(void) { passed &= test(signed_ops, q); passed &= test(signed_ops, q); passed &= test(signed_ops, q); - passed &= test(signed_ops, q); + if (halfsSupported) + passed &= test(signed_ops, q); passed &= test(signed_ops, q); - if (dev.has(aspect::fp64)) + if (doublesSupported) passed &= test(signed_ops, q); #ifdef USE_BF16 diff --git a/SYCL/ESIMD/regression/half_conversion_test.cpp b/SYCL/ESIMD/regression/half_conversion_test.cpp index 7be3dce9d1..ed55424246 100644 --- a/SYCL/ESIMD/regression/half_conversion_test.cpp +++ b/SYCL/ESIMD/regression/half_conversion_test.cpp @@ -1,6 +1,6 @@ // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-device-code-split=per_kernel %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out //==- half_conversion_test.cpp - Test for half conversion under ESIMD_EMULATOR // backend -==/ @@ -78,10 +78,21 @@ int main(int argc, char *argv[]) { bool passed = true; queue q; + if (!dev.has(sycl::aspect::fp16)) { + std::cout << "Test was skipped becasue the selected device does not " + "support sycl::aspect::fp16" + << std::endl; + return 0; + } + std::cout << "\n===================" << std::endl; passed &= test(q, 1); std::cout << "\n===================" << std::endl; - passed &= test(q, 1); + if (q.get_device().has(sycl::aspect::fp16)) + passed &= test(q, 1); + else + std::cout << "Half case skipped as the device does not support fp16." + << std::endl; std::cout << "\n===================" << std::endl; passed &= test(q, 1); std::cout << "\n===================" << std::endl; diff --git a/SYCL/ESIMD/vadd_half.cpp b/SYCL/ESIMD/vadd_half.cpp index 1482aae505..596feccb81 100644 --- a/SYCL/ESIMD/vadd_half.cpp +++ b/SYCL/ESIMD/vadd_half.cpp @@ -59,6 +59,13 @@ int main(int argc, char **argv) { auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; + if (!dev.has(sycl::aspect::fp16)) { + std::cout << "Test was skipped becasue the selected device does not " + "support sycl::aspect::fp16" + << std::endl; + return 0; + } + TstT *A = malloc_shared(Size, q); SrcT *B = malloc_shared(Size, q); using DstT = __ESIMD_DNS::computation_type_t; From 4ecb2d1542f31be4a257b24cf4650c05bed9fd2d Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 12:13:53 -0700 Subject: [PATCH 17/20] Add fixme Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/printf.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/SYCL/ESIMD/printf.cpp b/SYCL/ESIMD/printf.cpp index 68bd01c3ef..0714852fe6 100644 --- a/SYCL/ESIMD/printf.cpp +++ b/SYCL/ESIMD/printf.cpp @@ -16,6 +16,7 @@ // clang-format off // Test using __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ is disabled until ESIMD // supports it. +// FIXME: enable that test // RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple -D__SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__ %s -o %t_nonvar.out // RUNx: %GPU_RUN_PLACEHOLDER %t_nonvar.out %GPU_CHECK_PLACEHOLDER // clang-format on From 51b92b5d7bcbec892827665bfc6b5122a7abe8d9 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 12:23:37 -0700 Subject: [PATCH 18/20] Add fp16 check to slm_gather_scatter_heavy Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp b/SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp index 3935790500..76451e6d30 100644 --- a/SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp +++ b/SYCL/ESIMD/api/slm_gather_scatter_heavy.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu // UNSUPPORTED: cuda || hip -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl-device-code-split=per_kernel -fsycl %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // // The test checks functionality of the slm gather/scatter ESIMD intrinsics. @@ -463,8 +463,10 @@ int main(int argc, char **argv) { passed &= test(q); passed &= test(q); passed &= test_vl1(q); - passed &= test_vl1(q); - passed &= test(q); + if (dev.has(aspect::fp16)) { + passed &= test_vl1(q); + passed &= test(q); + } std::cout << (!passed ? "TEST FAILED\n" : "TEST Passed\n"); return passed ? 0 : 1; From 866682827a30e58aea6b56babe363a9405408641 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 3 Nov 2022 12:35:04 -0700 Subject: [PATCH 19/20] Fix mistakes Signed-off-by: Larsen, Steffen --- SYCL/ESIMD/api/svm_gather_scatter.cpp | 2 +- SYCL/ESIMD/api/unary_ops_heavy.cpp | 2 +- SYCL/ESIMD/regression/half_conversion_test.cpp | 7 ------- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/SYCL/ESIMD/api/svm_gather_scatter.cpp b/SYCL/ESIMD/api/svm_gather_scatter.cpp index 13a2375259..09cf3e9d30 100644 --- a/SYCL/ESIMD/api/svm_gather_scatter.cpp +++ b/SYCL/ESIMD/api/svm_gather_scatter.cpp @@ -104,7 +104,7 @@ int main(void) { Pass &= test(Q); Pass &= test(Q); - if (dev.has(aspect::fp16)) { + if (Dev.has(aspect::fp16)) { Pass &= test(Q); Pass &= test(Q); Pass &= test(Q); diff --git a/SYCL/ESIMD/api/unary_ops_heavy.cpp b/SYCL/ESIMD/api/unary_ops_heavy.cpp index adb851684a..71206d0ed5 100644 --- a/SYCL/ESIMD/api/unary_ops_heavy.cpp +++ b/SYCL/ESIMD/api/unary_ops_heavy.cpp @@ -178,7 +178,7 @@ int main(void) { if (halfsSupported) passed &= test(mod_ops, q); passed &= test(mod_ops, q); - if (ddoublesSupported) + if (doublesSupported) passed &= test(mod_ops, q); auto signed_ops = esimd_test::OpSeq{}; diff --git a/SYCL/ESIMD/regression/half_conversion_test.cpp b/SYCL/ESIMD/regression/half_conversion_test.cpp index ed55424246..7a661fa5fc 100644 --- a/SYCL/ESIMD/regression/half_conversion_test.cpp +++ b/SYCL/ESIMD/regression/half_conversion_test.cpp @@ -78,13 +78,6 @@ int main(int argc, char *argv[]) { bool passed = true; queue q; - if (!dev.has(sycl::aspect::fp16)) { - std::cout << "Test was skipped becasue the selected device does not " - "support sycl::aspect::fp16" - << std::endl; - return 0; - } - std::cout << "\n===================" << std::endl; passed &= test(q, 1); std::cout << "\n===================" << std::endl; From 4068bbc2e7f7e90ad7ccd6efb695cf81dc9b3957 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Fri, 4 Nov 2022 02:11:00 -0700 Subject: [PATCH 20/20] std::complex float tests on Windows require double support Signed-off-by: Larsen, Steffen --- SYCL/DeviceLib/std_complex_math_test.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/SYCL/DeviceLib/std_complex_math_test.cpp b/SYCL/DeviceLib/std_complex_math_test.cpp index f130190e15..5adcd2ef0a 100644 --- a/SYCL/DeviceLib/std_complex_math_test.cpp +++ b/SYCL/DeviceLib/std_complex_math_test.cpp @@ -196,6 +196,18 @@ void device_complex_test_2(s::queue &deviceQueue) { #endif int main() { s::queue deviceQueue; + +#ifdef _WIN32 + // std::complex math on Windows uses doubles internally so fp64 is required to + // run this test. + if (!deviceQueue.get_device().has(s::aspect::fp64)) { + std::cout << "Skipping test as device does not support fp64 which is " + "required for math operations on std::complex on Windows." + << std::endl; + return 0; + } +#endif + device_complex_test_1(deviceQueue); #ifndef _WIN32 device_complex_test_2(deviceQueue);