diff --git a/sycl/test/basic_tests/event.cpp b/sycl/test/basic_tests/event.cpp index e8e30015079f7..be6ba2f3aa43a 100644 --- a/sycl/test/basic_tests/event.cpp +++ b/sycl/test/basic_tests/event.cpp @@ -1,7 +1,10 @@ -// REQUIRES: opencl +// REQUIRES: opencl || level0 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL // RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + //==--------------- event.cpp - SYCL event test ----------------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/sycl/test/inorder_queue/in_order_buffs.cpp b/sycl/test/inorder_queue/in_order_buffs.cpp index ef317031291ce..be96e3b849f2a 100644 --- a/sycl/test/inorder_queue/in_order_buffs.cpp +++ b/sycl/test/inorder_queue/in_order_buffs.cpp @@ -1,6 +1,4 @@ -// REQUIRES: opencl - -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out @@ -18,14 +16,6 @@ using namespace cl::sycl; const int dataSize = 32; -bool isQueueInOrder(cl_command_queue cq) { - cl_command_queue_properties reportedProps; - cl_int iRet = clGetCommandQueueInfo( - cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr); - assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device"); - return (!(reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); -} - int main() { int dataA[dataSize] = {0}; int dataB[dataSize] = {0}; @@ -63,18 +53,6 @@ int main() { cgh.parallel_for(myRange, myKernel); }); - bool result = true; - cl_command_queue cq = Queue.get(); - device dev = Queue.get_device(); - bool expected_result = dev.is_host() ? true : isQueueInOrder(cq); - - if (expected_result != result) { - std::cout << "Resulting queue order is OOO but expected order is inorder" - << std::endl; - - return -1; - } - auto readBufferB = bufB.get_access(); for (size_t i = 0; i != dataSize; ++i) { if (readBufferB[i] != i) { diff --git a/sycl/test/inorder_queue/in_order_buffs_ocl.cpp b/sycl/test/inorder_queue/in_order_buffs_ocl.cpp new file mode 100644 index 0000000000000..d42bdd2ec4e02 --- /dev/null +++ b/sycl/test/inorder_queue/in_order_buffs_ocl.cpp @@ -0,0 +1,50 @@ +// REQUIRES: opencl + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +//==-------- ordered_buffs.cpp - SYCL buffers in ordered queues test--------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +using namespace cl::sycl; + +const int dataSize = 32; + +bool isQueueInOrder(cl_command_queue cq) { + cl_command_queue_properties reportedProps; + cl_int iRet = clGetCommandQueueInfo( + cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr); + assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device"); + return (!(reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)); +} + +int main() { + int dataA[dataSize] = {0}; + int dataB[dataSize] = {0}; + + { + queue Queue{property::queue::in_order()}; + + bool result = true; + cl_command_queue cq = Queue.get(); + device dev = Queue.get_device(); + bool expected_result = dev.is_host() ? true : isQueueInOrder(cq); + + if (expected_result != result) { + std::cout << "Resulting queue order is OOO but expected order is inorder" + << std::endl; + + return -1; + } + } + + return 0; +} diff --git a/sycl/test/inorder_queue/in_order_dmemll.cpp b/sycl/test/inorder_queue/in_order_dmemll.cpp index defb64115d560..3eb6ecc39a34c 100644 --- a/sycl/test/inorder_queue/in_order_dmemll.cpp +++ b/sycl/test/inorder_queue/in_order_dmemll.cpp @@ -1,8 +1,8 @@ -// REQUIRES: opencl - -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out -L %opencl_libs_dir -lOpenCL +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// +// XFAIL: cuda //==----------- ordered_dmemll.cpp - Device Memory Linked List test --------==// // It uses an ordered queue where explicit waiting is not necessary between // kernels @@ -19,15 +19,6 @@ using namespace cl::sycl; constexpr int numNodes = 4; -bool getQueueOrder(cl_command_queue cq) { - cl_command_queue_properties reportedProps; - cl_int iRet = clGetCommandQueueInfo( - cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr); - assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device"); - return (reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? false - : true; -} - struct Node { Node() : pNext(nullptr), Num(0xDEADBEEF) {} @@ -103,15 +94,5 @@ int main() { d_cur = h_cur.pNext; } - bool result = true; - cl_command_queue cq = q.get(); - bool expected_result = dev.is_host() ? true : getQueueOrder(cq); - if (expected_result != result) { - std::cout << "Resulting queue order is OOO but expected order is inorder" - << std::endl; - - return -1; - } - return 0; } diff --git a/sycl/test/inorder_queue/in_order_dmemll_ocl.cpp b/sycl/test/inorder_queue/in_order_dmemll_ocl.cpp new file mode 100644 index 0000000000000..e5fb04e2b1e96 --- /dev/null +++ b/sycl/test/inorder_queue/in_order_dmemll_ocl.cpp @@ -0,0 +1,46 @@ +// REQUIRES: opencl + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out -L %opencl_libs_dir -lOpenCL +// RUN: %CPU_RUN_PLACEHOLDER %t1.out +// RUN: %GPU_RUN_PLACEHOLDER %t1.out +//==----------- ordered_dmemll.cpp - Device Memory Linked List test --------==// +// It uses an ordered queue where explicit waiting is not necessary between +// kernels +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +using namespace cl::sycl; + +constexpr int numNodes = 4; + +bool getQueueOrder(cl_command_queue cq) { + cl_command_queue_properties reportedProps; + cl_int iRet = clGetCommandQueueInfo( + cq, CL_QUEUE_PROPERTIES, sizeof(reportedProps), &reportedProps, nullptr); + assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device"); + return (reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ? false + : true; +} + +int main() { + queue q{property::queue::in_order()}; + auto dev = q.get_device(); + + bool result = true; + cl_command_queue cq = q.get(); + bool expected_result = dev.is_host() ? true : getQueueOrder(cq); + if (expected_result != result) { + std::cout << "Resulting queue order is OOO but expected order is inorder" + << std::endl; + + return -1; + } + + return 0; +} diff --git a/sycl/test/kernel-and-program/kernel-and-program-interop.cpp b/sycl/test/kernel-and-program/kernel-and-program-interop.cpp new file mode 100644 index 0000000000000..991a313712fae --- /dev/null +++ b/sycl/test/kernel-and-program/kernel-and-program-interop.cpp @@ -0,0 +1,205 @@ +// REQUIRES: opencl + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUNx: %GPU_RUN_PLACEHOLDER %t.out +// RUNx: %ACC_RUN_PLACEHOLDER %t.out + +//==--- kernel-and-program.cpp - SYCL kernel/program test ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include +#include + +int main() { + // Single task invocation methods + { + cl::sycl::queue q; + int data = 0; + + // OpenCL interoperability kernel invocation + if (!q.is_host()) { + { + cl_int err; + cl::sycl::context ctx = q.get_context(); + cl_context clCtx = ctx.get(); + cl_command_queue clQ = q.get(); + cl_mem clBuffer = + clCreateBuffer(clCtx, CL_MEM_WRITE_ONLY, sizeof(int), NULL, NULL); + err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int), + &data, 0, NULL, NULL); + assert(err == CL_SUCCESS); + clFinish(clQ); + cl::sycl::program prog(ctx); + prog.build_with_source( + "kernel void SingleTask(global int* a) {*a+=1; }\n"); + q.submit([&](cl::sycl::handler &cgh) { + cgh.set_args(clBuffer); + cgh.single_task(prog.get_kernel("SingleTask")); + }); + q.wait(); + err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int), &data, + 0, NULL, NULL); + clReleaseCommandQueue(clQ); + clReleaseContext(clCtx); + assert(err == CL_SUCCESS); + assert(data == 1); + } + { + cl::sycl::queue sycl_queue; + cl::sycl::program prog(sycl_queue.get_context()); + prog.build_with_source("kernel void foo(global int* a, global int* b, " + "global int* c) {*a=*b+*c; }\n"); + int a = 13, b = 14, c = 15; + { + cl::sycl::buffer bufa(&a, cl::sycl::range<1>(1)); + cl::sycl::buffer bufb(&b, cl::sycl::range<1>(1)); + cl::sycl::buffer bufc(&c, cl::sycl::range<1>(1)); + sycl_queue.submit([&](cl::sycl::handler &cgh) { + auto A = bufa.get_access(cgh); + auto B = bufb.get_access(cgh); + auto C = bufc.get_access(cgh); + cgh.set_args(A, B, C); + cgh.single_task(prog.get_kernel("foo")); + }); + } + assert(a == b + c); + } + } + { + cl::sycl::queue Queue; + if (!Queue.is_host()) { + cl::sycl::sampler first( + cl::sycl::coordinate_normalization_mode::normalized, + cl::sycl::addressing_mode::clamp, cl::sycl::filtering_mode::linear); + cl::sycl::sampler second( + cl::sycl::coordinate_normalization_mode::unnormalized, + cl::sycl::addressing_mode::clamp_to_edge, + cl::sycl::filtering_mode::nearest); + cl::sycl::program prog(Queue.get_context()); + prog.build_with_source( + "kernel void sampler_args(int a, sampler_t first, " + "int b, sampler_t second, int c) {}\n"); + cl::sycl::kernel krn = prog.get_kernel("sampler_args"); + + Queue.submit([&](cl::sycl::handler &cgh) { + cgh.set_args(0, first, 2, second, 3); + cgh.single_task(krn); + }); + } + } + } + // Parallel for with range + { + cl::sycl::queue q; + std::vector dataVec(10); + std::iota(dataVec.begin(), dataVec.end(), 0); + + if (!q.is_host()) { + cl_int err; + { + cl::sycl::context ctx = q.get_context(); + cl_context clCtx = ctx.get(); + cl_command_queue clQ = q.get(); + cl_mem clBuffer = clCreateBuffer( + clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL); + err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, + sizeof(int) * dataVec.size(), dataVec.data(), + 0, NULL, NULL); + assert(err == CL_SUCCESS); + + cl::sycl::program prog(ctx); + prog.build_with_source( + "kernel void ParallelFor(__global int* a, int v, __local int *l) " + "{ size_t index = get_global_id(0); l[index] = a[index];" + " l[index] += v; a[index] = l[index]; }\n"); + + q.submit([&](cl::sycl::handler &cgh) { + const int value = 1; + auto local_acc = + cl::sycl::accessor( + cl::sycl::range<1>(10), cgh); + cgh.set_args(clBuffer, value, local_acc); + cgh.parallel_for(cl::sycl::range<1>(10), + prog.get_kernel("ParallelFor")); + }); + + q.wait(); + err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, + sizeof(int) * dataVec.size(), dataVec.data(), + 0, NULL, NULL); + clReleaseCommandQueue(clQ); + clReleaseContext(clCtx); + assert(err == CL_SUCCESS); + for (size_t i = 0; i < dataVec.size(); ++i) { + assert(dataVec[i] == i + 1); + } + } + } + } + + // Parallel for with nd_range + { + cl::sycl::queue q; + std::vector dataVec(10); + std::iota(dataVec.begin(), dataVec.end(), 0); + + if (!q.is_host()) { + cl_int err; + { + cl::sycl::context ctx = q.get_context(); + cl_context clCtx = ctx.get(); + cl_command_queue clQ = q.get(); + cl_mem clBuffer = clCreateBuffer( + clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL); + err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, + sizeof(int) * dataVec.size(), dataVec.data(), + 0, NULL, NULL); + assert(err == CL_SUCCESS); + + cl::sycl::program prog(ctx); + prog.build_with_source( + "kernel void ParallelForND( local int* l,global int* a)" + "{ size_t idx = get_global_id(0);" + " int pos = idx & 1;" + " int opp = pos ^ 1;" + " l[pos] = a[get_global_id(0)];" + " barrier(CLK_LOCAL_MEM_FENCE);" + " a[idx]=l[opp]; }"); + + // TODO is there no way to set local memory size via interoperability? + cl::sycl::kernel krn = prog.get_kernel("ParallelForND"); + clSetKernelArg(krn.get(), 0, sizeof(int) * 2, NULL); + + q.submit([&](cl::sycl::handler &cgh) { + cgh.set_arg(1, clBuffer); + cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(10), + cl::sycl::range<1>(2)), + krn); + }); + + q.wait(); + err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, + sizeof(int) * dataVec.size(), dataVec.data(), + 0, NULL, NULL); + clReleaseCommandQueue(clQ); + clReleaseContext(clCtx); + assert(err == CL_SUCCESS); + } + for (size_t i = 0; i < dataVec.size(); ++i) { + assert(dataVec[i] == (i ^ 1)); + } + } + } +} diff --git a/sycl/test/kernel-and-program/kernel-and-program.cpp b/sycl/test/kernel-and-program/kernel-and-program.cpp index 8d3f57eb5a671..1233ec3664ee6 100644 --- a/sycl/test/kernel-and-program/kernel-and-program.cpp +++ b/sycl/test/kernel-and-program/kernel-and-program.cpp @@ -1,6 +1,4 @@ -// REQUIRES: opencl - -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUNx: %GPU_RUN_PLACEHOLDER %t.out @@ -54,77 +52,6 @@ int main() { } assert(data == 1); - // OpenCL interoperability kernel invocation - if (!q.is_host()) { - { - cl_int err; - cl::sycl::context ctx = q.get_context(); - cl_context clCtx = ctx.get(); - cl_command_queue clQ = q.get(); - cl_mem clBuffer = - clCreateBuffer(clCtx, CL_MEM_WRITE_ONLY, sizeof(int), NULL, NULL); - err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int), - &data, 0, NULL, NULL); - assert(err == CL_SUCCESS); - clFinish(clQ); - cl::sycl::program prog(ctx); - prog.build_with_source( - "kernel void SingleTask(global int* a) {*a+=1; }\n"); - q.submit([&](cl::sycl::handler &cgh) { - cgh.set_args(clBuffer); - cgh.single_task(prog.get_kernel("SingleTask")); - }); - q.wait(); - err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, sizeof(int), &data, - 0, NULL, NULL); - clReleaseCommandQueue(clQ); - clReleaseContext(clCtx); - assert(err == CL_SUCCESS); - assert(data == 2); - } - { - cl::sycl::queue sycl_queue; - cl::sycl::program prog(sycl_queue.get_context()); - prog.build_with_source("kernel void foo(global int* a, global int* b, " - "global int* c) {*a=*b+*c; }\n"); - int a = 13, b = 14, c = 15; - { - cl::sycl::buffer bufa(&a, cl::sycl::range<1>(1)); - cl::sycl::buffer bufb(&b, cl::sycl::range<1>(1)); - cl::sycl::buffer bufc(&c, cl::sycl::range<1>(1)); - sycl_queue.submit([&](cl::sycl::handler &cgh) { - auto A = bufa.get_access(cgh); - auto B = bufb.get_access(cgh); - auto C = bufc.get_access(cgh); - cgh.set_args(A, B, C); - cgh.single_task(prog.get_kernel("foo")); - }); - } - assert(a == b + c); - } - } - { - cl::sycl::queue Queue; - if (!Queue.is_host()) { - cl::sycl::sampler first( - cl::sycl::coordinate_normalization_mode::normalized, - cl::sycl::addressing_mode::clamp, cl::sycl::filtering_mode::linear); - cl::sycl::sampler second( - cl::sycl::coordinate_normalization_mode::unnormalized, - cl::sycl::addressing_mode::clamp_to_edge, - cl::sycl::filtering_mode::nearest); - cl::sycl::program prog(Queue.get_context()); - prog.build_with_source( - "kernel void sampler_args(int a, sampler_t first, " - "int b, sampler_t second, int c) {}\n"); - cl::sycl::kernel krn = prog.get_kernel("sampler_args"); - - Queue.submit([&](cl::sycl::handler &cgh) { - cgh.set_args(0, first, 2, second, 3); - cgh.single_task(krn); - }); - } - } } // Parallel for with range { @@ -157,50 +84,6 @@ int main() { for (size_t i = 0; i < dataVec.size(); ++i) { assert(dataVec[i] == i + 1); } - - // OpenCL interoperability kernel invocation - if (!q.is_host()) { - cl_int err; - { - cl::sycl::context ctx = q.get_context(); - cl_context clCtx = ctx.get(); - cl_command_queue clQ = q.get(); - cl_mem clBuffer = clCreateBuffer( - clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL); - err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, - sizeof(int) * dataVec.size(), dataVec.data(), - 0, NULL, NULL); - assert(err == CL_SUCCESS); - - cl::sycl::program prog(ctx); - prog.build_with_source( - "kernel void ParallelFor(__global int* a, int v, __local int *l) " - "{ size_t index = get_global_id(0); l[index] = a[index];" - " l[index] += v; a[index] = l[index]; }\n"); - - q.submit([&](cl::sycl::handler &cgh) { - const int value = 1; - auto local_acc = - cl::sycl::accessor( - cl::sycl::range<1>(10), cgh); - cgh.set_args(clBuffer, value, local_acc); - cgh.parallel_for(cl::sycl::range<1>(10), - prog.get_kernel("ParallelFor")); - }); - - q.wait(); - err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, - sizeof(int) * dataVec.size(), dataVec.data(), - 0, NULL, NULL); - clReleaseCommandQueue(clQ); - clReleaseContext(clCtx); - assert(err == CL_SUCCESS); - for (size_t i = 0; i < dataVec.size(); ++i) { - assert(dataVec[i] == i + 2); - } - } - } } // Parallel for with nd_range @@ -250,53 +133,5 @@ int main() { assert(dataVec[i] == (i ^ 1)); } } - - // OpenCL interoperability kernel invocation - if (!q.is_host()) { - cl_int err; - { - cl::sycl::context ctx = q.get_context(); - cl_context clCtx = ctx.get(); - cl_command_queue clQ = q.get(); - cl_mem clBuffer = clCreateBuffer( - clCtx, CL_MEM_WRITE_ONLY, sizeof(int) * dataVec.size(), NULL, NULL); - err = clEnqueueWriteBuffer(clQ, clBuffer, CL_TRUE, 0, - sizeof(int) * dataVec.size(), dataVec.data(), - 0, NULL, NULL); - assert(err == CL_SUCCESS); - - cl::sycl::program prog(ctx); - prog.build_with_source( - "kernel void ParallelForND( local int* l,global int* a)" - "{ size_t idx = get_global_id(0);" - " int pos = idx & 1;" - " int opp = pos ^ 1;" - " l[pos] = a[get_global_id(0)];" - " barrier(CLK_LOCAL_MEM_FENCE);" - " a[idx]=l[opp]; }"); - - // TODO is there no way to set local memory size via interoperability? - cl::sycl::kernel krn = prog.get_kernel("ParallelForND"); - clSetKernelArg(krn.get(), 0, sizeof(int) * 2, NULL); - - q.submit([&](cl::sycl::handler &cgh) { - cgh.set_arg(1, clBuffer); - cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(10), - cl::sycl::range<1>(2)), - krn); - }); - - q.wait(); - err = clEnqueueReadBuffer(clQ, clBuffer, CL_TRUE, 0, - sizeof(int) * dataVec.size(), dataVec.data(), - 0, NULL, NULL); - clReleaseCommandQueue(clQ); - clReleaseContext(clCtx); - assert(err == CL_SUCCESS); - } - for (size_t i = 0; i < dataVec.size(); ++i) { - assert(dataVec[i] == i); - } - } } }