diff --git a/sycl/test/basic_tests/subdevice_pi.cpp b/sycl/test/basic_tests/subdevice_pi.cpp new file mode 100644 index 0000000000000..af58fcc68a32f --- /dev/null +++ b/sycl/test/basic_tests/subdevice_pi.cpp @@ -0,0 +1,231 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out separate equally %CPU_CHECK_PLACEHOLDER --check-prefix CHECK-SEPARATE +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out shared equally %CPU_CHECK_PLACEHOLDER --check-prefix CHECK-SHARED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate +// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out fused equally %CPU_CHECK_PLACEHOLDER --check-prefix CHECK-FUSED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate +// +// Intel OpenCL CPU Runtime supports device partition on all (multi-core) +// platforms. Other devices may not support this. + +#include +#include +#include + +using namespace cl::sycl; + +// Log to the same stream as SYCL_PI_TRACE +static void log_pi(const char *msg) { + std::cout << msg << std::endl; +} + +static void use_mem(buffer buf, queue q) { + q.submit( + [&](handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.parallel_for(range<1>(buf.get_count()), + [=](item<1> itemID) { + acc[itemID] += 1; + }); + }); + q.wait(); +} + +typedef std::vector (*partition_fn)(device dev); + +// FIXME: `partition_by_affinity_domain' is currently not tested: OpenCL CPU +// device only supports `partition_equally'. +static std::vector partition_affinity(device dev) { + std::vector subdevices = + dev.create_sub_devices( + info::partition_affinity_domain::next_partitionable); + + return subdevices; +} + +static std::vector partition_equally(device dev) { + std::vector subdevices = + dev.create_sub_devices(1); + + return subdevices; +} + +static bool check_separate(device dev, buffer buf, + partition_fn partition) { + log_pi("Create sub devices"); + std::vector subdevices = partition(dev); + assert(subdevices.size() > 1); + // CHECK-SEPARATE: Create sub devices + // CHECK-SEPARATE: ---> piDevicePartition + + log_pi("Test sub device 0"); + { + queue q0(subdevices[0]); + use_mem(buf, q0); + } + // CHECK-SEPARATE: Test sub device 0 + // CHECK-SEPARATE: ---> piContextCreate + // CHECK-SEPARATE: ---> piQueueCreate + // CHECK-SEPARATE: ---> piMemBufferCreate + // CHECK-SEPARATE: ---> piEnqueueKernelLaunch + // CHECK-SEPARATE: ---> piEventsWait + + log_pi("Test sub device 1"); + { + queue q1(subdevices[1]); + use_mem(buf, q1); + } + // CHECK-SEPARATE: Test sub device 1 + // CHECK-SEPARATE: ---> piContextCreate + // CHECK-SEPARATE: ---> piQueueCreate + // CHECK-SEPARATE: ---> piMemBufferCreate + // + // Verify that we have a memcpy between subdevices in this case + // CHECK-SEPARATE: ---> piEnqueueMemBufferMap + // CHECK-SEPARATE: ---> piEnqueueMemBufferWrite + // + // CHECK-SEPARATE: ---> piEnqueueKernelLaunch + // CHECK-SEPARATE: ---> piEventsWait + + return true; +} + +static bool check_shared_context(device dev, buffer buf, + partition_fn partition) { + log_pi("Create sub devices"); + std::vector subdevices = partition(dev); + assert(subdevices.size() > 1); + // CHECK-SHARED: Create sub devices + // CHECK-SHARED: ---> piDevicePartition + + // Shared context: queues are bound to specific subdevices, but + // memory does not migrate + log_pi("Create shared context"); + context shared_context(subdevices); + // CHECK-SHARED: Create shared context + // CHECK-SHARED: ---> piContextCreate + // + // Make sure that a single context is created: see --implicit-check-not above. + + log_pi("Test sub device 0"); + { + queue q0(shared_context, subdevices[0]); + use_mem(buf, q0); + } + // CHECK-SHARED: Test sub device 0 + // CHECK-SHARED: ---> piQueueCreate + // CHECK-SHARED: ---> piMemBufferCreate + // + // Make sure that a single buffer is created (and shared between subdevices): + // see --implicit-check-not above. + // + // CHECK-SHARED: ---> piEnqueueKernelLaunch + // CHECK-SHARED: ---> piEventsWait + + log_pi("Test sub device 1"); + { + queue q1(shared_context, subdevices[1]); + use_mem(buf, q1); + } + // CHECK-SHARED: Test sub device 1 + // CHECK-SHARED: ---> piQueueCreate + // CHECK-SHARED: ---> piEnqueueKernelLaunch + // CHECK-SHARED: ---> piEventsWait + // CHECK-SHARED: ---> piEnqueueMemBufferRead + + return true; +} + +static bool check_fused_context(device dev, buffer buf, + partition_fn partition) { + log_pi("Create sub devices"); + std::vector subdevices = partition(dev); + assert(subdevices.size() > 1); + // CHECK-FUSED: Create sub devices + // CHECK-FUSED: ---> piDevicePartition + + // Fused context: same as shared context, but also includes the root device + log_pi("Create fused context"); + std::vector devices; + devices.push_back(dev); + devices.push_back(subdevices[0]); + devices.push_back(subdevices[1]); + context fused_context(devices); + // CHECK-FUSED: Create fused context + // CHECK-FUSED: ---> piContextCreate + // + // Make sure that a single context is created: see --implicit-check-not above. + + log_pi("Test root device"); + { + queue q(fused_context, dev); + use_mem(buf, q); + } + // CHECK-FUSED: Test root device + // CHECK-FUSED: ---> piQueueCreate + // CHECK-FUSED: ---> piMemBufferCreate + // + // Make sure that a single buffer is created (and shared between subdevices + // *and* the root device): see --implicit-check-not above. + // + // CHECK-FUSED: ---> piEnqueueKernelLaunch + // CHECK-FUSED: ---> piEventsWait + + log_pi("Test sub device 0"); + { + queue q0(fused_context, subdevices[0]); + use_mem(buf, q0); + } + // CHECK-FUSED: Test sub device 0 + // CHECK-FUSED: ---> piQueueCreate + // CHECK-FUSED: ---> piEnqueueKernelLaunch + // CHECK-FUSED: ---> piEventsWait + + log_pi("Test sub device 1"); + { + queue q1(fused_context, subdevices[1]); + use_mem(buf, q1); + } + // CHECK-FUSED: Test sub device 1 + // CHECK-FUSED: ---> piQueueCreate + // CHECK-FUSED: ---> piEnqueueKernelLaunch + // CHECK-FUSED: ---> piEventsWait + // CHECK-FUSED: ---> piEnqueueMemBufferRead + + return true; +} + +int main(int argc, const char **argv) { + assert(argc == 3 && "Invalid number of arguments"); + std::string test(argv[1]); + std::string partition_type(argv[2]); + + default_selector selector; + device dev(selector); + + std::vector host_mem(1024, 1); + buffer buf(&host_mem[0], host_mem.size()); + + partition_fn partition; + if (partition_type == "equally") { + partition = partition_equally; + } else if (partition_type == "affinity") { + partition = partition_affinity; + } else { + assert(0 && "Unsupported partition type"); + } + + bool result = false; + if (test == "separate") { + result = check_separate(dev, buf, partition); + } else if (test == "shared") { + result = check_shared_context(dev, buf, partition); + } else if (test == "fused") { + result = check_fused_context(dev, buf, partition); + } else { + assert(0 && "Unknown test"); + } + + if (!result) { + fprintf(stderr, "FAILED\n"); + return EXIT_FAILURE; + } +}