From f49927dc53c3b17cbf5c14f9d9b75d5666864f5f Mon Sep 17 00:00:00 2001
From: Andrew Savonichev
Date: Fri, 17 Apr 2020 14:51:57 +0300
Subject: [PATCH 1/3] [SYCL] Add a test for SYCL subdevice feature

The test verifies that subdevices can be created with separate, shared
and fused contexts.

Signed-off-by: Andrew Savonichev
---
Notes (not part of the commit message):
  This copy of the series had lost its line breaks and every
  angle-bracketed token (#include targets, template argument lists).
  They were re-derived from how the code uses them; the kernel name
  `increment_kernel' is a placeholder. Argument and sub-device-count
  validation no longer relies on assert(), which is compiled out under
  NDEBUG. The blob ids on the `index' lines of this series predate
  these edits.

 sycl/test/basic_tests/subdevice_pi.cpp | 256 +++++++++++++++++++++++++
 1 file changed, 256 insertions(+)
 create mode 100644 sycl/test/basic_tests/subdevice_pi.cpp

diff --git a/sycl/test/basic_tests/subdevice_pi.cpp b/sycl/test/basic_tests/subdevice_pi.cpp
new file mode 100644
index 0000000000000..d49f9fb9b11a3
--- /dev/null
+++ b/sycl/test/basic_tests/subdevice_pi.cpp
@@ -0,0 +1,256 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out separate equally | FileCheck %s --check-prefix CHECK-SEPARATE
+// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out shared equally | FileCheck %s --check-prefix CHECK-SHARED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate
+// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out fused equally | FileCheck %s --check-prefixes CHECK-FUSED,CHECK-FUSED-CONTEXT,CHECK-FUSED-MEM
+//
+// Intel OpenCL CPU Runtime supports device partition on all (multi-core)
+// platforms. Other devices may not support this.
+
+#include <CL/sycl.hpp>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <vector>
+
+using namespace cl::sycl;
+
+// Log to the same stream as SYCL_PI_TRACE
+static void log_pi(const char *msg) {
+  std::cout << msg << std::endl;
+}
+
+// Add 1 to every element of `buf' with a kernel submitted to `q', then wait
+// for the queue to drain.
+static void use_mem(buffer<int, 1> buf, queue q) {
+  q.submit(
+    [&](handler &cgh) {
+      auto acc = buf.get_access<access::mode::read_write>(cgh);
+      cgh.parallel_for<class increment_kernel>(range<1>(buf.get_count()),
+        [=](item<1> itemID) {
+          acc[itemID] += 1;
+        });
+    });
+  q.wait();
+}
+
+// A strategy for splitting a root device into sub devices.
+typedef std::vector<device> (*partition_fn)(device dev);
+
+// FIXME: `partition_by_affinity_domain' is currently not tested: OpenCL CPU
+// device only supports `partition_equally'.
+static std::vector<device> partition_affinity(device dev) {
+  std::vector<device> subdevices = dev.create_sub_devices<
+      info::partition_property::partition_by_affinity_domain>(
+      info::partition_affinity_domain::next_partitionable);
+
+  return subdevices;
+}
+
+static std::vector<device> partition_equally(device dev) {
+  std::vector<device> subdevices =
+      dev.create_sub_devices<info::partition_property::partition_equally>(1);
+
+  return subdevices;
+}
+
+// Scenario "separate": each sub device gets a queue with its own implicit
+// context, so the buffer has to be copied between the two.
+static bool check_separate(device dev, buffer<int, 1> buf,
+                           partition_fn partition) {
+  log_pi("Create sub devices");
+  std::vector<device> subdevices = partition(dev);
+  if (subdevices.size() < 2) {
+    fprintf(stderr, "Expected at least two sub devices\n");
+    return false;
+  }
+  // CHECK-SEPARATE: Create sub devices
+  // CHECK-SEPARATE: ---> piDevicePartition
+
+  log_pi("Test sub device 0");
+  {
+    queue q0(subdevices[0]);
+    use_mem(buf, q0);
+  }
+  // CHECK-SEPARATE: Test sub device 0
+  // CHECK-SEPARATE: ---> piContextCreate
+  // CHECK-SEPARATE: ---> piQueueCreate
+  // CHECK-SEPARATE: ---> piMemBufferCreate
+  // CHECK-SEPARATE: ---> piEnqueueKernelLaunch
+  // CHECK-SEPARATE: ---> piEventsWait
+
+  log_pi("Test sub device 1");
+  {
+    queue q1(subdevices[1]);
+    use_mem(buf, q1);
+  }
+  // CHECK-SEPARATE: Test sub device 1
+  // CHECK-SEPARATE: ---> piContextCreate
+  // CHECK-SEPARATE: ---> piQueueCreate
+  // CHECK-SEPARATE: ---> piMemBufferCreate
+  //
+  // Verify that we have a memcpy between subdevices in this case
+  // CHECK-SEPARATE: ---> piEnqueueMemBufferMap
+  // CHECK-SEPARATE: ---> piEnqueueMemBufferWrite
+  //
+  // CHECK-SEPARATE: ---> piEnqueueKernelLaunch
+  // CHECK-SEPARATE: ---> piEventsWait
+
+  return true;
+}
+
+// Scenario "shared": both sub devices run in one explicitly created context.
+static bool check_shared_context(device dev, buffer<int, 1> buf,
+                                 partition_fn partition) {
+  log_pi("Create sub devices");
+  std::vector<device> subdevices = partition(dev);
+  if (subdevices.size() < 2) {
+    fprintf(stderr, "Expected at least two sub devices\n");
+    return false;
+  }
+  // CHECK-SHARED: Create sub devices
+  // CHECK-SHARED: ---> piDevicePartition
+
+  // Shared context: queues are bound to specific subdevices, but
+  // memory does not migrate
+  log_pi("Create shared context");
+  context shared_context(subdevices);
+  // CHECK-SHARED: Create shared context
+  // CHECK-SHARED: ---> piContextCreate
+  //
+  // Make sure that a single context is created: see --implicit-check-not above.
+
+  log_pi("Test sub device 0");
+  {
+    queue q0(shared_context, subdevices[0]);
+    use_mem(buf, q0);
+  }
+  // CHECK-SHARED: Test sub device 0
+  // CHECK-SHARED: ---> piQueueCreate
+  // CHECK-SHARED: ---> piMemBufferCreate
+  //
+  // Make sure that a single buffer is created (and shared between subdevices):
+  // see --implicit-check-not above.
+  //
+  // CHECK-SHARED: ---> piEnqueueKernelLaunch
+  // CHECK-SHARED: ---> piEventsWait
+
+  log_pi("Test sub device 1");
+  {
+    queue q1(shared_context, subdevices[1]);
+    use_mem(buf, q1);
+  }
+  // CHECK-SHARED: Test sub device 1
+  // CHECK-SHARED: ---> piQueueCreate
+  // CHECK-SHARED: ---> piEnqueueKernelLaunch
+  // CHECK-SHARED: ---> piEventsWait
+  // CHECK-SHARED: ---> piEnqueueMemBufferRead
+
+  return true;
+}
+
+// Scenario "fused": one context holds the root device and two sub devices.
+static bool check_fused_context(device dev, buffer<int, 1> buf,
+                                partition_fn partition) {
+  log_pi("Create sub devices");
+  std::vector<device> subdevices = partition(dev);
+  if (subdevices.size() < 2) {
+    fprintf(stderr, "Expected at least two sub devices\n");
+    return false;
+  }
+  // CHECK-FUSED: Create sub devices
+  // CHECK-FUSED: ---> piDevicePartition
+
+  // Fused context: same as shared context, but also includes the root device
+  log_pi("Create fused context");
+  std::vector<device> devices;
+  devices.push_back(dev);
+  devices.push_back(subdevices[0]);
+  devices.push_back(subdevices[1]);
+  context fused_context(devices);
+  // CHECK-FUSED: Create fused context
+  // CHECK-FUSED: ---> piContextCreate
+  //
+  // Make sure that a single context is created: see --implicit-check-not above.
+
+  log_pi("Test root device");
+  {
+    queue q(fused_context, dev);
+    use_mem(buf, q);
+  }
+  // CHECK-FUSED: Test root device
+  // CHECK-FUSED: ---> piQueueCreate
+  // CHECK-FUSED: ---> piMemBufferCreate
+  //
+  // Make sure that a single buffer is created (and shared between subdevices
+  // *and* the root device): see --implicit-check-not above.
+  //
+  // CHECK-FUSED: ---> piEnqueueKernelLaunch
+  // CHECK-FUSED: ---> piEventsWait
+
+  log_pi("Test sub device 0");
+  {
+    queue q0(fused_context, subdevices[0]);
+    use_mem(buf, q0);
+  }
+  // CHECK-FUSED: Test sub device 0
+  // CHECK-FUSED: ---> piQueueCreate
+  // CHECK-FUSED: ---> piEnqueueKernelLaunch
+  // CHECK-FUSED: ---> piEventsWait
+
+  log_pi("Test sub device 1");
+  {
+    queue q1(fused_context, subdevices[1]);
+    use_mem(buf, q1);
+  }
+  // CHECK-FUSED: Test sub device 1
+  // CHECK-FUSED: ---> piQueueCreate
+  // CHECK-FUSED: ---> piEnqueueKernelLaunch
+  // CHECK-FUSED: ---> piEventsWait
+  // CHECK-FUSED: ---> piEnqueueMemBufferRead
+
+  return true;
+}
+
+// Usage: <binary> <separate|shared|fused> <equally|affinity>
+int main(int argc, const char **argv) {
+  if (argc != 3) {
+    fprintf(stderr, "Invalid number of arguments\n");
+    return EXIT_FAILURE;
+  }
+  std::string test(argv[1]);
+  std::string partition_type(argv[2]);
+
+  default_selector selector;
+  device dev(selector);
+
+  std::vector<int> host_mem(1024, 1);
+  buffer<int, 1> buf(host_mem.data(), host_mem.size());
+
+  partition_fn partition = nullptr;
+  if (partition_type == "equally") {
+    partition = partition_equally;
+  } else if (partition_type == "affinity") {
+    partition = partition_affinity;
+  } else {
+    fprintf(stderr, "Unsupported partition type\n");
+    return EXIT_FAILURE;
+  }
+
+  bool result = false;
+  if (test == "separate") {
+    result = check_separate(dev, buf, partition);
+  } else if (test == "shared") {
+    result = check_shared_context(dev, buf, partition);
+  } else if (test == "fused") {
+    result = check_fused_context(dev, buf, partition);
+  } else {
+    fprintf(stderr, "Unknown test\n");
+    return EXIT_FAILURE;
+  }
+
+  if (!result) {
+    fprintf(stderr, "FAILED\n");
+    return EXIT_FAILURE;
+  }
+}

From 21839493345d40aeef19cd8fa76b0fd2d53ae150 Mon Sep 17 00:00:00 2001
From: Andrew Savonichev
Date: Mon, 20 Apr 2020 17:33:22 +0300
Subject: [PATCH 2/3] Fix FileCheck arguments

Signed-off-by: Andrew Savonichev
---
 sycl/test/basic_tests/subdevice_pi.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sycl/test/basic_tests/subdevice_pi.cpp b/sycl/test/basic_tests/subdevice_pi.cpp
index d49f9fb9b11a3..96d57a93f3cab 100644
--- a/sycl/test/basic_tests/subdevice_pi.cpp
+++ b/sycl/test/basic_tests/subdevice_pi.cpp
@@ -1,7 +1,7 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out separate equally | FileCheck %s --check-prefix CHECK-SEPARATE
 // RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out shared equally | FileCheck %s --check-prefix CHECK-SHARED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate
-// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out fused equally | FileCheck %s --check-prefixes CHECK-FUSED,CHECK-FUSED-CONTEXT,CHECK-FUSED-MEM
+// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out fused equally | FileCheck %s --check-prefix CHECK-FUSED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate
 //
 // Intel OpenCL CPU Runtime supports device partition on all (multi-core)
 // platforms. Other devices may not support this.

From c384c21e96775c124e10aef413068cf47a09f45f Mon Sep 17 00:00:00 2001
From: Andrew Savonichev
Date: Tue, 21 Apr 2020 13:38:55 +0300
Subject: [PATCH 3/3] Use CPU_CHECK_PLACEHOLDER instead of FileCheck

If %CPU_RUN_PLACEHOLDER is substituted to `echo', FileCheck will not
match.

Signed-off-by: Andrew Savonichev
---
 sycl/test/basic_tests/subdevice_pi.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sycl/test/basic_tests/subdevice_pi.cpp b/sycl/test/basic_tests/subdevice_pi.cpp
index 96d57a93f3cab..af58fcc68a32f 100644
--- a/sycl/test/basic_tests/subdevice_pi.cpp
+++ b/sycl/test/basic_tests/subdevice_pi.cpp
@@ -1,7 +1,7 @@
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
-// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out separate equally | FileCheck %s --check-prefix CHECK-SEPARATE
-// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out shared equally | FileCheck %s --check-prefix CHECK-SHARED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate
-// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out fused equally | FileCheck %s --check-prefix CHECK-FUSED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate
+// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out separate equally %CPU_CHECK_PLACEHOLDER --check-prefix CHECK-SEPARATE
+// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out shared equally %CPU_CHECK_PLACEHOLDER --check-prefix CHECK-SHARED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate
+// RUN: %CPU_RUN_PLACEHOLDER SYCL_PI_TRACE=1 %t.out fused equally %CPU_CHECK_PLACEHOLDER --check-prefix CHECK-FUSED --implicit-check-not piContextCreate --implicit-check-not piMemBufferCreate
 //
 // Intel OpenCL CPU Runtime supports device partition on all (multi-core)
 // platforms. Other devices may not support this.