From 3044bafdf0519116c1cb209e4617b5f5a6bfc7cf Mon Sep 17 00:00:00 2001 From: Stuart Adams Date: Thu, 19 Dec 2019 12:53:20 +0000 Subject: [PATCH 1/2] Default work-group sizes based on max Signed-off-by: Stuart Adams --- sycl/source/detail/scheduler/commands.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 965285ce75ddc..9e7f6fa4c1fe9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1417,14 +1417,19 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, if (WGSize[0] == 0) { // kernel does not request specific workgroup shape - set one - // TODO maximum work group size as the local size might not be the best - // choice for CPU or FPGA devices + id<3> MaxWGSizes = get_device_info< + id<3>, cl::sycl::info::device::max_work_item_sizes>:: + get(DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); + size_t WGSize1D = get_kernel_work_group_info< size_t, cl::sycl::info::kernel_work_group::work_group_size>:: get(Kernel, DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); - assert(WGSize1D != 0); - // TODO implement better default for 2D/3D case: - WGSize = {WGSize1D, 1, 1}; + + assert(MaxWGSizes[2] != 0); + + // Set default work-group size in the Z-direction to either the max + // number of work-items or the maximum work-group size in the Z-direction. 
+ WGSize = { 1, 1, min(WGSize1D, MaxWGSizes[2]) }; } NDR.set(NDR.Dims, nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize)); } From 646c65c767eb9cacc0d38d99677e56a76f7afb39 Mon Sep 17 00:00:00 2001 From: Stuart Adams Date: Tue, 24 Mar 2020 17:38:17 +0000 Subject: [PATCH 2/2] Updated formatting Signed-off-by: Stuart Adams --- sycl/source/detail/scheduler/commands.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 9e7f6fa4c1fe9..dd88b9fed9685 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1417,9 +1417,9 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, if (WGSize[0] == 0) { // kernel does not request specific workgroup shape - set one - id<3> MaxWGSizes = get_device_info< - id<3>, cl::sycl::info::device::max_work_item_sizes>:: - get(DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); + id<3> MaxWGSizes = + get_device_info<id<3>, cl::sycl::info::device::max_work_item_sizes>:: + get(DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); size_t WGSize1D = get_kernel_work_group_info< size_t, cl::sycl::info::kernel_work_group::work_group_size>:: @@ -1429,7 +1429,7 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, // Set default work-group size in the Z-direction to either the max // number of work-items or the maximum work-group size in the Z-direction. - WGSize = { 1, 1, min(WGSize1D, MaxWGSizes[2]) }; + WGSize = {1, 1, min(WGSize1D, MaxWGSizes[2])}; } NDR.set(NDR.Dims, nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize)); }