Skip to content

Commit a963e89

Browse files
authored
[SYCL] Make handler-less path default for nd_range based functions (#20327)
Switch the default kernel submisson path from handler-based to handler-less for: - parallel_for queue shortcut function (nd_range based) - nd_launch (nd_range overload only, excluding the launch_config overload) - launch_grouped (global and local range based) This switch is limited to the cases, when: - No kernel function properties are defined - The kernel function does not use the kernel_handler parameter - No reductions are used
1 parent a3132c1 commit a963e89

File tree

9 files changed

+25
-60
lines changed

9 files changed

+25
-60
lines changed

sycl/cmake/modules/AddSYCLUnitTest.cmake

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Internal function to create SYCL unit tests with code reuse
2-
# add_sycl_unittest_internal(test_dirname SHARED|OBJECT is_preview is_no_cgh file1.cpp, file2.cpp ...)
3-
function(add_sycl_unittest_internal test_dirname link_variant is_preview is_no_cgh)
2+
# add_sycl_unittest_internal(test_dirname SHARED|OBJECT is_preview file1.cpp, file2.cpp ...)
3+
function(add_sycl_unittest_internal test_dirname link_variant is_preview)
44
# Enable exception handling for these unit tests
55
set(LLVM_REQUIRES_EH ON)
66
set(LLVM_REQUIRES_RTTI ON)
@@ -37,10 +37,6 @@ function(add_sycl_unittest_internal test_dirname link_variant is_preview is_no_c
3737
set(CMAKE_CURRENT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/Preview")
3838
endif()
3939

40-
if (${is_no_cgh})
41-
set(CMAKE_CURRENT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/NoCGH")
42-
endif()
43-
4440
if ("${link_variant}" MATCHES "SHARED")
4541
set(SYCL_LINK_LIBS ${sycl_so_target})
4642
add_unittest(SYCLUnitTests ${test_dirname} ${ARGN})
@@ -69,18 +65,6 @@ function(add_sycl_unittest_internal test_dirname link_variant is_preview is_no_c
6965
set(sycl_cache_suffix "_preview")
7066
endif()
7167

72-
if (${is_no_cgh})
73-
set(sycl_cache_suffix "_no_cgh")
74-
endif()
75-
76-
if (${is_no_cgh})
77-
target_compile_definitions(
78-
${test_dirname}
79-
PRIVATE
80-
__DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
81-
)
82-
endif()
83-
8468
if (SYCL_ENABLE_XPTI_TRACING)
8569
target_compile_definitions(${test_dirname}
8670
PRIVATE XPTI_ENABLE_INSTRUMENTATION XPTI_STATIC_LIBRARY)
@@ -175,7 +159,6 @@ endfunction()
175159
# the SYCL preview features enabled.
176160
# Produces two binaries, named `basename(test_name_prefix_non_preview)` and `basename(test_name_prefix_preview)`
177161
macro(add_sycl_unittest test_name_prefix link_variant)
178-
add_sycl_unittest_internal(${test_name_prefix}_non_preview ${link_variant} FALSE FALSE ${ARGN})
179-
add_sycl_unittest_internal(${test_name_prefix}_no_cgh ${link_variant} FALSE TRUE ${ARGN})
180-
add_sycl_unittest_internal(${test_name_prefix}_preview ${link_variant} TRUE FALSE ${ARGN})
162+
add_sycl_unittest_internal(${test_name_prefix}_non_preview ${link_variant} FALSE ${ARGN})
163+
add_sycl_unittest_internal(${test_name_prefix}_preview ${link_variant} TRUE ${ARGN})
181164
endmacro()

sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,6 @@ template <typename KernelName = sycl::detail::auto_name, int Dimensions,
259259
typename KernelType, typename... ReductionsT>
260260
void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
261261
ReductionsT &&...Reductions) {
262-
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
263262
// TODO The handler-less path does not support reductions, kernel
264263
// function properties and kernel functions with the kernel_handler
265264
// type argument yet.
@@ -271,9 +270,7 @@ void nd_launch(queue Q, nd_range<Dimensions> Range, const KernelType &KernelObj,
271270
KernelType, sycl::nd_item<Dimensions>>::value)) {
272271
detail::submit_kernel_direct<KernelName>(std::move(Q), empty_properties_t{},
273272
Range, KernelObj);
274-
} else
275-
#endif
276-
{
273+
} else {
277274
submit(std::move(Q), [&](handler &CGH) {
278275
nd_launch<KernelName>(CGH, Range, KernelObj,
279276
std::forward<ReductionsT>(Reductions)...);

sycl/include/sycl/khr/free_function_commands.hpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ template <typename KernelType, typename = typename std::enable_if_t<
157157
void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
158158
const sycl::detail::code_location &codeLoc =
159159
sycl::detail::code_location::current()) {
160-
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
161160
// TODO The handler-less path does not support kernel function properties
162161
// and kernel functions with the kernel_handler type argument yet.
163162
if constexpr (!(ext::oneapi::experimental::detail::
@@ -168,9 +167,7 @@ void launch_grouped(const queue &q, range<1> r, range<1> size, KernelType &&k,
168167
detail::submit_kernel_direct(
169168
q, ext::oneapi::experimental::empty_properties_t{},
170169
nd_range<1>(r, size), std::forward<KernelType>(k));
171-
} else
172-
#endif
173-
{
170+
} else {
174171
submit(
175172
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
176173
codeLoc);
@@ -181,7 +178,6 @@ template <typename KernelType, typename = typename std::enable_if_t<
181178
void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
182179
const sycl::detail::code_location &codeLoc =
183180
sycl::detail::code_location::current()) {
184-
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
185181
// TODO The handler-less path does not support kernel function properties
186182
// and kernel functions with the kernel_handler type argument yet.
187183
if constexpr (!(ext::oneapi::experimental::detail::
@@ -192,9 +188,7 @@ void launch_grouped(const queue &q, range<2> r, range<2> size, KernelType &&k,
192188
detail::submit_kernel_direct(
193189
q, ext::oneapi::experimental::empty_properties_t{},
194190
nd_range<2>(r, size), std::forward<KernelType>(k));
195-
} else
196-
#endif
197-
{
191+
} else {
198192
submit(
199193
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
200194
codeLoc);
@@ -205,7 +199,6 @@ template <typename KernelType, typename = typename std::enable_if_t<
205199
void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
206200
const sycl::detail::code_location &codeLoc =
207201
sycl::detail::code_location::current()) {
208-
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
209202
// TODO The handler-less path does not support kernel function properties
210203
// and kernel functions with the kernel_handler type argument yet.
211204
if constexpr (!(ext::oneapi::experimental::detail::
@@ -216,9 +209,7 @@ void launch_grouped(const queue &q, range<3> r, range<3> size, KernelType &&k,
216209
detail::submit_kernel_direct(
217210
q, ext::oneapi::experimental::empty_properties_t{},
218211
nd_range<3>(r, size), std::forward<KernelType>(k));
219-
} else
220-
#endif
221-
{
212+
} else {
222213
submit(
223214
q, [&](handler &h) { launch_grouped<KernelType>(h, r, size, k); },
224215
codeLoc);

sycl/include/sycl/queue.hpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3275,7 +3275,6 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32753275
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
32763276
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
32773277
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
3278-
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
32793278
using KernelType = std::tuple_element_t<0, std::tuple<RestT...>>;
32803279

32813280
// TODO The handler-less path does not support reductions, kernel
@@ -3290,9 +3289,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
32903289
return detail::submit_kernel_direct<KernelName, true>(
32913290
*this, ext::oneapi::experimental::empty_properties_t{}, Range,
32923291
Rest..., TlsCodeLocCapture.query());
3293-
} else
3294-
#endif
3295-
{
3292+
} else {
32963293
return submit(
32973294
[&](handler &CGH) {
32983295
CGH.template parallel_for<KernelName>(Range, Rest...);

sycl/test-e2e/Basic/test_num_kernel_copies.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ int main(int argc, char **argv) {
2929

3030
kernel<1> krn1;
3131
q.parallel_for(sycl::nd_range<1>{1, 1}, krn1);
32-
assert(copy_count == 1);
32+
// The kernel is copied on the scheduler-based path only
33+
assert(copy_count == 0);
3334
assert(move_count == 0);
3435
copy_count = 0;
3536

sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionCommandsEvents.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,6 @@ TEST_F(FreeFunctionCommandsEventsTests, LaunchGroupedShortcutNoEvent) {
227227
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{1});
228228
}
229229

230-
#if __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
231230
TEST_F(FreeFunctionCommandsEventsTests,
232231
LaunchGroupedShortcutMoveKernelNoEvent) {
233232
mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch",
@@ -252,6 +251,11 @@ TEST_F(FreeFunctionCommandsEventsTests,
252251
// to force the scheduler-based submission. In this case, the HostKernel
253252
// should be constructed.
254253

254+
// Replace the callback with an event based one, since the scheduler
255+
// needs to create an event internally
256+
mock::getCallbacks().set_replace_callback(
257+
"urEnqueueKernelLaunch", &redefined_urEnqueueKernelLaunchWithEvent);
258+
255259
Queue.submit([&](sycl::handler &CGH) {
256260
CGH.host_task([&] {
257261
std::unique_lock<std::mutex> lk(CvMutex);
@@ -274,9 +278,8 @@ TEST_F(FreeFunctionCommandsEventsTests,
274278
// HostKernel. Copy ctor is called by InstantiateKernelOnHost, can't delete
275279
// it.
276280
ASSERT_EQ(TestMoveFunctor::MoveCtorCalls, 1);
277-
ASSERT_EQ(counter_urEnqueueKernelLaunch, size_t{2});
281+
ASSERT_EQ(counter_urEnqueueKernelLaunchWithEvent, size_t{1});
278282
}
279-
#endif
280283

281284
TEST_F(FreeFunctionCommandsEventsTests, SubmitLaunchGroupedKernelNoEvent) {
282285
mock::getCallbacks().set_replace_callback("urEnqueueKernelLaunch",

sycl/unittests/Extensions/FreeFunctionCommands/FreeFunctionEventsHelpers.hpp

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,53 +26,48 @@ inline ur_result_t after_urKernelGetInfo(void *pParams) {
2626
static thread_local size_t counter_urEnqueueKernelLaunch = 0;
2727
inline ur_result_t redefined_urEnqueueKernelLaunch(void *pParams) {
2828
++counter_urEnqueueKernelLaunch;
29-
// TODO The no-handler scheduler submission includes a fix for the event return,
30-
// where the event is returned by the scheduler on every submission. This fix
31-
// is not yet applied to the handler-based path.
32-
#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
3329
auto params = *static_cast<ur_enqueue_kernel_launch_params_t *>(pParams);
3430
EXPECT_EQ(*params.pphEvent, nullptr);
35-
#endif
31+
return UR_RESULT_SUCCESS;
32+
}
33+
34+
static thread_local size_t counter_urEnqueueKernelLaunchWithEvent = 0;
35+
inline ur_result_t redefined_urEnqueueKernelLaunchWithEvent(void *pParams) {
36+
++counter_urEnqueueKernelLaunchWithEvent;
37+
auto params = *static_cast<ur_enqueue_kernel_launch_params_t *>(pParams);
38+
EXPECT_NE(*params.pphEvent, nullptr);
3639
return UR_RESULT_SUCCESS;
3740
}
3841

3942
static thread_local size_t counter_urUSMEnqueueMemcpy = 0;
4043
inline ur_result_t redefined_urUSMEnqueueMemcpy(void *pParams) {
4144
++counter_urUSMEnqueueMemcpy;
42-
#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
4345
auto params = *static_cast<ur_enqueue_usm_memcpy_params_t *>(pParams);
4446
EXPECT_EQ(*params.pphEvent, nullptr);
45-
#endif
4647
return UR_RESULT_SUCCESS;
4748
}
4849

4950
static thread_local size_t counter_urUSMEnqueueFill = 0;
5051
inline ur_result_t redefined_urUSMEnqueueFill(void *pParams) {
5152
++counter_urUSMEnqueueFill;
52-
#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
5353
auto params = *static_cast<ur_enqueue_usm_fill_params_t *>(pParams);
5454
EXPECT_EQ(*params.pphEvent, nullptr);
55-
#endif
5655
return UR_RESULT_SUCCESS;
5756
}
5857

5958
static thread_local size_t counter_urUSMEnqueuePrefetch = 0;
6059
inline ur_result_t redefined_urUSMEnqueuePrefetch(void *pParams) {
6160
++counter_urUSMEnqueuePrefetch;
62-
#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
6361
auto params = *static_cast<ur_enqueue_usm_prefetch_params_t *>(pParams);
6462
EXPECT_EQ(*params.pphEvent, nullptr);
65-
#endif
6663
return UR_RESULT_SUCCESS;
6764
}
6865

6966
static thread_local size_t counter_urUSMEnqueueMemAdvise = 0;
7067
inline ur_result_t redefined_urUSMEnqueueMemAdvise(void *pParams) {
7168
++counter_urUSMEnqueueMemAdvise;
72-
#ifndef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
7369
auto params = *static_cast<ur_enqueue_usm_advise_params_t *>(pParams);
7470
EXPECT_EQ(*params.pphEvent, nullptr);
75-
#endif
7671
return UR_RESULT_SUCCESS;
7772
}
7873

sycl/unittests/compression/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,4 @@ add_sycl_unittest(CompressionTests OBJECT
22
CompressionTests.cpp
33
)
44
target_compile_definitions(CompressionTests_non_preview PRIVATE SYCL_RT_ZSTD_AVAILABLE)
5-
target_compile_definitions(CompressionTests_no_cgh PRIVATE SYCL_RT_ZSTD_AVAILABLE)
65
target_compile_definitions(CompressionTests_preview PRIVATE SYCL_RT_ZSTD_AVAILABLE __INTEL_PREVIEW_BREAKING_CHANGES)

sycl/unittests/xpti_trace/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,3 @@ add_sycl_unittest(XptiTraceTests OBJECT
88
)
99
target_link_libraries(XptiTraceTests_non_preview PRIVATE xpti xptitest_subscriber)
1010
target_link_libraries(XptiTraceTests_preview PRIVATE xpti xptitest_subscriber)
11-
target_link_libraries(XptiTraceTests_no_cgh PRIVATE xpti xptitest_subscriber)

0 commit comments

Comments
 (0)