Skip to content

Commit 72341ee

Browse files
BensuoreblejulianmiEwanC
authored
[SYCL][Graph] Backend integration and feature additions for SYCL Graphs (3/4) (#10033)
# Backend integration and feature additions for SYCL Graphs This is the third patch of a series that adds support for an [experimental command graph extension](#5626) A snapshot of the complete work can be seen in draft PR #9375 which has support all the specification defined ways of adding nodes and edges to the graph, including both Explicit and Record & Replay graph construction. The two types of nodes currently implemented are kernel execution and memcpy commands. See https://github.com/reble/llvm#implementation-status for the status of our total work. ## Scope This third patch focuses on integrating the graphs runtime with the backend support added in #9992 as well as any remaining runtime features and bug fixes, and should include no ABI-breaking changes: * Graphs runtime changes to use PI/UR command-buffers. * Various improvements to the Graphs runtime classes. * New memory manager methods for appending copies to a command-buffer. * Changes to the Scheduler and related CG classes to enable Graphs. * Device info query for command-graph support. * Minor changes to some runtime classes to enable Graphs. ## Following Split PRs Future follow-up PRs with the remainder of our work on the extension will include: * Add end-to-end tests for SYCL Graph extension. (4/4) * NFC changes - Design doc and codeowner update. ## Authors Co-authored-by: Pablo Reble <[email protected]> Co-authored-by: Julian Miller <[email protected]> Co-authored-by: Ben Tracy <[email protected]> Co-authored-by: Ewan Crawford <[email protected]> Co-authored-by: Maxime France-Pillois <[email protected]>
1 parent 5d1da25 commit 72341ee

32 files changed

+1699
-231
lines changed

sycl/include/sycl/detail/cg.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,17 @@ class CGCopyFromDeviceGlobal : public CG {
497497
size_t getOffset() { return MOffset; }
498498
};
499499

500+
/// "Execute command-buffer" command group class.
501+
class CGExecCommandBuffer : public CG {
502+
public:
503+
sycl::detail::pi::PiExtCommandBuffer MCommandBuffer;
504+
505+
CGExecCommandBuffer(sycl::detail::pi::PiExtCommandBuffer CommandBuffer,
506+
CG::StorageInitHelper CGData)
507+
: CG(CGTYPE::ExecCommandBuffer, std::move(CGData)),
508+
MCommandBuffer(CommandBuffer) {}
509+
};
510+
500511
} // namespace detail
501512
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
502513
} // namespace sycl

sycl/include/sycl/detail/pi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2284,7 +2284,7 @@ __SYCL_EXPORT pi_result piextCommandBufferMemBufferCopyRect(
22842284
/// \param sync_point The sync_point associated with this memory operation.
22852285
__SYCL_EXPORT pi_result piextCommandBufferMemBufferRead(
22862286
pi_ext_command_buffer command_buffer, pi_mem buffer, size_t offset,
2287-
size_t size, void *dst, pi_uint32 num_events_in_wait_list,
2287+
size_t size, void *dst, pi_uint32 num_sync_points_in_wait_list,
22882288
const pi_ext_sync_point *sync_point_wait_list,
22892289
pi_ext_sync_point *sync_point);
22902290

sycl/include/sycl/ext/oneapi/experimental/graph.hpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,8 +221,6 @@ class __SYCL_EXPORT executable_command_graph {
221221

222222
int MTag;
223223
std::shared_ptr<detail::exec_graph_impl> impl;
224-
225-
friend class modifiable_command_graph;
226224
};
227225
} // namespace detail
228226

@@ -248,7 +246,10 @@ class command_graph : public detail::modifiable_command_graph {
248246
template <>
249247
class command_graph<graph_state::executable>
250248
: public detail::executable_command_graph {
251-
private:
249+
250+
protected:
251+
friend command_graph<graph_state::executable>
252+
detail::modifiable_command_graph::finalize(const sycl::property_list &) const;
252253
using detail::executable_command_graph::executable_command_graph;
253254
};
254255

sycl/include/sycl/handler.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1542,6 +1542,12 @@ class __SYCL_EXPORT handler {
15421542
setType(detail::CG::CodeplayHostTask);
15431543
}
15441544

1545+
/// @brief Get the command graph if any associated with this handler. It can
1546+
/// come from either the associated queue or from being set explicitly through
1547+
/// the appropriate constructor.
1548+
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
1549+
getCommandGraph() const;
1550+
15451551
public:
15461552
handler(const handler &) = delete;
15471553
handler(handler &&) = delete;

sycl/include/sycl/info/ext_oneapi_device_traits.def

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ __SYCL_PARAM_TRAITS_TEMPLATE_SPEC(ext::oneapi::experimental,device, max_work_gro
99
__SYCL_PARAM_TRAITS_SPEC(ext::oneapi::experimental, device, architecture,
1010
ext::oneapi::experimental::architecture,
1111
PI_EXT_ONEAPI_DEVICE_INFO_IP_VERSION)
12+
__SYCL_PARAM_TRAITS_SPEC(
13+
ext::oneapi::experimental, device, graph_support,
14+
ext::oneapi::experimental::info::graph_support_level,
15+
0 /* No PI device code needed */)
1216
#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
1317
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
1418
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF

sycl/include/sycl/info/info_desc.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,9 +185,14 @@ template <typename T, T param> struct compatibility_param_traits {};
185185
} /*namespace info */ \
186186
} /*namespace Namespace */
187187

188-
namespace ext::oneapi::experimental::info::device {
188+
namespace ext::oneapi::experimental::info {
189+
190+
enum class graph_support_level { unsupported = 0, native, emulated };
191+
192+
namespace device {
189193
template <int Dimensions> struct max_work_groups;
190-
} // namespace ext::oneapi::experimental::info::device
194+
} // namespace device
195+
} // namespace ext::oneapi::experimental::info
191196
#include <sycl/info/ext_codeplay_device_traits.def>
192197
#include <sycl/info/ext_intel_device_traits.def>
193198
#include <sycl/info/ext_oneapi_device_traits.def>

sycl/include/sycl/queue.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,16 @@ static event submitAssertCapture(queue &, event &, queue *,
7676
#endif
7777
} // namespace detail
7878

79+
namespace ext {
80+
namespace oneapi {
81+
namespace experimental {
82+
// State of a queue with regards to graph recording,
83+
// returned by info::queue::state
84+
enum class queue_state { executing, recording };
85+
} // namespace experimental
86+
} // namespace oneapi
87+
} // namespace ext
88+
7989
/// Encapsulates a single SYCL queue which schedules kernels on a SYCL device.
8090
///
8191
/// A SYCL queue can be used to submit command groups to be executed by the SYCL
@@ -283,6 +293,9 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
283293
/// \return SYCL device this queue was constructed with.
284294
device get_device() const;
285295

296+
/// \return State the queue is currently in.
297+
ext::oneapi::experimental::queue_state ext_oneapi_get_state() const;
298+
286299
/// \return true if this queue is a SYCL host queue.
287300
__SYCL2020_DEPRECATED(
288301
"is_host() is deprecated as the host device is no longer supported.")

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,46 +1118,46 @@ pi_result piextCommandBufferMemBufferCopyRect(
11181118

11191119
pi_result piextCommandBufferMemBufferRead(
11201120
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset,
1121-
size_t Size, void *Dst, pi_uint32 NumEventsInWaitList,
1121+
size_t Size, void *Dst, pi_uint32 NumSyncPointsInWaitList,
11221122
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
1123-
return pi2ur::piextCommandBufferMemBufferRead(CommandBuffer, Buffer, Offset,
1124-
Size, Dst, NumEventsInWaitList,
1125-
SyncPointWaitList, SyncPoint);
1123+
return pi2ur::piextCommandBufferMemBufferRead(
1124+
CommandBuffer, Buffer, Offset, Size, Dst, NumSyncPointsInWaitList,
1125+
SyncPointWaitList, SyncPoint);
11261126
}
11271127

11281128
pi_result piextCommandBufferMemBufferReadRect(
11291129
pi_ext_command_buffer CommandBuffer, pi_mem Buffer,
11301130
pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset,
11311131
pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch,
11321132
size_t HostRowPitch, size_t HostSlicePitch, void *Ptr,
1133-
pi_uint32 NumEventsInWaitList, const pi_ext_sync_point *SyncPointWaitList,
1134-
pi_ext_sync_point *SyncPoint) {
1133+
pi_uint32 NumSyncPointsInWaitList,
1134+
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
11351135
return pi2ur::piextCommandBufferMemBufferReadRect(
11361136
CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch,
1137-
BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, NumEventsInWaitList,
1138-
SyncPointWaitList, SyncPoint);
1137+
BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr,
1138+
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
11391139
}
11401140

11411141
pi_result piextCommandBufferMemBufferWrite(
11421142
pi_ext_command_buffer CommandBuffer, pi_mem Buffer, size_t Offset,
1143-
size_t Size, const void *Ptr, pi_uint32 NumEventsInWaitList,
1143+
size_t Size, const void *Ptr, pi_uint32 NumSyncPointsInWaitList,
11441144
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
1145-
return pi2ur::piextCommandBufferMemBufferWrite(CommandBuffer, Buffer, Offset,
1146-
Size, Ptr, NumEventsInWaitList,
1147-
SyncPointWaitList, SyncPoint);
1145+
return pi2ur::piextCommandBufferMemBufferWrite(
1146+
CommandBuffer, Buffer, Offset, Size, Ptr, NumSyncPointsInWaitList,
1147+
SyncPointWaitList, SyncPoint);
11481148
}
11491149

11501150
pi_result piextCommandBufferMemBufferWriteRect(
11511151
pi_ext_command_buffer CommandBuffer, pi_mem Buffer,
11521152
pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset,
11531153
pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch,
11541154
size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr,
1155-
pi_uint32 NumEventsInWaitList, const pi_ext_sync_point *SyncPointWaitList,
1156-
pi_ext_sync_point *SyncPoint) {
1155+
pi_uint32 NumSyncPointsInWaitList,
1156+
const pi_ext_sync_point *SyncPointWaitList, pi_ext_sync_point *SyncPoint) {
11571157
return pi2ur::piextCommandBufferMemBufferWriteRect(
11581158
CommandBuffer, Buffer, BufferOffset, HostOffset, Region, BufferRowPitch,
1159-
BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr, NumEventsInWaitList,
1160-
SyncPointWaitList, SyncPoint);
1159+
BufferSlicePitch, HostRowPitch, HostSlicePitch, Ptr,
1160+
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
11611161
}
11621162

11631163
pi_result piextEnqueueCommandBuffer(pi_ext_command_buffer CommandBuffer,

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2726,6 +2726,12 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
27262726
_PI_CL(piextCommandBufferMemBufferCopy, piextCommandBufferMemBufferCopy)
27272727
_PI_CL(piextCommandBufferMemBufferCopyRect,
27282728
piextCommandBufferMemBufferCopyRect)
2729+
_PI_CL(piextCommandBufferMemBufferRead, piextCommandBufferMemBufferRead)
2730+
_PI_CL(piextCommandBufferMemBufferReadRect,
2731+
piextCommandBufferMemBufferReadRect)
2732+
_PI_CL(piextCommandBufferMemBufferWrite, piextCommandBufferMemBufferWrite)
2733+
_PI_CL(piextCommandBufferMemBufferWriteRect,
2734+
piextCommandBufferMemBufferWriteRect)
27292735
_PI_CL(piextEnqueueCommandBuffer, piextEnqueueCommandBuffer)
27302736

27312737
_PI_CL(piextKernelSetArgMemObj, piextKernelSetArgMemObj)

sycl/plugins/unified_runtime/ur/adapters/level_zero/command_buffer.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -535,9 +535,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp(
535535
uint32_t NumSyncPointsInWaitList,
536536
const ur_exp_command_buffer_sync_point_t *SyncPointWaitList,
537537
ur_exp_command_buffer_sync_point_t *SyncPoint) {
538-
(void)SrcOffset;
539-
(void)DstOffset;
540-
541538
auto SrcBuffer = ur_cast<ur_mem_handle_t>(SrcMem);
542539
auto DstBuffer = ur_cast<ur_mem_handle_t>(DstMem);
543540

@@ -553,8 +550,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyExp(
553550
CommandBuffer->Device));
554551

555552
return enqueueCommandBufferMemCopyHelper(
556-
UR_COMMAND_MEM_BUFFER_COPY, CommandBuffer, ZeHandleDst, ZeHandleSrc, Size,
557-
NumSyncPointsInWaitList, SyncPointWaitList, SyncPoint);
553+
UR_COMMAND_MEM_BUFFER_COPY, CommandBuffer, ZeHandleDst + DstOffset,
554+
ZeHandleSrc + SrcOffset, Size, NumSyncPointsInWaitList, SyncPointWaitList,
555+
SyncPoint);
558556
}
559557

560558
UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMembufferCopyRectExp(

0 commit comments

Comments
 (0)