Skip to content

Commit 4d468f1

Browse files
[SYCL] Improve SYCL_PI_TRACE (#2357)
rather than blind size_t *, introduce some simple data structures for passing Origin and Region arguments to plugin interface. This improves code readability, can be dispatched by PI_TRACE improving output, and allows us to more easily see bugs/confusion in how certain 2D/3D arguments aren't being passed correctly to existing Rect/Image operations Signed-off-by: Chris Perkins <[email protected]>
1 parent 95f32d4 commit 4d468f1

File tree

5 files changed

+236
-136
lines changed

5 files changed

+236
-136
lines changed

sycl/include/CL/sycl/detail/pi.h

Lines changed: 57 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -706,13 +706,49 @@ struct pi_device_binary_struct {
706706
};
707707
using pi_device_binary = pi_device_binary_struct *;
708708

709-
// pi_buffer_region structure repeats cl_buffer_region
709+
// pi_buffer_region structure repeats cl_buffer_region, used for sub buffers.
710710
struct pi_buffer_region_struct {
711711
size_t origin;
712712
size_t size;
713713
};
714714
using pi_buffer_region = pi_buffer_region_struct *;
715715

716+
// pi_buff_rect_offset structure is 3D offset argument passed to buffer rect
717+
// operations (piEnqueueMemBufferCopyRect, etc).
718+
struct pi_buff_rect_offset_struct {
719+
size_t x_bytes;
720+
size_t y_scalar;
721+
size_t z_scalar;
722+
};
723+
using pi_buff_rect_offset = pi_buff_rect_offset_struct *;
724+
725+
// pi_buff_rect_region structure represents size of 3D region passed to buffer
726+
// rect operations (piEnqueueMemBufferCopyRect, etc).
727+
struct pi_buff_rect_region_struct {
728+
size_t width_bytes;
729+
size_t height_scalar;
730+
size_t depth_scalar;
731+
};
732+
using pi_buff_rect_region = pi_buff_rect_region_struct *;
733+
734+
// pi_image_offset structure is 3D offset argument passed to image operations
735+
// (piEnqueueMemImageRead, etc).
736+
struct pi_image_offset_struct {
737+
size_t x;
738+
size_t y;
739+
size_t z;
740+
};
741+
using pi_image_offset = pi_image_offset_struct *;
742+
743+
// pi_image_region structure represents size of 3D region passed to image
744+
// operations (piEnqueueMemImageRead, etc).
745+
struct pi_image_region_struct {
746+
size_t width;
747+
size_t height;
748+
size_t depth;
749+
};
750+
using pi_image_region = pi_image_region_struct *;
751+
716752
// Offload binaries descriptor version supported by this library.
717753
static const uint16_t PI_DEVICE_BINARIES_VERSION = 1;
718754

@@ -1261,11 +1297,11 @@ __SYCL_EXPORT pi_result piEnqueueMemBufferRead(
12611297

12621298
__SYCL_EXPORT pi_result piEnqueueMemBufferReadRect(
12631299
pi_queue command_queue, pi_mem buffer, pi_bool blocking_read,
1264-
const size_t *buffer_offset, const size_t *host_offset,
1265-
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
1266-
size_t host_row_pitch, size_t host_slice_pitch, void *ptr,
1267-
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
1268-
pi_event *event);
1300+
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
1301+
pi_buff_rect_region region, size_t buffer_row_pitch,
1302+
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
1303+
void *ptr, pi_uint32 num_events_in_wait_list,
1304+
const pi_event *event_wait_list, pi_event *event);
12691305

12701306
__SYCL_EXPORT pi_result
12711307
piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer,
@@ -1275,11 +1311,11 @@ piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer,
12751311

12761312
__SYCL_EXPORT pi_result piEnqueueMemBufferWriteRect(
12771313
pi_queue command_queue, pi_mem buffer, pi_bool blocking_write,
1278-
const size_t *buffer_offset, const size_t *host_offset,
1279-
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
1280-
size_t host_row_pitch, size_t host_slice_pitch, const void *ptr,
1281-
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
1282-
pi_event *event);
1314+
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
1315+
pi_buff_rect_region region, size_t buffer_row_pitch,
1316+
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
1317+
const void *ptr, pi_uint32 num_events_in_wait_list,
1318+
const pi_event *event_wait_list, pi_event *event);
12831319

12841320
__SYCL_EXPORT pi_result
12851321
piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
@@ -1289,10 +1325,11 @@ piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
12891325

12901326
__SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect(
12911327
pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer,
1292-
const size_t *src_origin, const size_t *dst_origin, const size_t *region,
1293-
size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
1294-
size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list,
1295-
const pi_event *event_wait_list, pi_event *event);
1328+
pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
1329+
pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
1330+
size_t dst_row_pitch, size_t dst_slice_pitch,
1331+
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
1332+
pi_event *event);
12961333

12971334
__SYCL_EXPORT pi_result
12981335
piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
@@ -1302,22 +1339,22 @@ piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
13021339

13031340
__SYCL_EXPORT pi_result piEnqueueMemImageRead(
13041341
pi_queue command_queue, pi_mem image, pi_bool blocking_read,
1305-
const size_t *origin, const size_t *region, size_t row_pitch,
1342+
pi_image_offset origin, pi_image_region region, size_t row_pitch,
13061343
size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list,
13071344
const pi_event *event_wait_list, pi_event *event);
13081345

13091346
__SYCL_EXPORT pi_result piEnqueueMemImageWrite(
13101347
pi_queue command_queue, pi_mem image, pi_bool blocking_write,
1311-
const size_t *origin, const size_t *region, size_t input_row_pitch,
1348+
pi_image_offset origin, pi_image_region region, size_t input_row_pitch,
13121349
size_t input_slice_pitch, const void *ptr,
13131350
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
13141351
pi_event *event);
13151352

13161353
__SYCL_EXPORT pi_result piEnqueueMemImageCopy(
13171354
pi_queue command_queue, pi_mem src_image, pi_mem dst_image,
1318-
const size_t *src_origin, const size_t *dst_origin, const size_t *region,
1319-
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
1320-
pi_event *event);
1355+
pi_image_offset src_origin, pi_image_offset dst_origin,
1356+
pi_image_region region, pi_uint32 num_events_in_wait_list,
1357+
const pi_event *event_wait_list, pi_event *event);
13211358

13221359
__SYCL_EXPORT pi_result
13231360
piEnqueueMemImageFill(pi_queue command_queue, pi_mem image,

sycl/include/CL/sycl/detail/pi.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,32 @@ template <> inline void print<>(PiPlatform val) {
180180
std::cout << "pi_platform : " << val << std::endl;
181181
}
182182

183+
template <> inline void print<>(pi_buffer_region rgn) {
184+
std::cout << "pi_buffer_region origin/size : " << rgn->origin << "/"
185+
<< rgn->size << std::endl;
186+
}
187+
188+
template <> inline void print<>(pi_buff_rect_region rgn) {
189+
std::cout << "pi_buff_rect_region width_bytes/height/depth : "
190+
<< rgn->width_bytes << "/" << rgn->height_scalar << "/"
191+
<< rgn->depth_scalar << std::endl;
192+
}
193+
194+
template <> inline void print<>(pi_buff_rect_offset off) {
195+
std::cout << "pi_buff_rect_offset x_bytes/y/z : " << off->x_bytes << "/"
196+
<< off->y_scalar << "/" << off->z_scalar << std::endl;
197+
}
198+
199+
template <> inline void print<>(pi_image_region rgn) {
200+
std::cout << "pi_image_region width/height/depth : " << rgn->width << "/"
201+
<< rgn->height << "/" << rgn->depth << std::endl;
202+
}
203+
204+
template <> inline void print<>(pi_image_offset off) {
205+
std::cout << "pi_image_offset x/y/z : " << off->x << "/" << off->y << "/"
206+
<< off->z << std::endl;
207+
}
208+
183209
template <> inline void print<>(PiResult val) {
184210
std::cout << "pi_result : ";
185211
if (val == PI_SUCCESS)

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 33 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3351,10 +3351,10 @@ pi_result cuda_piSamplerRelease(pi_sampler sampler) {
33513351
/// If the source and/or destination is on the device, src_ptr and/or dst_ptr
33523352
/// must be a pointer to a CUdeviceptr
33533353
static pi_result commonEnqueueMemBufferCopyRect(
3354-
CUstream cu_stream, const size_t *region, const void *src_ptr,
3355-
const CUmemorytype_enum src_type, const size_t *src_offset,
3354+
CUstream cu_stream, pi_buff_rect_region region, const void *src_ptr,
3355+
const CUmemorytype_enum src_type, pi_buff_rect_offset src_offset,
33563356
size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr,
3357-
const CUmemorytype_enum dst_type, const size_t *dst_offset,
3357+
const CUmemorytype_enum dst_type, pi_buff_rect_offset dst_offset,
33583358
size_t dst_row_pitch, size_t dst_slice_pitch) {
33593359

33603360
assert(region != nullptr);
@@ -3364,27 +3364,27 @@ static pi_result commonEnqueueMemBufferCopyRect(
33643364
assert(src_type == CU_MEMORYTYPE_DEVICE || src_type == CU_MEMORYTYPE_HOST);
33653365
assert(dst_type == CU_MEMORYTYPE_DEVICE || dst_type == CU_MEMORYTYPE_HOST);
33663366

3367-
src_row_pitch = (!src_row_pitch) ? region[0] : src_row_pitch;
3368-
src_slice_pitch =
3369-
(!src_slice_pitch) ? (region[1] * src_row_pitch) : src_slice_pitch;
3370-
dst_row_pitch = (!dst_row_pitch) ? region[0] : dst_row_pitch;
3371-
dst_slice_pitch =
3372-
(!dst_slice_pitch) ? (region[1] * dst_row_pitch) : dst_slice_pitch;
3367+
src_row_pitch = (!src_row_pitch) ? region->width_bytes : src_row_pitch;
3368+
src_slice_pitch = (!src_slice_pitch) ? (region->height_scalar * src_row_pitch)
3369+
: src_slice_pitch;
3370+
dst_row_pitch = (!dst_row_pitch) ? region->width_bytes : dst_row_pitch;
3371+
dst_slice_pitch = (!dst_slice_pitch) ? (region->height_scalar * dst_row_pitch)
3372+
: dst_slice_pitch;
33733373

33743374
CUDA_MEMCPY3D params = {0};
33753375

3376-
params.WidthInBytes = region[0];
3377-
params.Height = region[1];
3378-
params.Depth = region[2];
3376+
params.WidthInBytes = region->width_bytes;
3377+
params.Height = region->height_scalar;
3378+
params.Depth = region->depth_scalar;
33793379

33803380
params.srcMemoryType = src_type;
33813381
params.srcDevice = src_type == CU_MEMORYTYPE_DEVICE
33823382
? *static_cast<const CUdeviceptr *>(src_ptr)
33833383
: 0;
33843384
params.srcHost = src_type == CU_MEMORYTYPE_HOST ? src_ptr : nullptr;
3385-
params.srcXInBytes = src_offset[0];
3386-
params.srcY = src_offset[1];
3387-
params.srcZ = src_offset[2];
3385+
params.srcXInBytes = src_offset->x_bytes;
3386+
params.srcY = src_offset->y_scalar;
3387+
params.srcZ = src_offset->z_scalar;
33883388
params.srcPitch = src_row_pitch;
33893389
params.srcHeight = src_slice_pitch / src_row_pitch;
33903390

@@ -3393,9 +3393,9 @@ static pi_result commonEnqueueMemBufferCopyRect(
33933393
? *static_cast<CUdeviceptr *>(dst_ptr)
33943394
: 0;
33953395
params.dstHost = dst_type == CU_MEMORYTYPE_HOST ? dst_ptr : nullptr;
3396-
params.dstXInBytes = dst_offset[0];
3397-
params.dstY = dst_offset[1];
3398-
params.dstZ = dst_offset[2];
3396+
params.dstXInBytes = dst_offset->x_bytes;
3397+
params.dstY = dst_offset->y_scalar;
3398+
params.dstZ = dst_offset->z_scalar;
33993399
params.dstPitch = dst_row_pitch;
34003400
params.dstHeight = dst_slice_pitch / dst_row_pitch;
34013401

@@ -3404,11 +3404,11 @@ static pi_result commonEnqueueMemBufferCopyRect(
34043404

34053405
pi_result cuda_piEnqueueMemBufferReadRect(
34063406
pi_queue command_queue, pi_mem buffer, pi_bool blocking_read,
3407-
const size_t *buffer_offset, const size_t *host_offset,
3408-
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
3409-
size_t host_row_pitch, size_t host_slice_pitch, void *ptr,
3410-
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
3411-
pi_event *event) {
3407+
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
3408+
pi_buff_rect_region region, size_t buffer_row_pitch,
3409+
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
3410+
void *ptr, pi_uint32 num_events_in_wait_list,
3411+
const pi_event *event_wait_list, pi_event *event) {
34123412

34133413
assert(buffer != nullptr);
34143414
assert(command_queue != nullptr);
@@ -3455,11 +3455,11 @@ pi_result cuda_piEnqueueMemBufferReadRect(
34553455

34563456
pi_result cuda_piEnqueueMemBufferWriteRect(
34573457
pi_queue command_queue, pi_mem buffer, pi_bool blocking_write,
3458-
const size_t *buffer_offset, const size_t *host_offset,
3459-
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
3460-
size_t host_row_pitch, size_t host_slice_pitch, const void *ptr,
3461-
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
3462-
pi_event *event) {
3458+
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
3459+
pi_buff_rect_region region, size_t buffer_row_pitch,
3460+
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
3461+
const void *ptr, pi_uint32 num_events_in_wait_list,
3462+
const pi_event *event_wait_list, pi_event *event) {
34633463

34643464
assert(buffer != nullptr);
34653465
assert(command_queue != nullptr);
@@ -3553,10 +3553,11 @@ pi_result cuda_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
35533553

35543554
pi_result cuda_piEnqueueMemBufferCopyRect(
35553555
pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer,
3556-
const size_t *src_origin, const size_t *dst_origin, const size_t *region,
3557-
size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
3558-
size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list,
3559-
const pi_event *event_wait_list, pi_event *event) {
3556+
pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
3557+
pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
3558+
size_t dst_row_pitch, size_t dst_slice_pitch,
3559+
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
3560+
pi_event *event) {
35603561

35613562
assert(src_buffer != nullptr);
35623563
assert(dst_buffer != nullptr);

0 commit comments

Comments
 (0)