Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 57 additions & 20 deletions sycl/include/CL/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -706,13 +706,49 @@ struct pi_device_binary_struct {
};
using pi_device_binary = pi_device_binary_struct *;

// pi_buffer_region structure repeats cl_buffer_region
// pi_buffer_region structure repeats cl_buffer_region, used for sub buffers.
struct pi_buffer_region_struct {
// Start of the sub-buffer within its parent buffer.
// NOTE(review): presumably a byte offset, as in cl_buffer_region - confirm.
size_t origin;
// Extent of the sub-buffer.
// NOTE(review): presumably a byte count, as in cl_buffer_region - confirm.
size_t size;
};
// The alias names a POINTER to the descriptor, not the descriptor itself, so
// a pi_buffer_region value is accessed with '->'.
using pi_buffer_region = pi_buffer_region_struct *;

// The pi_buff_rect_offset structure is the 3D offset argument passed to
// buffer rect operations (piEnqueueMemBufferCopyRect, etc.).
// Only the x component is measured in bytes; y and z are plain counts.
struct pi_buff_rect_offset_struct {
// Offset along x, in bytes.
size_t x_bytes;
// Offset along y, as a count rather than bytes (presumably rows).
size_t y_scalar;
// Offset along z, as a count rather than bytes (presumably slices).
size_t z_scalar;
};
// The alias names a POINTER to the descriptor, so PI entry points that take a
// pi_buff_rect_offset receive the offset by address.
using pi_buff_rect_offset = pi_buff_rect_offset_struct *;

// The pi_buff_rect_region structure represents the size of the 3D region
// passed to buffer rect operations (piEnqueueMemBufferCopyRect, etc.).
// Only the width is measured in bytes; height and depth are plain counts.
struct pi_buff_rect_region_struct {
// Width of the region, in bytes.
size_t width_bytes;
// Height of the region, as a count rather than bytes (presumably rows).
size_t height_scalar;
// Depth of the region, as a count rather than bytes (presumably slices).
size_t depth_scalar;
};
// The alias names a POINTER to the descriptor, so PI entry points that take a
// pi_buff_rect_region receive the region by address.
using pi_buff_rect_region = pi_buff_rect_region_struct *;

// The pi_image_offset structure is the 3D offset argument passed to image
// operations (piEnqueueMemImageRead, etc.).
// NOTE(review): units are not stated here; presumably pixels, following the
// OpenCL image origin convention - confirm.
struct pi_image_offset_struct {
// Offset along the first (x) dimension.
size_t x;
// Offset along the second (y) dimension.
size_t y;
// Offset along the third (z) dimension.
size_t z;
};
// The alias names a POINTER to the descriptor, so PI entry points that take a
// pi_image_offset receive the offset by address.
using pi_image_offset = pi_image_offset_struct *;

// The pi_image_region structure represents the size of the 3D region passed
// to image operations (piEnqueueMemImageRead, etc.).
// NOTE(review): units are not stated here; presumably pixels, following the
// OpenCL image region convention - confirm.
struct pi_image_region_struct {
// Extent along the first (x) dimension.
size_t width;
// Extent along the second (y) dimension.
size_t height;
// Extent along the third (z) dimension.
size_t depth;
};
// The alias names a POINTER to the descriptor, so PI entry points that take a
// pi_image_region receive the region by address.
using pi_image_region = pi_image_region_struct *;

// Offload binaries descriptor version supported by this library.
static const uint16_t PI_DEVICE_BINARIES_VERSION = 1;

Expand Down Expand Up @@ -1261,11 +1297,11 @@ __SYCL_EXPORT pi_result piEnqueueMemBufferRead(

__SYCL_EXPORT pi_result piEnqueueMemBufferReadRect(
pi_queue command_queue, pi_mem buffer, pi_bool blocking_read,
const size_t *buffer_offset, const size_t *host_offset,
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
size_t host_row_pitch, size_t host_slice_pitch, void *ptr,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event);
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
void *ptr, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

__SYCL_EXPORT pi_result
piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer,
Expand All @@ -1275,11 +1311,11 @@ piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer,

__SYCL_EXPORT pi_result piEnqueueMemBufferWriteRect(
pi_queue command_queue, pi_mem buffer, pi_bool blocking_write,
const size_t *buffer_offset, const size_t *host_offset,
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
size_t host_row_pitch, size_t host_slice_pitch, const void *ptr,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event);
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
const void *ptr, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

__SYCL_EXPORT pi_result
piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
Expand All @@ -1289,10 +1325,11 @@ piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,

__SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect(
pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer,
const size_t *src_origin, const size_t *dst_origin, const size_t *region,
size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);
pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
size_t dst_row_pitch, size_t dst_slice_pitch,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event);

__SYCL_EXPORT pi_result
piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
Expand All @@ -1302,22 +1339,22 @@ piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,

__SYCL_EXPORT pi_result piEnqueueMemImageRead(
pi_queue command_queue, pi_mem image, pi_bool blocking_read,
const size_t *origin, const size_t *region, size_t row_pitch,
pi_image_offset origin, pi_image_region region, size_t row_pitch,
size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

__SYCL_EXPORT pi_result piEnqueueMemImageWrite(
pi_queue command_queue, pi_mem image, pi_bool blocking_write,
const size_t *origin, const size_t *region, size_t input_row_pitch,
pi_image_offset origin, pi_image_region region, size_t input_row_pitch,
size_t input_slice_pitch, const void *ptr,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event);

__SYCL_EXPORT pi_result piEnqueueMemImageCopy(
pi_queue command_queue, pi_mem src_image, pi_mem dst_image,
const size_t *src_origin, const size_t *dst_origin, const size_t *region,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event);
pi_image_offset src_origin, pi_image_offset dst_origin,
pi_image_region region, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event);

__SYCL_EXPORT pi_result
piEnqueueMemImageFill(pi_queue command_queue, pi_mem image,
Expand Down
26 changes: 26 additions & 0 deletions sycl/include/CL/sycl/detail/pi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,32 @@ template <> inline void print<>(PiPlatform val) {
std::cout << "pi_platform : " << val << std::endl;
}

// Trace printer for a sub-buffer region descriptor: writes a label followed
// by "origin/size" on one line to std::cout and flushes the stream.
// rgn is dereferenced unconditionally and must point to a valid descriptor.
template <> inline void print<>(pi_buffer_region rgn) {
  std::ostream &out = std::cout;
  out << "pi_buffer_region origin/size : ";
  out << rgn->origin << "/" << rgn->size;
  out << std::endl;
}

// Trace printer for a buffer-rect region: writes a label followed by
// "width_bytes/height_scalar/depth_scalar" on one line to std::cout and
// flushes the stream.
// rgn is dereferenced unconditionally and must point to a valid descriptor.
template <> inline void print<>(pi_buff_rect_region rgn) {
  std::ostream &out = std::cout;
  out << "pi_buff_rect_region width_bytes/height/depth : ";
  out << rgn->width_bytes << "/";
  out << rgn->height_scalar << "/";
  out << rgn->depth_scalar << std::endl;
}

// Trace printer for a buffer-rect offset: writes a label followed by
// "x_bytes/y_scalar/z_scalar" on one line to std::cout and flushes the
// stream.
// off is dereferenced unconditionally and must point to a valid descriptor.
template <> inline void print<>(pi_buff_rect_offset off) {
  std::ostream &out = std::cout;
  out << "pi_buff_rect_offset x_bytes/y/z : ";
  out << off->x_bytes << "/";
  out << off->y_scalar << "/";
  out << off->z_scalar << std::endl;
}

// Trace printer for an image region: writes a label followed by
// "width/height/depth" on one line to std::cout and flushes the stream.
// rgn is dereferenced unconditionally and must point to a valid descriptor.
template <> inline void print<>(pi_image_region rgn) {
  std::ostream &out = std::cout;
  out << "pi_image_region width/height/depth : ";
  out << rgn->width << "/";
  out << rgn->height << "/";
  out << rgn->depth << std::endl;
}

// Trace printer for an image offset: writes a label followed by "x/y/z" on
// one line to std::cout and flushes the stream.
// off is dereferenced unconditionally and must point to a valid descriptor.
template <> inline void print<>(pi_image_offset off) {
  std::ostream &out = std::cout;
  out << "pi_image_offset x/y/z : ";
  out << off->x << "/";
  out << off->y << "/";
  out << off->z << std::endl;
}

template <> inline void print<>(PiResult val) {
std::cout << "pi_result : ";
if (val == PI_SUCCESS)
Expand Down
65 changes: 33 additions & 32 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3351,10 +3351,10 @@ pi_result cuda_piSamplerRelease(pi_sampler sampler) {
/// If the source and/or destination is on the device, src_ptr and/or dst_ptr
/// must be a pointer to a CUdeviceptr
static pi_result commonEnqueueMemBufferCopyRect(
CUstream cu_stream, const size_t *region, const void *src_ptr,
const CUmemorytype_enum src_type, const size_t *src_offset,
CUstream cu_stream, pi_buff_rect_region region, const void *src_ptr,
const CUmemorytype_enum src_type, pi_buff_rect_offset src_offset,
size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr,
const CUmemorytype_enum dst_type, const size_t *dst_offset,
const CUmemorytype_enum dst_type, pi_buff_rect_offset dst_offset,
size_t dst_row_pitch, size_t dst_slice_pitch) {

assert(region != nullptr);
Expand All @@ -3364,27 +3364,27 @@ static pi_result commonEnqueueMemBufferCopyRect(
assert(src_type == CU_MEMORYTYPE_DEVICE || src_type == CU_MEMORYTYPE_HOST);
assert(dst_type == CU_MEMORYTYPE_DEVICE || dst_type == CU_MEMORYTYPE_HOST);

src_row_pitch = (!src_row_pitch) ? region[0] : src_row_pitch;
src_slice_pitch =
(!src_slice_pitch) ? (region[1] * src_row_pitch) : src_slice_pitch;
dst_row_pitch = (!dst_row_pitch) ? region[0] : dst_row_pitch;
dst_slice_pitch =
(!dst_slice_pitch) ? (region[1] * dst_row_pitch) : dst_slice_pitch;
src_row_pitch = (!src_row_pitch) ? region->width_bytes : src_row_pitch;
src_slice_pitch = (!src_slice_pitch) ? (region->height_scalar * src_row_pitch)
: src_slice_pitch;
dst_row_pitch = (!dst_row_pitch) ? region->width_bytes : dst_row_pitch;
dst_slice_pitch = (!dst_slice_pitch) ? (region->height_scalar * dst_row_pitch)
: dst_slice_pitch;

CUDA_MEMCPY3D params = {0};

params.WidthInBytes = region[0];
params.Height = region[1];
params.Depth = region[2];
params.WidthInBytes = region->width_bytes;
params.Height = region->height_scalar;
params.Depth = region->depth_scalar;

params.srcMemoryType = src_type;
params.srcDevice = src_type == CU_MEMORYTYPE_DEVICE
? *static_cast<const CUdeviceptr *>(src_ptr)
: 0;
params.srcHost = src_type == CU_MEMORYTYPE_HOST ? src_ptr : nullptr;
params.srcXInBytes = src_offset[0];
params.srcY = src_offset[1];
params.srcZ = src_offset[2];
params.srcXInBytes = src_offset->x_bytes;
params.srcY = src_offset->y_scalar;
params.srcZ = src_offset->z_scalar;
params.srcPitch = src_row_pitch;
params.srcHeight = src_slice_pitch / src_row_pitch;

Expand All @@ -3393,9 +3393,9 @@ static pi_result commonEnqueueMemBufferCopyRect(
? *static_cast<CUdeviceptr *>(dst_ptr)
: 0;
params.dstHost = dst_type == CU_MEMORYTYPE_HOST ? dst_ptr : nullptr;
params.dstXInBytes = dst_offset[0];
params.dstY = dst_offset[1];
params.dstZ = dst_offset[2];
params.dstXInBytes = dst_offset->x_bytes;
params.dstY = dst_offset->y_scalar;
params.dstZ = dst_offset->z_scalar;
params.dstPitch = dst_row_pitch;
params.dstHeight = dst_slice_pitch / dst_row_pitch;

Expand All @@ -3404,11 +3404,11 @@ static pi_result commonEnqueueMemBufferCopyRect(

pi_result cuda_piEnqueueMemBufferReadRect(
pi_queue command_queue, pi_mem buffer, pi_bool blocking_read,
const size_t *buffer_offset, const size_t *host_offset,
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
size_t host_row_pitch, size_t host_slice_pitch, void *ptr,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event) {
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
void *ptr, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event) {

assert(buffer != nullptr);
assert(command_queue != nullptr);
Expand Down Expand Up @@ -3455,11 +3455,11 @@ pi_result cuda_piEnqueueMemBufferReadRect(

pi_result cuda_piEnqueueMemBufferWriteRect(
pi_queue command_queue, pi_mem buffer, pi_bool blocking_write,
const size_t *buffer_offset, const size_t *host_offset,
const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
size_t host_row_pitch, size_t host_slice_pitch, const void *ptr,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event) {
pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset,
pi_buff_rect_region region, size_t buffer_row_pitch,
size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch,
const void *ptr, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event) {

assert(buffer != nullptr);
assert(command_queue != nullptr);
Expand Down Expand Up @@ -3553,10 +3553,11 @@ pi_result cuda_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,

pi_result cuda_piEnqueueMemBufferCopyRect(
pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer,
const size_t *src_origin, const size_t *dst_origin, const size_t *region,
size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch,
size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list,
const pi_event *event_wait_list, pi_event *event) {
pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin,
pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch,
size_t dst_row_pitch, size_t dst_slice_pitch,
pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
pi_event *event) {

assert(src_buffer != nullptr);
assert(dst_buffer != nullptr);
Expand Down
Loading