diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 741e80a106154..2a0ae02101dcf 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -706,13 +706,49 @@ struct pi_device_binary_struct { }; using pi_device_binary = pi_device_binary_struct *; -// pi_buffer_region structure repeats cl_buffer_region +// pi_buffer_region_struct repeats cl_buffer_region; it is used for sub-buffers. struct pi_buffer_region_struct { size_t origin; size_t size; }; using pi_buffer_region = pi_buffer_region_struct *; +// pi_buff_rect_offset_struct is the 3D offset (origin) passed to buffer rect +// operations (piEnqueueMemBufferCopyRect, etc). Only the X offset is in bytes. +struct pi_buff_rect_offset_struct { + size_t x_bytes; // X offset, in bytes + size_t y_scalar; // Y offset, a plain count (presumably rows - confirm) + size_t z_scalar; // Z offset, a plain count (presumably slices - confirm) +}; +using pi_buff_rect_offset = pi_buff_rect_offset_struct *; // pointer alias + +// pi_buff_rect_region_struct is the size of the 3D region passed to buffer +// rect operations (piEnqueueMemBufferCopyRect, etc). Only the width is in bytes. +struct pi_buff_rect_region_struct { + size_t width_bytes; // region width, in bytes + size_t height_scalar; // region height, a count of rows + size_t depth_scalar; // region depth, a plain count (presumably slices - confirm) +}; +using pi_buff_rect_region = pi_buff_rect_region_struct *; // pointer alias + +// pi_image_offset_struct is the 3D offset (origin) passed to image operations +// (piEnqueueMemImageRead, etc). NOTE(review): units presumably pixels - confirm. +struct pi_image_offset_struct { + size_t x; + size_t y; + size_t z; +}; +using pi_image_offset = pi_image_offset_struct *; // pointer alias + +// pi_image_region_struct is the size of the 3D region passed to image +// operations (piEnqueueMemImageRead, etc). NOTE(review): units presumably pixels - confirm. +struct pi_image_region_struct { + size_t width; + size_t height; + size_t depth; +}; +using pi_image_region = pi_image_region_struct *; // pointer alias + // Offload binaries descriptor version supported by this library. 
static const uint16_t PI_DEVICE_BINARIES_VERSION = 1; @@ -1261,11 +1297,11 @@ __SYCL_EXPORT pi_result piEnqueueMemBufferRead( __SYCL_EXPORT pi_result piEnqueueMemBufferReadRect( pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, - const size_t *buffer_offset, const size_t *host_offset, - const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, - size_t host_row_pitch, size_t host_slice_pitch, void *ptr, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); __SYCL_EXPORT pi_result piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, @@ -1275,11 +1311,11 @@ piEnqueueMemBufferWrite(pi_queue command_queue, pi_mem buffer, __SYCL_EXPORT pi_result piEnqueueMemBufferWriteRect( pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, - const size_t *buffer_offset, const size_t *host_offset, - const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, - size_t host_row_pitch, size_t host_slice_pitch, const void *ptr, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); __SYCL_EXPORT pi_result piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, @@ -1289,10 +1325,11 @@ piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, __SYCL_EXPORT pi_result piEnqueueMemBufferCopyRect( pi_queue command_queue, pi_mem src_buffer, pi_mem 
dst_buffer, - const size_t *src_origin, const size_t *dst_origin, const size_t *region, - size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, - size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event); + pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, + pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, + size_t dst_row_pitch, size_t dst_slice_pitch, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event); __SYCL_EXPORT pi_result piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, @@ -1302,22 +1339,22 @@ piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer, __SYCL_EXPORT pi_result piEnqueueMemImageRead( pi_queue command_queue, pi_mem image, pi_bool blocking_read, - const size_t *origin, const size_t *region, size_t row_pitch, + pi_image_offset origin, pi_image_region region, size_t row_pitch, size_t slice_pitch, void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); __SYCL_EXPORT pi_result piEnqueueMemImageWrite( pi_queue command_queue, pi_mem image, pi_bool blocking_write, - const size_t *origin, const size_t *region, size_t input_row_pitch, + pi_image_offset origin, pi_image_region region, size_t input_row_pitch, size_t input_slice_pitch, const void *ptr, pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, pi_event *event); __SYCL_EXPORT pi_result piEnqueueMemImageCopy( pi_queue command_queue, pi_mem src_image, pi_mem dst_image, - const size_t *src_origin, const size_t *dst_origin, const size_t *region, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event); + pi_image_offset src_origin, pi_image_offset dst_origin, + pi_image_region region, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event); __SYCL_EXPORT pi_result piEnqueueMemImageFill(pi_queue command_queue, pi_mem image, diff 
--git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index da3864a31c178..85d868b7eb82e 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -180,6 +180,32 @@ template <> inline void print<>(PiPlatform val) { std::cout << "pi_platform : " << val << std::endl; } +template <> inline void print<>(pi_buffer_region rgn) { /* Prints origin/size to std::cout; rgn is dereferenced unconditionally and must not be null. */ + std::cout << "pi_buffer_region origin/size : " << rgn->origin << "/" + << rgn->size << std::endl; +} + +template <> inline void print<>(pi_buff_rect_region rgn) { /* Prints the three region extents to std::cout; rgn is dereferenced unconditionally and must not be null. */ + std::cout << "pi_buff_rect_region width_bytes/height/depth : " + << rgn->width_bytes << "/" << rgn->height_scalar << "/" + << rgn->depth_scalar << std::endl; +} + +template <> inline void print<>(pi_buff_rect_offset off) { /* Prints the three offset components to std::cout; off is dereferenced unconditionally and must not be null. */ + std::cout << "pi_buff_rect_offset x_bytes/y/z : " << off->x_bytes << "/" + << off->y_scalar << "/" << off->z_scalar << std::endl; +} + +template <> inline void print<>(pi_image_region rgn) { /* Prints width/height/depth to std::cout; rgn is dereferenced unconditionally and must not be null. */ + std::cout << "pi_image_region width/height/depth : " << rgn->width << "/" + << rgn->height << "/" << rgn->depth << std::endl; +} + +template <> inline void print<>(pi_image_offset off) { /* Prints x/y/z to std::cout; off is dereferenced unconditionally and must not be null. */ + std::cout << "pi_image_offset x/y/z : " << off->x << "/" << off->y << "/" + << off->z << std::endl; +} + template <> inline void print<>(PiResult val) { std::cout << "pi_result : "; if (val == PI_SUCCESS) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index d4a37cb1d0fb4..2f53ae46e4b44 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -3351,10 +3351,10 @@ pi_result cuda_piSamplerRelease(pi_sampler sampler) { /// If the source and/or destination is on the device, src_ptr and/or dst_ptr /// must be a pointer to a CUdeviceptr static pi_result commonEnqueueMemBufferCopyRect( - CUstream cu_stream, const size_t *region, const void *src_ptr, - const CUmemorytype_enum src_type, const size_t *src_offset, + CUstream cu_stream, pi_buff_rect_region region, const void *src_ptr, + 
const CUmemorytype_enum src_type, pi_buff_rect_offset src_offset, size_t src_row_pitch, size_t src_slice_pitch, void *dst_ptr, - const CUmemorytype_enum dst_type, const size_t *dst_offset, + const CUmemorytype_enum dst_type, pi_buff_rect_offset dst_offset, size_t dst_row_pitch, size_t dst_slice_pitch) { assert(region != nullptr); @@ -3364,27 +3364,27 @@ static pi_result commonEnqueueMemBufferCopyRect( assert(src_type == CU_MEMORYTYPE_DEVICE || src_type == CU_MEMORYTYPE_HOST); assert(dst_type == CU_MEMORYTYPE_DEVICE || dst_type == CU_MEMORYTYPE_HOST); - src_row_pitch = (!src_row_pitch) ? region[0] : src_row_pitch; - src_slice_pitch = - (!src_slice_pitch) ? (region[1] * src_row_pitch) : src_slice_pitch; - dst_row_pitch = (!dst_row_pitch) ? region[0] : dst_row_pitch; - dst_slice_pitch = - (!dst_slice_pitch) ? (region[1] * dst_row_pitch) : dst_slice_pitch; + src_row_pitch = (!src_row_pitch) ? region->width_bytes : src_row_pitch; + src_slice_pitch = (!src_slice_pitch) ? (region->height_scalar * src_row_pitch) + : src_slice_pitch; + dst_row_pitch = (!dst_row_pitch) ? region->width_bytes : dst_row_pitch; + dst_slice_pitch = (!dst_slice_pitch) ? (region->height_scalar * dst_row_pitch) + : dst_slice_pitch; CUDA_MEMCPY3D params = {0}; - params.WidthInBytes = region[0]; - params.Height = region[1]; - params.Depth = region[2]; + params.WidthInBytes = region->width_bytes; + params.Height = region->height_scalar; + params.Depth = region->depth_scalar; params.srcMemoryType = src_type; params.srcDevice = src_type == CU_MEMORYTYPE_DEVICE ? *static_cast(src_ptr) : 0; params.srcHost = src_type == CU_MEMORYTYPE_HOST ? 
src_ptr : nullptr; - params.srcXInBytes = src_offset[0]; - params.srcY = src_offset[1]; - params.srcZ = src_offset[2]; + params.srcXInBytes = src_offset->x_bytes; + params.srcY = src_offset->y_scalar; + params.srcZ = src_offset->z_scalar; params.srcPitch = src_row_pitch; params.srcHeight = src_slice_pitch / src_row_pitch; @@ -3393,9 +3393,9 @@ static pi_result commonEnqueueMemBufferCopyRect( ? *static_cast(dst_ptr) : 0; params.dstHost = dst_type == CU_MEMORYTYPE_HOST ? dst_ptr : nullptr; - params.dstXInBytes = dst_offset[0]; - params.dstY = dst_offset[1]; - params.dstZ = dst_offset[2]; + params.dstXInBytes = dst_offset->x_bytes; + params.dstY = dst_offset->y_scalar; + params.dstZ = dst_offset->z_scalar; params.dstPitch = dst_row_pitch; params.dstHeight = dst_slice_pitch / dst_row_pitch; @@ -3404,11 +3404,11 @@ static pi_result commonEnqueueMemBufferCopyRect( pi_result cuda_piEnqueueMemBufferReadRect( pi_queue command_queue, pi_mem buffer, pi_bool blocking_read, - const size_t *buffer_offset, const size_t *host_offset, - const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, - size_t host_row_pitch, size_t host_slice_pitch, void *ptr, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { assert(buffer != nullptr); assert(command_queue != nullptr); @@ -3455,11 +3455,11 @@ pi_result cuda_piEnqueueMemBufferReadRect( pi_result cuda_piEnqueueMemBufferWriteRect( pi_queue command_queue, pi_mem buffer, pi_bool blocking_write, - const size_t *buffer_offset, const size_t *host_offset, - const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch, - size_t host_row_pitch, size_t host_slice_pitch, const void 
*ptr, - pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, - pi_event *event) { + pi_buff_rect_offset buffer_offset, pi_buff_rect_offset host_offset, + pi_buff_rect_region region, size_t buffer_row_pitch, + size_t buffer_slice_pitch, size_t host_row_pitch, size_t host_slice_pitch, + const void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { assert(buffer != nullptr); assert(command_queue != nullptr); @@ -3553,10 +3553,11 @@ pi_result cuda_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer, pi_result cuda_piEnqueueMemBufferCopyRect( pi_queue command_queue, pi_mem src_buffer, pi_mem dst_buffer, - const size_t *src_origin, const size_t *dst_origin, const size_t *region, - size_t src_row_pitch, size_t src_slice_pitch, size_t dst_row_pitch, - size_t dst_slice_pitch, pi_uint32 num_events_in_wait_list, - const pi_event *event_wait_list, pi_event *event) { + pi_buff_rect_offset src_origin, pi_buff_rect_offset dst_origin, + pi_buff_rect_region region, size_t src_row_pitch, size_t src_slice_pitch, + size_t dst_row_pitch, size_t dst_slice_pitch, + pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list, + pi_event *event) { assert(src_buffer != nullptr); assert(dst_buffer != nullptr); diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 2c6c95d2f6089..4ef2945e6a4c4 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -293,11 +293,11 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst, static pi_result enqueueMemCopyRectHelper( pi_command_type CommandType, pi_queue Queue, void *SrcBuffer, - void *DstBuffer, const size_t *SrcOrigin, const size_t *DstOrigin, - const size_t *Region, size_t SrcRowPitch, size_t SrcSlicePitch, - size_t DstRowPitch, size_t DstSlicePitch, pi_bool Blocking, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event 
*Event); + void *DstBuffer, pi_buff_rect_offset SrcOrigin, + pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, + size_t SrcRowPitch, size_t DstRowPitch, size_t SrcSlicePitch, /* pitch names follow the definition's order: both row pitches, then both slice pitches */ + size_t DstSlicePitch, pi_bool Blocking, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event); inline void zeParseError(ze_result_t ZeError, std::string &ErrorString) { switch (ZeError) { @@ -3095,10 +3095,11 @@ pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, pi_result piEnqueueMemBufferReadRect( pi_queue Queue, pi_mem Buffer, pi_bool BlockingRead, - const size_t *BufferOffset, const size_t *HostOffset, const size_t *Region, - size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, - size_t HostSlicePitch, void *Ptr, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, void *Ptr, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { assert(Buffer); return enqueueMemCopyRectHelper( @@ -3167,11 +3168,11 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst, // Shared by all memory read/write/copy rect PI interfaces. 
static pi_result enqueueMemCopyRectHelper( pi_command_type CommandType, pi_queue Queue, void *SrcBuffer, - void *DstBuffer, const size_t *SrcOrigin, const size_t *DstOrigin, - const size_t *Region, size_t SrcRowPitch, size_t DstRowPitch, - size_t SrcSlicePitch, size_t DstSlicePitch, pi_bool Blocking, - pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, - pi_event *Event) { + void *DstBuffer, pi_buff_rect_offset SrcOrigin, + pi_buff_rect_offset DstOrigin, pi_buff_rect_region Region, + size_t SrcRowPitch, size_t DstRowPitch, size_t SrcSlicePitch, + size_t DstSlicePitch, pi_bool Blocking, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { assert(Region); assert(SrcOrigin); @@ -3212,31 +3213,31 @@ static pi_result enqueueMemCopyRectHelper( } zePrint("\n"); - uint32_t SrcOriginX = pi_cast(SrcOrigin[0]); - uint32_t SrcOriginY = pi_cast(SrcOrigin[1]); - uint32_t SrcOriginZ = pi_cast(SrcOrigin[2]); + uint32_t SrcOriginX = pi_cast(SrcOrigin->x_bytes); + uint32_t SrcOriginY = pi_cast(SrcOrigin->y_scalar); + uint32_t SrcOriginZ = pi_cast(SrcOrigin->z_scalar); uint32_t SrcPitch = SrcRowPitch; if (SrcPitch == 0) - SrcPitch = pi_cast(Region[0]); + SrcPitch = pi_cast(Region->width_bytes); if (SrcSlicePitch == 0) - SrcSlicePitch = pi_cast(Region[1]) * SrcPitch; + SrcSlicePitch = pi_cast(Region->height_scalar) * SrcPitch; - uint32_t DstOriginX = pi_cast(DstOrigin[0]); - uint32_t DstOriginY = pi_cast(DstOrigin[1]); - uint32_t DstOriginZ = pi_cast(DstOrigin[2]); + uint32_t DstOriginX = pi_cast(DstOrigin->x_bytes); + uint32_t DstOriginY = pi_cast(DstOrigin->y_scalar); + uint32_t DstOriginZ = pi_cast(DstOrigin->z_scalar); uint32_t DstPitch = DstRowPitch; if (DstPitch == 0) - DstPitch = pi_cast(Region[0]); + DstPitch = pi_cast(Region->width_bytes); if (DstSlicePitch == 0) - DstSlicePitch = pi_cast(Region[1]) * DstPitch; + DstSlicePitch = pi_cast(Region->height_scalar) * DstPitch; - uint32_t Width = pi_cast(Region[0]); - uint32_t Height = 
pi_cast(Region[1]); - uint32_t Depth = pi_cast(Region[2]); + uint32_t Width = pi_cast(Region->width_bytes); + uint32_t Height = pi_cast(Region->height_scalar); + uint32_t Depth = pi_cast(Region->depth_scalar); const ze_copy_region_t ZeSrcRegion = {SrcOriginX, SrcOriginY, SrcOriginZ, Width, Height, Depth}; @@ -3282,10 +3283,11 @@ pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, pi_result piEnqueueMemBufferWriteRect( pi_queue Queue, pi_mem Buffer, pi_bool BlockingWrite, - const size_t *BufferOffset, const size_t *HostOffset, const size_t *Region, - size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, - size_t HostSlicePitch, const void *Ptr, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { + pi_buff_rect_offset BufferOffset, pi_buff_rect_offset HostOffset, + pi_buff_rect_region Region, size_t BufferRowPitch, size_t BufferSlicePitch, + size_t HostRowPitch, size_t HostSlicePitch, const void *Ptr, + pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, + pi_event *Event) { assert(Buffer); return enqueueMemCopyRectHelper( @@ -3313,13 +3315,12 @@ pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcBuffer, NumEventsInWaitList, EventWaitList, Event); } -pi_result -piEnqueueMemBufferCopyRect(pi_queue Queue, pi_mem SrcBuffer, pi_mem DstBuffer, - const size_t *SrcOrigin, const size_t *DstOrigin, - const size_t *Region, size_t SrcRowPitch, - size_t SrcSlicePitch, size_t DstRowPitch, - size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, pi_event *Event) { +pi_result piEnqueueMemBufferCopyRect( + pi_queue Queue, pi_mem SrcBuffer, pi_mem DstBuffer, + pi_buff_rect_offset SrcOrigin, pi_buff_rect_offset DstOrigin, + pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, + size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { assert(SrcBuffer); assert(DstBuffer); @@ -3555,8 +3556,9 @@ 
pi_result piMemImageGetInfo(pi_mem Image, pi_image_info ParamName, } // extern "C" -static ze_image_region_t getImageRegionHelper(pi_mem Mem, const size_t *Origin, - const size_t *Region) { +static ze_image_region_t getImageRegionHelper(pi_mem Mem, + pi_image_offset Origin, + pi_image_region Region) { assert(Mem && Origin); #ifndef NDEBUG @@ -3565,26 +3567,26 @@ static ze_image_region_t getImageRegionHelper(pi_mem Mem, const size_t *Origin, ze_image_desc_t ZeImageDesc = Image->ZeImageDesc; #endif // !NDEBUG - assert((ZeImageDesc.type == ZE_IMAGE_TYPE_1D && Origin[1] == 0 && - Origin[2] == 0) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY && Origin[2] == 0) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_2D && Origin[2] == 0) || + assert((ZeImageDesc.type == ZE_IMAGE_TYPE_1D && Origin->y == 0 && + Origin->z == 0) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY && Origin->z == 0) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_2D && Origin->z == 0) || (ZeImageDesc.type == ZE_IMAGE_TYPE_3D)); - uint32_t OriginX = pi_cast(Origin[0]); - uint32_t OriginY = pi_cast(Origin[1]); - uint32_t OriginZ = pi_cast(Origin[2]); + uint32_t OriginX = pi_cast(Origin->x); + uint32_t OriginY = pi_cast(Origin->y); + uint32_t OriginZ = pi_cast(Origin->z); - assert(Region[0] && Region[1] && Region[2]); - assert((ZeImageDesc.type == ZE_IMAGE_TYPE_1D && Region[1] == 1 && - Region[2] == 1) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY && Region[2] == 1) || - (ZeImageDesc.type == ZE_IMAGE_TYPE_2D && Region[2] == 1) || + assert(Region->width && Region->height && Region->depth); + assert((ZeImageDesc.type == ZE_IMAGE_TYPE_1D && Region->height == 1 && + Region->depth == 1) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_1DARRAY && Region->depth == 1) || + (ZeImageDesc.type == ZE_IMAGE_TYPE_2D && Region->depth == 1) || (ZeImageDesc.type == ZE_IMAGE_TYPE_3D)); - uint32_t Width = pi_cast(Region[0]); - uint32_t Height = pi_cast(Region[1]); - uint32_t Depth = pi_cast(Region[2]); + uint32_t Width = pi_cast(Region->width); + 
uint32_t Height = pi_cast(Region->height); + uint32_t Depth = pi_cast(Region->depth); const ze_image_region_t ZeRegion = {OriginX, OriginY, OriginZ, Width, Height, Depth}; @@ -3596,8 +3598,8 @@ static pi_result enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, const void *Src, // image or ptr void *Dst, // image or ptr - pi_bool IsBlocking, const size_t *SrcOrigin, - const size_t *DstOrigin, const size_t *Region, + pi_bool IsBlocking, pi_image_offset SrcOrigin, + pi_image_offset DstOrigin, pi_image_region Region, size_t RowPitch, size_t SlicePitch, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { @@ -3707,8 +3709,8 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, extern "C" { pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, - pi_bool BlockingRead, const size_t *Origin, - const size_t *Region, size_t RowPitch, + pi_bool BlockingRead, pi_image_offset Origin, + pi_image_region Region, size_t RowPitch, size_t SlicePitch, void *Ptr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, @@ -3725,8 +3727,8 @@ pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, } pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, - pi_bool BlockingWrite, const size_t *Origin, - const size_t *Region, size_t InputRowPitch, + pi_bool BlockingWrite, pi_image_offset Origin, + pi_image_region Region, size_t InputRowPitch, size_t InputSlicePitch, const void *Ptr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, @@ -3743,12 +3745,11 @@ pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, Event); } -pi_result piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, - pi_mem DstImage, const size_t *SrcOrigin, - const size_t *DstOrigin, const size_t *Region, - pi_uint32 NumEventsInWaitList, - const pi_event *EventWaitList, - pi_event *Event) { +pi_result +piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, + pi_image_offset SrcOrigin, 
pi_image_offset DstOrigin, + pi_image_region Region, pi_uint32 NumEventsInWaitList, + const pi_event *EventWaitList, pi_event *Event) { return enqueueMemImageCommandHelper( PI_COMMAND_TYPE_IMAGE_COPY, Queue, SrcImage, DstImage, diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 22d8c00ec91b9..0cc1cea62d50f 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -276,21 +276,32 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr, size_t BufferSlicePitch = (3 == DimDst) ? DstSize[0] * DstSize[1] : 0; size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSize[0]; size_t HostSlicePitch = (3 == DimSrc) ? SrcSize[0] * SrcSize[1] : 0; + + pi_buff_rect_offset_struct BufferOffset{DstOffset[0], DstOffset[1], + DstOffset[2]}; + pi_buff_rect_offset_struct HostOffset{SrcOffset[0], SrcOffset[1], + SrcOffset[2]}; + pi_buff_rect_region_struct RectRegion{ + DstAccessRange[0], DstAccessRange[1], DstAccessRange[2]}; + Plugin.call( Queue, DstMem, - /*blocking_write=*/CL_FALSE, &DstOffset[0], &SrcOffset[0], - &DstAccessRange[0], BufferRowPitch, BufferSlicePitch, HostRowPitch, - HostSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), - &OutEvent); + /*blocking_write=*/CL_FALSE, &BufferOffset, &HostOffset, &RectRegion, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, + SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent); } } else { size_t InputRowPitch = (1 == DimDst) ? 0 : DstSize[0]; size_t InputSlicePitch = (3 == DimDst) ? 
DstSize[0] * DstSize[1] : 0; + + pi_image_offset_struct Origin{DstOffset[0], DstOffset[1], DstOffset[2]}; + pi_image_region_struct Region{DstAccessRange[0], DstAccessRange[1], + DstAccessRange[2]}; + Plugin.call( Queue, DstMem, - /*blocking_write=*/CL_FALSE, &DstOffset[0], &DstAccessRange[0], - InputRowPitch, InputSlicePitch, SrcMem, DepEvents.size(), - DepEvents.data(), &OutEvent); + /*blocking_write=*/CL_FALSE, &Origin, &Region, InputRowPitch, + InputSlicePitch, SrcMem, DepEvents.size(), DepEvents.data(), &OutEvent); } } @@ -326,19 +337,31 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, size_t HostRowPitch = (1 == DimDst) ? 0 : DstSize[0]; size_t HostSlicePitch = (3 == DimDst) ? DstSize[0] * DstSize[1] : 0; + + pi_buff_rect_offset_struct BufferOffset{SrcOffset[0], SrcOffset[1], + SrcOffset[2]}; + pi_buff_rect_offset_struct HostOffset{DstOffset[0], DstOffset[1], + DstOffset[2]}; + pi_buff_rect_region_struct RectRegion{ + SrcAccessRange[0], SrcAccessRange[1], SrcAccessRange[2]}; + Plugin.call( Queue, SrcMem, - /*blocking_read=*/CL_FALSE, &SrcOffset[0], &DstOffset[0], - &SrcAccessRange[0], BufferRowPitch, BufferSlicePitch, HostRowPitch, - HostSlicePitch, DstMem, DepEvents.size(), DepEvents.data(), - &OutEvent); + /*blocking_read=*/CL_FALSE, &BufferOffset, &HostOffset, &RectRegion, + BufferRowPitch, BufferSlicePitch, HostRowPitch, HostSlicePitch, + DstMem, DepEvents.size(), DepEvents.data(), &OutEvent); } } else { size_t RowPitch = (1 == DimSrc) ? 0 : SrcSize[0]; size_t SlicePitch = (3 == DimSrc) ? 
SrcSize[0] * SrcSize[1] : 0; + + pi_image_offset_struct Offset{SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + pi_image_region_struct Region{SrcAccessRange[0], SrcAccessRange[1], + SrcAccessRange[2]}; + Plugin.call( - Queue, SrcMem, CL_FALSE, &SrcOffset[0], &SrcAccessRange[0], RowPitch, - SlicePitch, DstMem, DepEvents.size(), DepEvents.data(), &OutEvent); + Queue, SrcMem, CL_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem, + DepEvents.size(), DepEvents.data(), &OutEvent); } } @@ -373,14 +396,26 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue, size_t DstSlicePitch = (DimDst > 1) ? DstSize[0] * DstSize[1] : DstSize[0]; + pi_buff_rect_offset_struct SrcOrigin{SrcOffset[0], SrcOffset[1], + SrcOffset[2]}; + pi_buff_rect_offset_struct DstOrigin{DstOffset[0], DstOffset[1], + DstOffset[2]}; + pi_buff_rect_region_struct Region{SrcAccessRange[0], SrcAccessRange[1], + SrcAccessRange[2]}; + Plugin.call( - Queue, SrcMem, DstMem, &SrcOffset[0], &DstOffset[0], - &SrcAccessRange[0], SrcRowPitch, SrcSlicePitch, DstRowPitch, - DstSlicePitch, DepEvents.size(), DepEvents.data(), &OutEvent); + Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch, + SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size(), + DepEvents.data(), &OutEvent); } } else { + pi_image_offset_struct SrcOrigin{SrcOffset[0], SrcOffset[1], SrcOffset[2]}; + pi_image_offset_struct DstOrigin{DstOffset[0], DstOffset[1], DstOffset[2]}; + pi_image_region_struct Region{SrcAccessRange[0], SrcAccessRange[1], + SrcAccessRange[2]}; + Plugin.call( - Queue, SrcMem, DstMem, &SrcOffset[0], &DstOffset[0], &SrcAccessRange[0], + Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, DepEvents.size(), DepEvents.data(), &OutEvent); } }