@@ -239,6 +239,36 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext,
239239 return NewMem;
240240}
241241
242+ struct TermPositions {
243+ int XTerm;
244+ int YTerm;
245+ int ZTerm;
246+ };
247+ void prepTermPositions (TermPositions &pos, int Dimensions,
248+ detail::SYCLMemObjI::MemObjType Type) {
249+ // For buffers, the offsets/ranges coming from accessor are always
250+ // id<3>/range<3> But their organization varies by dimension:
251+ // 1 ==> {width, 1, 1}
252+ // 2 ==> {height, width, 1}
253+ // 3 ==> {depth, height, width}
254+ // Some callers schedule 0 as DimDst/DimSrc.
255+
256+ if (Type == detail::SYCLMemObjI::MemObjType::BUFFER) {
257+ if (Dimensions == 3 ) {
258+ pos.XTerm = 2 , pos.YTerm = 1 , pos.ZTerm = 0 ;
259+ } else if (Dimensions == 2 ) {
260+ pos.XTerm = 1 , pos.YTerm = 0 , pos.ZTerm = 2 ;
261+ } else { // Dimension is 1 or 0
262+ pos.XTerm = 0 , pos.YTerm = 1 , pos.ZTerm = 2 ;
263+ }
264+ } else { // While range<>/id<> use by images is different than buffers, it's
265+ // consistent with their accessors.
266+ pos.XTerm = 0 ;
267+ pos.YTerm = 1 ;
268+ pos.ZTerm = 2 ;
269+ }
270+ }
271+
242272void copyH2D (SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
243273 unsigned int DimSrc, sycl::range<3 > SrcSize,
244274 sycl::range<3 > SrcAccessRange, sycl::id<3 > SrcOffset,
@@ -250,34 +280,40 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
250280 assert (SYCLMemObj && " The SYCLMemObj is nullptr" );
251281
252282 const RT::PiQueue Queue = TgtQueue->getHandleRef ();
253- // Adjust first dimension of copy range and offset as OpenCL expects size in
254- // bytes.
255- DstSize[0 ] *= DstElemSize;
256283 const detail::plugin &Plugin = TgtQueue->getPlugin ();
257- if (SYCLMemObj->getType () == detail::SYCLMemObjI::MemObjType::BUFFER) {
258- DstOffset[0 ] *= DstElemSize;
259- SrcOffset[0 ] *= SrcElemSize;
260- SrcAccessRange[0 ] *= SrcElemSize;
261- DstAccessRange[0 ] *= DstElemSize;
262- SrcSize[0 ] *= SrcElemSize;
263284
285+ detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType ();
286+ TermPositions SrcPos, DstPos;
287+ prepTermPositions (SrcPos, DimSrc, MemType);
288+ prepTermPositions (DstPos, DimDst, MemType);
289+
290+ size_t DstXOffBytes = DstOffset[DstPos.XTerm ] * DstElemSize;
291+ size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm ] * SrcElemSize;
292+ size_t DstAccessRangeWidthBytes = DstAccessRange[DstPos.XTerm ] * DstElemSize;
293+ size_t DstSzWidthBytes = DstSize[DstPos.XTerm ] * DstElemSize;
294+ size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm ] * SrcElemSize;
295+
296+ if (MemType == detail::SYCLMemObjI::MemObjType::BUFFER) {
264297 if (1 == DimDst && 1 == DimSrc) {
265298 Plugin.call <PiApiKind::piEnqueueMemBufferWrite>(
266299 Queue, DstMem,
267- /* blocking_write=*/ CL_FALSE, DstOffset[ 0 ], DstAccessRange[ 0 ] ,
268- SrcMem + SrcOffset[ 0 ] , DepEvents.size (), DepEvents.data (), &OutEvent);
300+ /* blocking_write=*/ CL_FALSE, DstXOffBytes, DstAccessRangeWidthBytes ,
301+ SrcMem + SrcXOffBytes , DepEvents.size (), DepEvents.data (), &OutEvent);
269302 } else {
270- size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSize[0 ];
271- size_t BufferSlicePitch = (3 == DimDst) ? DstSize[0 ] * DstSize[1 ] : 0 ;
272- size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSize[0 ];
273- size_t HostSlicePitch = (3 == DimSrc) ? SrcSize[0 ] * SrcSize[1 ] : 0 ;
274-
275- pi_buff_rect_offset_struct BufferOffset{DstOffset[0 ], DstOffset[1 ],
276- DstOffset[2 ]};
277- pi_buff_rect_offset_struct HostOffset{SrcOffset[0 ], SrcOffset[1 ],
278- SrcOffset[2 ]};
279- pi_buff_rect_region_struct RectRegion{
280- DstAccessRange[0 ], DstAccessRange[1 ], DstAccessRange[2 ]};
303+ size_t BufferRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
304+ size_t BufferSlicePitch =
305+ (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm ] : 0 ;
306+ size_t HostRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
307+ size_t HostSlicePitch =
308+ (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm ] : 0 ;
309+
310+ pi_buff_rect_offset_struct BufferOffset{
311+ DstXOffBytes, DstOffset[DstPos.YTerm ], DstOffset[DstPos.ZTerm ]};
312+ pi_buff_rect_offset_struct HostOffset{
313+ SrcXOffBytes, SrcOffset[SrcPos.YTerm ], SrcOffset[SrcPos.ZTerm ]};
314+ pi_buff_rect_region_struct RectRegion{DstAccessRangeWidthBytes,
315+ DstAccessRange[DstPos.YTerm ],
316+ DstAccessRange[DstPos.ZTerm ]};
281317
282318 Plugin.call <PiApiKind::piEnqueueMemBufferWriteRect>(
283319 Queue, DstMem,
@@ -286,12 +322,16 @@ void copyH2D(SYCLMemObjI *SYCLMemObj, char *SrcMem, QueueImplPtr,
286322 SrcMem, DepEvents.size (), DepEvents.data (), &OutEvent);
287323 }
288324 } else {
289- size_t InputRowPitch = (1 == DimDst) ? 0 : DstSize[0 ];
290- size_t InputSlicePitch = (3 == DimDst) ? DstSize[0 ] * DstSize[1 ] : 0 ;
325+ size_t InputRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
326+ size_t InputSlicePitch =
327+ (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm ] : 0 ;
291328
292- pi_image_offset_struct Origin{DstOffset[0 ], DstOffset[1 ], DstOffset[2 ]};
293- pi_image_region_struct Region{DstAccessRange[0 ], DstAccessRange[1 ],
294- DstAccessRange[2 ]};
329+ pi_image_offset_struct Origin{DstOffset[DstPos.XTerm ],
330+ DstOffset[DstPos.YTerm ],
331+ DstOffset[DstPos.ZTerm ]};
332+ pi_image_region_struct Region{DstAccessRange[DstPos.XTerm ],
333+ DstAccessRange[DstPos.YTerm ],
334+ DstAccessRange[DstPos.ZTerm ]};
295335
296336 Plugin.call <PiApiKind::piEnqueueMemImageWrite>(
297337 Queue, DstMem,
@@ -311,34 +351,46 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
311351 assert (SYCLMemObj && " The SYCLMemObj is nullptr" );
312352
313353 const RT::PiQueue Queue = SrcQueue->getHandleRef ();
314- // Adjust sizes of 1 dimensions as OpenCL expects size in bytes.
315- SrcSize[0 ] *= SrcElemSize;
316354 const detail::plugin &Plugin = SrcQueue->getPlugin ();
317- if (SYCLMemObj->getType () == detail::SYCLMemObjI::MemObjType::BUFFER) {
318- DstOffset[0 ] *= DstElemSize;
319- SrcOffset[0 ] *= SrcElemSize;
320- SrcAccessRange[0 ] *= SrcElemSize;
321- DstAccessRange[0 ] *= DstElemSize;
322- DstSize[0 ] *= DstElemSize;
323355
356+ detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType ();
357+ TermPositions SrcPos, DstPos;
358+ prepTermPositions (SrcPos, DimSrc, MemType);
359+ prepTermPositions (DstPos, DimDst, MemType);
360+
361+ // For a given buffer, the various mem copy routines (copyD2H, copyH2D,
362+ // copyD2D) will usually have the same values for AccessRange, Size,
363+ // Dimension, Offset, etc. EXCEPT when the dtor for ~SYCLMemObjT is called.
364+ // Essentially, it schedules a copyBack of chars thus in copyD2H the
365+ // Dimension will then be 1 and DstAccessRange[0] and DstSize[0] will be
366+ // sized to bytes with a DstElemSize of 1.
367+ size_t DstXOffBytes = DstOffset[DstPos.XTerm ] * DstElemSize;
368+ size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm ] * SrcElemSize;
369+ size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm ] * SrcElemSize;
370+ size_t DstSzWidthBytes = DstSize[DstPos.XTerm ] * DstElemSize;
371+ size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm ] * SrcElemSize;
372+
373+ if (MemType == detail::SYCLMemObjI::MemObjType::BUFFER) {
324374 if (1 == DimDst && 1 == DimSrc) {
325375 Plugin.call <PiApiKind::piEnqueueMemBufferRead>(
326376 Queue, SrcMem,
327- /* blocking_read=*/ CL_FALSE, SrcOffset[ 0 ], SrcAccessRange[ 0 ] ,
328- DstMem + DstOffset[ 0 ] , DepEvents.size (), DepEvents.data (), &OutEvent);
377+ /* blocking_read=*/ CL_FALSE, SrcXOffBytes, SrcAccessRangeWidthBytes ,
378+ DstMem + DstXOffBytes , DepEvents.size (), DepEvents.data (), &OutEvent);
329379 } else {
330- size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSize[0 ];
331- size_t BufferSlicePitch = (3 == DimSrc) ? SrcSize[0 ] * SrcSize[1 ] : 0 ;
332-
333- size_t HostRowPitch = (1 == DimDst) ? 0 : DstSize[0 ];
334- size_t HostSlicePitch = (3 == DimDst) ? DstSize[0 ] * DstSize[1 ] : 0 ;
335-
336- pi_buff_rect_offset_struct BufferOffset{SrcOffset[0 ], SrcOffset[1 ],
337- SrcOffset[2 ]};
338- pi_buff_rect_offset_struct HostOffset{DstOffset[0 ], DstOffset[1 ],
339- DstOffset[2 ]};
340- pi_buff_rect_region_struct RectRegion{
341- SrcAccessRange[0 ], SrcAccessRange[1 ], SrcAccessRange[2 ]};
380+ size_t BufferRowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
381+ size_t BufferSlicePitch =
382+ (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm ] : 0 ;
383+ size_t HostRowPitch = (1 == DimDst) ? 0 : DstSzWidthBytes;
384+ size_t HostSlicePitch =
385+ (3 == DimDst) ? DstSzWidthBytes * DstSize[DstPos.YTerm ] : 0 ;
386+
387+ pi_buff_rect_offset_struct BufferOffset{
388+ SrcXOffBytes, SrcOffset[SrcPos.YTerm ], SrcOffset[SrcPos.ZTerm ]};
389+ pi_buff_rect_offset_struct HostOffset{
390+ DstXOffBytes, DstOffset[DstPos.YTerm ], DstOffset[DstPos.ZTerm ]};
391+ pi_buff_rect_region_struct RectRegion{SrcAccessRangeWidthBytes,
392+ SrcAccessRange[SrcPos.YTerm ],
393+ SrcAccessRange[SrcPos.ZTerm ]};
342394
343395 Plugin.call <PiApiKind::piEnqueueMemBufferReadRect>(
344396 Queue, SrcMem,
@@ -347,12 +399,16 @@ void copyD2H(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
347399 DstMem, DepEvents.size (), DepEvents.data (), &OutEvent);
348400 }
349401 } else {
350- size_t RowPitch = (1 == DimSrc) ? 0 : SrcSize[0 ];
351- size_t SlicePitch = (3 == DimSrc) ? SrcSize[0 ] * SrcSize[1 ] : 0 ;
402+ size_t RowPitch = (1 == DimSrc) ? 0 : SrcSzWidthBytes;
403+ size_t SlicePitch =
404+ (3 == DimSrc) ? SrcSzWidthBytes * SrcSize[SrcPos.YTerm ] : 0 ;
352405
353- pi_image_offset_struct Offset{SrcOffset[0 ], SrcOffset[1 ], SrcOffset[2 ]};
354- pi_image_region_struct Region{SrcAccessRange[0 ], SrcAccessRange[1 ],
355- SrcAccessRange[2 ]};
406+ pi_image_offset_struct Offset{SrcOffset[SrcPos.XTerm ],
407+ SrcOffset[SrcPos.YTerm ],
408+ SrcOffset[SrcPos.ZTerm ]};
409+ pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm ],
410+ SrcAccessRange[SrcPos.YTerm ],
411+ SrcAccessRange[SrcPos.ZTerm ]};
356412
357413 Plugin.call <PiApiKind::piEnqueueMemImageRead>(
358414 Queue, SrcMem, CL_FALSE, &Offset, &Region, RowPitch, SlicePitch, DstMem,
@@ -371,43 +427,60 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
371427
372428 const RT::PiQueue Queue = SrcQueue->getHandleRef ();
373429 const detail::plugin &Plugin = SrcQueue->getPlugin ();
374- if (SYCLMemObj->getType () == detail::SYCLMemObjI::MemObjType::BUFFER) {
375- // Adjust sizes of 1 dimensions as OpenCL expects size in bytes.
376- DstOffset[0 ] *= DstElemSize;
377- SrcOffset[0 ] *= SrcElemSize;
378- SrcAccessRange[0 ] *= SrcElemSize;
379- SrcSize[0 ] *= SrcElemSize;
380- DstSize[0 ] *= DstElemSize;
430+
431+ detail::SYCLMemObjI::MemObjType MemType = SYCLMemObj->getType ();
432+ TermPositions SrcPos, DstPos;
433+ prepTermPositions (SrcPos, DimSrc, MemType);
434+ prepTermPositions (DstPos, DimDst, MemType);
435+
436+ size_t DstXOffBytes = DstOffset[DstPos.XTerm ] * DstElemSize;
437+ size_t SrcXOffBytes = SrcOffset[SrcPos.XTerm ] * SrcElemSize;
438+ size_t SrcAccessRangeWidthBytes = SrcAccessRange[SrcPos.XTerm ] * SrcElemSize;
439+ size_t DstSzWidthBytes = DstSize[DstPos.XTerm ] * DstElemSize;
440+ size_t SrcSzWidthBytes = SrcSize[SrcPos.XTerm ] * SrcElemSize;
441+
442+ if (MemType == detail::SYCLMemObjI::MemObjType::BUFFER) {
381443 if (1 == DimDst && 1 == DimSrc) {
382444 Plugin.call <PiApiKind::piEnqueueMemBufferCopy>(
383- Queue, SrcMem, DstMem, SrcOffset[0 ], DstOffset[0 ], SrcAccessRange[0 ],
384- DepEvents.size (), DepEvents.data (), &OutEvent);
445+ Queue, SrcMem, DstMem, SrcXOffBytes, DstXOffBytes,
446+ SrcAccessRangeWidthBytes, DepEvents.size (), DepEvents.data (),
447+ &OutEvent);
385448 } else {
386- size_t SrcRowPitch = (1 == DimSrc) ? 0 : SrcSize[0 ];
387- size_t SrcSlicePitch =
388- (DimSrc > 1 ) ? SrcSize[0 ] * SrcSize[1 ] : SrcSize[0 ];
389-
390- size_t DstRowPitch = (1 == DimDst) ? 0 : DstSize[0 ];
391- size_t DstSlicePitch =
392- (DimDst > 1 ) ? DstSize[0 ] * DstSize[1 ] : DstSize[0 ];
393-
394- pi_buff_rect_offset_struct SrcOrigin{SrcOffset[0 ], SrcOffset[1 ],
395- SrcOffset[2 ]};
396- pi_buff_rect_offset_struct DstOrigin{DstOffset[0 ], DstOffset[1 ],
397- DstOffset[2 ]};
398- pi_buff_rect_region_struct Region{SrcAccessRange[0 ], SrcAccessRange[1 ],
399- SrcAccessRange[2 ]};
449+ // passing 0 for pitches not allowed. Because clEnqueueCopyBufferRect will
450+ // calculate both src and dest pitch using region[0], which is not correct
451+ // if src and dest are not the same size.
452+ size_t SrcRowPitch = SrcSzWidthBytes;
453+ size_t SrcSlicePitch = (DimSrc <= 1 )
454+ ? SrcSzWidthBytes
455+ : SrcSzWidthBytes * SrcSize[SrcPos.YTerm ];
456+ size_t DstRowPitch = DstSzWidthBytes;
457+ size_t DstSlicePitch = (DimDst <= 1 )
458+ ? DstSzWidthBytes
459+ : DstSzWidthBytes * DstSize[DstPos.YTerm ];
460+
461+ pi_buff_rect_offset_struct SrcOrigin{
462+ SrcXOffBytes, SrcOffset[SrcPos.YTerm ], SrcOffset[SrcPos.ZTerm ]};
463+ pi_buff_rect_offset_struct DstOrigin{
464+ DstXOffBytes, DstOffset[DstPos.YTerm ], DstOffset[DstPos.ZTerm ]};
465+ pi_buff_rect_region_struct Region{SrcAccessRangeWidthBytes,
466+ SrcAccessRange[SrcPos.YTerm ],
467+ SrcAccessRange[SrcPos.ZTerm ]};
400468
401469 Plugin.call <PiApiKind::piEnqueueMemBufferCopyRect>(
402470 Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region, SrcRowPitch,
403471 SrcSlicePitch, DstRowPitch, DstSlicePitch, DepEvents.size (),
404472 DepEvents.data (), &OutEvent);
405473 }
406474 } else {
407- pi_image_offset_struct SrcOrigin{SrcOffset[0 ], SrcOffset[1 ], SrcOffset[2 ]};
408- pi_image_offset_struct DstOrigin{DstOffset[0 ], DstOffset[1 ], DstOffset[2 ]};
409- pi_image_region_struct Region{SrcAccessRange[0 ], SrcAccessRange[1 ],
410- SrcAccessRange[2 ]};
475+ pi_image_offset_struct SrcOrigin{SrcOffset[SrcPos.XTerm ],
476+ SrcOffset[SrcPos.YTerm ],
477+ SrcOffset[SrcPos.ZTerm ]};
478+ pi_image_offset_struct DstOrigin{DstOffset[DstPos.XTerm ],
479+ DstOffset[DstPos.YTerm ],
480+ DstOffset[DstPos.ZTerm ]};
481+ pi_image_region_struct Region{SrcAccessRange[SrcPos.XTerm ],
482+ SrcAccessRange[SrcPos.YTerm ],
483+ SrcAccessRange[SrcPos.ZTerm ]};
411484
412485 Plugin.call <PiApiKind::piEnqueueMemImageCopy>(
413486 Queue, SrcMem, DstMem, &SrcOrigin, &DstOrigin, &Region,
0 commit comments