diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index 9e05e48f20080..c3ad24ecdc1de 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -227,6 +227,22 @@ template &Offset, const sycl::range<3> &Range, + const sycl::range<3> &MemoryRange, void *Data = nullptr) + : MOffset(Offset), MAccessRange(Range), MMemoryRange(MemoryRange), + MData(Data) {} + + sycl::id<3> MOffset; + sycl::range<3> MAccessRange; + sycl::range<3> MMemoryRange; + void *MData = nullptr; + void *Reserved = nullptr; +}; + // To ensure loop unrolling is done when processing dimensions. template void dim_loop_impl(std::integer_sequence, F &&f) { @@ -474,6 +490,8 @@ class __SYCL_EXPORT AccessorBaseHost { const range<3> &getMemoryRange() const; void *getPtr() const; + detail::AccHostDataT &getAccData(); + const property_list &getPropList() const; void *getMemoryObject() const; @@ -1106,21 +1124,42 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::InitializedVal::template get<0>()) {} #else - id<3> &getOffset() { return AccessorBaseHost::getOffset(); } + id<3> &getOffset() { + if constexpr (IsHostBuf) + return MAccData->MOffset; + else + return AccessorBaseHost::getOffset(); + } + range<3> &getAccessRange() { return AccessorBaseHost::getAccessRange(); } - range<3> &getMemoryRange() { return AccessorBaseHost::getMemoryRange(); } + range<3> &getMemoryRange() { + if constexpr (IsHostBuf) + return MAccData->MMemoryRange; + else + return AccessorBaseHost::getMemoryRange(); + } void *getPtr() { return AccessorBaseHost::getPtr(); } - const id<3> &getOffset() const { return AccessorBaseHost::getOffset(); } + const id<3> &getOffset() const { + if constexpr (IsHostBuf) + return MAccData->MOffset; + else + return AccessorBaseHost::getOffset(); + } const range<3> &getAccessRange() const { return AccessorBaseHost::getAccessRange(); } const range<3> &getMemoryRange() const { - return AccessorBaseHost::getMemoryRange(); + if constexpr (IsHostBuf) + return MAccData->MMemoryRange; + else + return AccessorBaseHost::getMemoryRange(); } void *getPtr() const { return AccessorBaseHost::getPtr(); } + void initHostAcc() { MAccData = &getAccData(); } + // The function references helper methods required by GDB pretty-printers void GDBMethodsAnchor() { #ifndef NDEBUG @@ -1131,11 +1170,17 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : #endif } + detail::AccHostDataT *MAccData = nullptr; + char padding[sizeof(detail::AccessorImplDevice) + - sizeof(PtrType) - sizeof(detail::AccessorBaseHost)]; + sizeof(PtrType) - sizeof(detail::AccessorBaseHost) - + sizeof(MAccData)]; PtrType getQualifiedPtr() const { - return reinterpret_cast(AccessorBaseHost::getPtr()); + if constexpr (IsHostBuf) + return reinterpret_cast(MAccData->MData); + else + return reinterpret_cast(AccessorBaseHost::getPtr()); } #endif // __SYCL_DEVICE_ONLY__ @@ -1196,9 +1241,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : preScreenAccessor(BufferRef.size(), PropertyList); if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); #endif } @@ -1227,9 +1274,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : preScreenAccessor(BufferRef.size(), PropertyList); if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); #endif } @@ -1256,9 +1305,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { preScreenAccessor(BufferRef.size(), PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1287,9 +1338,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { preScreenAccessor(BufferRef.size(), PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1315,13 +1368,14 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { - GDBMethodsAnchor(); preScreenAccessor(BufferRef.size(), PropertyList); if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1349,13 +1403,14 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { - GDBMethodsAnchor(); preScreenAccessor(BufferRef.size(), PropertyList); if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1413,12 +1468,13 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { - GDBMethodsAnchor(); preScreenAccessor(BufferRef.size(), PropertyList); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1446,12 +1502,13 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : getAdjustedMode(PropertyList), detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { - GDBMethodsAnchor(); preScreenAccessor(BufferRef.size(), PropertyList); + initHostAcc(); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1633,7 +1690,6 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { - GDBMethodsAnchor(); preScreenAccessor(BufferRef.size(), PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -1644,9 +1700,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1675,7 +1733,6 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { - GDBMethodsAnchor(); preScreenAccessor(BufferRef.size(), PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -1686,9 +1743,11 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : if (!IsPlaceH) addHostAccessorAndWait(AccessorBaseHost::impl.get()); + initHostAcc(); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1748,7 +1807,6 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { - GDBMethodsAnchor(); preScreenAccessor(BufferRef.size(), PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -1757,10 +1815,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : "the buffer", PI_ERROR_INVALID_VALUE); + initHostAcc(); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif @@ -1789,7 +1849,6 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : detail::getSyclObjImpl(BufferRef).get(), Dimensions, sizeof(DataT), BufferRef.OffsetInBytes, BufferRef.IsSubBuffer, PropertyList) { - GDBMethodsAnchor(); preScreenAccessor(BufferRef.size(), PropertyList); if (BufferRef.isOutOfBounds(AccessOffset, AccessRange, BufferRef.get_range())) @@ -1798,10 +1857,12 @@ class __SYCL_SPECIAL_CLASS __SYCL_TYPE(accessor) accessor : "the buffer", PI_ERROR_INVALID_VALUE); + initHostAcc(); detail::associateWithHandler(CommandGroupHandler, this, AccessTarget); detail::constructorNotification(detail::getSyclObjImpl(BufferRef).get(), detail::AccessorBaseHost::impl.get(), AccessTarget, AccessMode, CodeLoc); + GDBMethodsAnchor(); } #endif diff --git a/sycl/source/accessor.cpp b/sycl/source/accessor.cpp index e9d6c6be901f3..a2afdd70824ed 100644 --- a/sycl/source/accessor.cpp +++ b/sycl/source/accessor.cpp @@ -33,6 +33,8 @@ range<3> &AccessorBaseHost::getAccessRange() { return impl->MAccessRange; } range<3> &AccessorBaseHost::getMemoryRange() { return impl->MMemoryRange; } void *AccessorBaseHost::getPtr() { return impl->MData; } +detail::AccHostDataT &AccessorBaseHost::getAccData() { return impl->MAccData; } + const property_list &AccessorBaseHost::getPropList() const { return impl->MPropertyList; } diff --git a/sycl/source/detail/accessor_impl.hpp b/sycl/source/detail/accessor_impl.hpp index f58af65f826c8..1f0ce27a9f25b 100644 --- a/sycl/source/detail/accessor_impl.hpp +++ b/sycl/source/detail/accessor_impl.hpp @@ -9,6 +9,7 @@ #pragma once #include +#include #include #include #include @@ -45,8 +46,7 @@ class __SYCL_EXPORT AccessorImplHost { int ElemSize, int OffsetInBytes = 0, bool IsSubBuffer = false, const property_list &PropertyList = {}) - : MOffset(Offset), MAccessRange(AccessRange), MMemoryRange(MemoryRange), - MAccessMode(AccessMode), + : MAccData(Offset, AccessRange, MemoryRange), MAccessMode(AccessMode), MSYCLMemObj((detail::SYCLMemObjI *)SYCLMemObject), MDims(Dims), MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes), MIsSubBuffer(IsSubBuffer), MPropertyList(PropertyList) {} @@ -54,8 +54,7 @@ class __SYCL_EXPORT AccessorImplHost { ~AccessorImplHost(); AccessorImplHost(const AccessorImplHost &Other) - : MOffset(Other.MOffset), MAccessRange(Other.MAccessRange), - MMemoryRange(Other.MMemoryRange), MAccessMode(Other.MAccessMode), + : MAccData(Other.MAccData), MAccessMode(Other.MAccessMode), MSYCLMemObj(Other.MSYCLMemObj), MDims(Other.MDims), MElemSize(Other.MElemSize), MOffsetInBytes(Other.MOffsetInBytes), MIsSubBuffer(Other.MIsSubBuffer), MPropertyList(Other.MPropertyList) {} @@ -71,11 +70,13 @@ class __SYCL_EXPORT AccessorImplHost { void resize(size_t GlobalSize); - id<3> MOffset; + detail::AccHostDataT MAccData; + + id<3> &MOffset = MAccData.MOffset; // The size of accessing region. - range<3> MAccessRange; + range<3> &MAccessRange = MAccData.MAccessRange; // The size of memory object this requirement is created for. - range<3> MMemoryRange; + range<3> &MMemoryRange = MAccData.MMemoryRange; access::mode MAccessMode; detail::SYCLMemObjI *MSYCLMemObj; @@ -85,7 +86,7 @@ class __SYCL_EXPORT AccessorImplHost { unsigned int MOffsetInBytes; bool MIsSubBuffer; - void *MData = nullptr; + void *&MData = MAccData.MData; Command *MBlockedCmd = nullptr; diff --git a/sycl/test/abi/layout_accessors_host.cpp b/sycl/test/abi/layout_accessors_host.cpp index ef03fac69c89d..e5a6febd03415 100644 --- a/sycl/test/abi/layout_accessors_host.cpp +++ b/sycl/test/abi/layout_accessors_host.cpp @@ -25,7 +25,8 @@ void hostAcc(accessor A // CHECK-NEXT: 8 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount // CHECK-NEXT: 8 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi // CHECK-NEXT: 0 | class sycl::detail::accessor_common (base) (empty) -// CHECK-NEXT: 16 | char[16] padding +// CHECK-NEXT: 16 | detail::AccHostDataT * MAccData +// CHECK-NEXT: 24 | char[8] padding // CHECK-NEXT: [sizeof=32, dsize=32, align=8, // CHECK-NEXT: nvsize=32, nvalign=8] @@ -46,7 +47,8 @@ void hostAcc(accessor // CHECK-NEXT: 8 | class std::__shared_count<__gnu_cxx::_S_atomic> _M_refcount // CHECK-NEXT: 8 | _Sp_counted_base<(_Lock_policy)2U> * _M_pi // CHECK-NEXT: 0 | class sycl::detail::accessor_common (base) (empty) -// CHECK-NEXT: 16 | char[16] padding +// CHECK-NEXT: 16 | detail::AccHostDataT * MAccData +// CHECK-NEXT: 24 | char[8] padding // CHECK-NEXT: [sizeof=32, dsize=32, align=8, // CHECK-NEXT: nvsize=32, nvalign=8] diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index d7a6f7d91e2a0..a17fe783f3b60 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3812,6 +3812,7 @@ _ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEE _ZN4sycl3_V16detail13select_deviceERKSt8functionIFiRKNS0_6deviceEEERKNS0_7contextE _ZN4sycl3_V16detail14getBorderColorENS0_19image_channel_orderE _ZN4sycl3_V16detail15getOrWaitEventsESt6vectorINS0_5eventESaIS3_EESt10shared_ptrINS1_12context_implEE +_ZN4sycl3_V16detail16AccessorBaseHost10getAccDataEv _ZN4sycl3_V16detail16AccessorBaseHost14getAccessRangeEv _ZN4sycl3_V16detail16AccessorBaseHost14getMemoryRangeEv _ZN4sycl3_V16detail16AccessorBaseHost6getPtrEv diff --git a/sycl/test/basic_tests/accessor/host_acc_opt.cpp b/sycl/test/basic_tests/accessor/host_acc_opt.cpp new file mode 100644 index 0000000000000..4aab777fea053 --- /dev/null +++ b/sycl/test/basic_tests/accessor/host_acc_opt.cpp @@ -0,0 +1,26 @@ +// RUN: %clangxx -O2 -std=c++17 -I %sycl_include/sycl -I %sycl_include -S -emit-llvm %s -o - | FileCheck %s + +// The test verifies that the accessor::operator[] implementation is +// good enough for compiler to optimize away calls to getOffset and +// getMemoryRange and vectorize the loop. + +#include + +// CHECK: define {{.*}}foo{{.*}} { +// CHECK-NOT: call +// CHECK-NOT: invoke +// CHECK: vector.body: +// CHECK-NOT: call +// CHECK-NOT: invoke +// CHECK: load <4 x i32> +// CHECK-NOT: call +// CHECK-NOT: invoke +// CHECK: store <4 x i32> +// CHECK-NOT: call +// CHECK-NOT: invoke +void foo(sycl::accessor &Acc, + int *Src) { + for (size_t I = 0; I < 64; ++I) + Acc[I] = Src[I]; +}