From ea4aeb5d49384216aa25ec6e0edafa22bbbd35b5 Mon Sep 17 00:00:00 2001 From: Dmitri Mokhov Date: Wed, 2 Jun 2021 13:30:58 -0500 Subject: [PATCH 1/2] [SYCL] Added missing SYCL 2020 USM features - USM atomic allocation aspects (+ tests) - handler::mem_advise - handler::copy and queue::copy - queue class: overloads of fill, memset, memcpy, copy, mem_advise, prefetch with dependency event arguments - optional property list argument in USM allocation functions and usm_allocator constructors (+ tests) --- sycl/include/CL/sycl/aspects.hpp | 4 +- sycl/include/CL/sycl/detail/cg.hpp | 33 +++ .../include/CL/sycl/detail/memory_manager.hpp | 5 + sycl/include/CL/sycl/handler.hpp | 21 ++ sycl/include/CL/sycl/queue.hpp | 196 +++++++++++++++++- sycl/include/CL/sycl/usm.hpp | 130 +++++++++--- sycl/include/CL/sycl/usm/usm_allocator.hpp | 21 +- sycl/source/detail/device_impl.cpp | 13 ++ sycl/source/detail/memory_manager.cpp | 13 ++ sycl/source/detail/queue_impl.cpp | 30 +-- sycl/source/detail/queue_impl.hpp | 22 +- sycl/source/detail/scheduler/commands.cpp | 7 + sycl/source/detail/usm/usm_impl.cpp | 83 +++++++- sycl/source/handler.cpp | 27 +++ sycl/source/queue.cpp | 34 ++- sycl/test/abi/sycl_symbols_linux.dump | 24 +++ .../test/extensions/usm/usm_alloc_utility.cpp | 85 +++++--- sycl/test/extensions/usm/usm_allocator.cpp | 7 +- sycl/test/on-device/basic_tests/aspects.cpp | 6 + 19 files changed, 656 insertions(+), 105 deletions(-) diff --git a/sycl/include/CL/sycl/aspects.hpp b/sycl/include/CL/sycl/aspects.hpp index 34d02856639aa..1fe2670d1ff3a 100644 --- a/sycl/include/CL/sycl/aspects.hpp +++ b/sycl/include/CL/sycl/aspects.hpp @@ -38,7 +38,9 @@ enum class aspect { ext_intel_gpu_subslices_per_slice = 22, ext_intel_gpu_eu_count_per_subslice = 23, ext_intel_max_mem_bandwidth = 24, - ext_intel_mem_channel = 25 + ext_intel_mem_channel = 25, + usm_atomic_host_allocations = 26, + usm_atomic_shared_allocations = 27 }; } // namespace sycl diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 9c40d1e20771f..b218df59c142d 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -93,6 +93,7 @@ namespace detail { // Used to represent a type of an extended member enum class ExtendedMembersType : unsigned int { HANDLER_KERNEL_BUNDLE = 0, + HANDLER_MEM_ADVICE, }; // Holds a pointer to an object of an arbitrary type and an ID value which @@ -164,6 +165,7 @@ class CG { PREFETCH_USM = 12, CODEPLAY_INTEROP_TASK = 13, CODEPLAY_HOST_TASK = 14, + ADVISE_USM = 15, }; CG(CGTYPE Type, vector_class> ArgsStorage, @@ -414,6 +416,37 @@ class CGPrefetchUSM : public CG { size_t getLength() { return MLength; } }; +/// "Advise USM" command group class. +class CGAdviseUSM : public CG { + void *MDst; + size_t MLength; + +public: + CGAdviseUSM(void *DstPtr, size_t Length, + vector_class> ArgsStorage, + vector_class AccStorage, + vector_class> SharedPtrStorage, + vector_class Requirements, + vector_class Events, + detail::code_location loc = {}) + : CG(ADVISE_USM, std::move(ArgsStorage), std::move(AccStorage), + std::move(SharedPtrStorage), std::move(Requirements), + std::move(Events), std::move(loc)), + MDst(DstPtr), MLength(Length) {} + void *getDst() { return MDst; } + size_t getLength() { return MLength; } + + pi_mem_advice getAdvice() { + auto ExtendedMembers = getExtendedMembers(); + if (!ExtendedMembers) + return PI_MEM_ADVISE_UNKNOWN; + for (const ExtendedMemberT &EM : *ExtendedMembers) + if ((ExtendedMembersType::HANDLER_MEM_ADVICE == EM.MType) && EM.MData) + return *std::static_pointer_cast(EM.MData); + return PI_MEM_ADVISE_UNKNOWN; + } +}; + class CGInteropTask : public CG { public: std::unique_ptr MInteropTask; diff --git a/sycl/include/CL/sycl/detail/memory_manager.hpp b/sycl/include/CL/sycl/detail/memory_manager.hpp index 75640e2e24b84..ded30d827db37 100644 --- a/sycl/include/CL/sycl/detail/memory_manager.hpp +++ b/sycl/include/CL/sycl/detail/memory_manager.hpp @@ -151,6 +151,11 @@ class __SYCL_EXPORT MemoryManager { static void prefetch_usm(void *Ptr, QueueImplPtr Queue, size_t Len, std::vector DepEvents, RT::PiEvent &OutEvent); + + static void advise_usm(const void *Ptr, QueueImplPtr Queue, size_t Len, + pi_mem_advice Advice, + std::vector DepEvents, + RT::PiEvent &OutEvent); }; } // namespace detail } // namespace sycl diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 0642f36588b6c..6dbd6fbc3e0fc 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -2183,6 +2183,19 @@ class __SYCL_EXPORT handler { /// \param Count is a number of bytes to copy. void memcpy(void *Dest, const void *Src, size_t Count); + /// Copies data from one memory region to another, both pointed by + /// USM pointers. + /// No operations is done if \param Count is zero. An exception is thrown + /// if either \param Dest or \param Src is nullptr. The behavior is undefined + /// if any of the pointer parameters is invalid. + /// + /// \param Dest is a USM pointer to the destination memory. + /// \param Src is a USM pointer to the source memory. + /// \param Count is a number of elements of type T to copy. + template void copy(T *Dest, const T *Src, size_t Count) { + this->memcpy(Dest, Src, Count * sizeof(T)); + } + /// Fills the memory pointed by a USM pointer with the value specified. /// No operations is done if \param Count is zero. An exception is thrown /// if \param Dest is nullptr. The behavior is undefined if \param Dest @@ -2201,6 +2214,14 @@ class __SYCL_EXPORT handler { /// \param Count is a number of bytes to be prefetched. void prefetch(const void *Ptr, size_t Count); + /// Provides additional information to the underlying runtime about how + /// different allocations are used. + /// + /// \param Ptr is a USM pointer to the allocation. + /// \param Length is a number of bytes in the allocation. + /// \param Advice is a device-defined advice for the specified allocation. + void mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice); + private: shared_ptr_class MQueue; /// The storage for the arguments passed. diff --git a/sycl/include/CL/sycl/queue.hpp b/sycl/include/CL/sycl/queue.hpp index ae7fcb47e47fa..0dfcb7ecbb98f 100644 --- a/sycl/include/CL/sycl/queue.hpp +++ b/sycl/include/CL/sycl/queue.hpp @@ -320,17 +320,52 @@ class __SYCL_EXPORT queue { /// Fills the specified memory with the specified pattern. /// - /// \param Ptr is the pointer to the memory to fill + /// \param Ptr is the pointer to the memory to fill. /// \param Pattern is the pattern to fill into the memory. T should be /// trivially copyable. /// \param Count is the number of times to fill Pattern into Ptr. + /// \return an event representing fill operation. template event fill(void *Ptr, const T &Pattern, size_t Count) { return submit([&](handler &CGH) { CGH.fill(Ptr, Pattern, Count); }); } + /// Fills the specified memory with the specified pattern. + /// + /// \param Ptr is the pointer to the memory to fill. + /// \param Pattern is the pattern to fill into the memory. T should be + /// trivially copyable. + /// \param Count is the number of times to fill Pattern into Ptr. + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing fill operation. + template + event fill(void *Ptr, const T &Pattern, size_t Count, event DepEvent) { + return submit([&](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.fill(Ptr, Pattern, Count); + }); + } + + /// Fills the specified memory with the specified pattern. + /// + /// \param Ptr is the pointer to the memory to fill. + /// \param Pattern is the pattern to fill into the memory. T should be + /// trivially copyable. + /// \param Count is the number of times to fill Pattern into Ptr. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing fill operation. + template + event fill(void *Ptr, const T &Pattern, size_t Count, + const vector_class &DepEvents) { + return submit([&](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.fill(Ptr, Pattern, Count); + }); + } + /// Fills the memory pointed by a USM pointer with the value specified. /// No operations is done if \param Count is zero. An exception is thrown - /// if \param Dest is nullptr. The behavior is undefined if \param Ptr + /// if \param Ptr is nullptr. The behavior is undefined if \param Ptr /// is invalid. /// /// \param Ptr is a USM pointer to the memory to fill. @@ -339,6 +374,32 @@ class __SYCL_EXPORT queue { /// \return an event representing fill operation. event memset(void *Ptr, int Value, size_t Count); + /// Fills the memory pointed by a USM pointer with the value specified. + /// No operations is done if \param Count is zero. An exception is thrown + /// if \param Ptr is nullptr. The behavior is undefined if \param Ptr + /// is invalid. + /// + /// \param Ptr is a USM pointer to the memory to fill. + /// \param Value is a value to be set. Value is cast as an unsigned char. + /// \param Count is a number of bytes to fill. + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing fill operation. + event memset(void *Ptr, int Value, size_t Count, event DepEvent); + + /// Fills the memory pointed by a USM pointer with the value specified. + /// No operations is done if \param Count is zero. An exception is thrown + /// if \param Ptr is nullptr. The behavior is undefined if \param Ptr + /// is invalid. + /// + /// \param Ptr is a USM pointer to the memory to fill. + /// \param Value is a value to be set. Value is cast as an unsigned char. + /// \param Count is a number of bytes to fill. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing fill operation. + event memset(void *Ptr, int Value, size_t Count, + const vector_class &DepEvents); + /// Copies data from one memory region to another, both pointed by /// USM pointers. /// No operations is done if \param Count is zero. An exception is thrown @@ -351,6 +412,81 @@ class __SYCL_EXPORT queue { /// \return an event representing copy operation. event memcpy(void *Dest, const void *Src, size_t Count); + /// Copies data from one memory region to another, both pointed by + /// USM pointers. + /// No operations is done if \param Count is zero. An exception is thrown + /// if either \param Dest or \param Src is nullptr. The behavior is undefined + /// if any of the pointer parameters is invalid. + /// + /// \param Dest is a USM pointer to the destination memory. + /// \param Src is a USM pointer to the source memory. + /// \param Count is a number of bytes to copy. + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing copy operation. + event memcpy(void *Dest, const void *Src, size_t Count, event DepEvent); + + /// Copies data from one memory region to another, both pointed by + /// USM pointers. + /// No operations is done if \param Count is zero. An exception is thrown + /// if either \param Dest or \param Src is nullptr. The behavior is undefined + /// if any of the pointer parameters is invalid. + /// + /// \param Dest is a USM pointer to the destination memory. + /// \param Src is a USM pointer to the source memory. + /// \param Count is a number of bytes to copy. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing copy operation. + event memcpy(void *Dest, const void *Src, size_t Count, + const vector_class &DepEvents); + + /// Copies data from one memory region to another, both pointed by + /// USM pointers. + /// No operations is done if \param Count is zero. An exception is thrown + /// if either \param Dest or \param Src is nullptr. The behavior is undefined + /// if any of the pointer parameters is invalid. + /// + /// \param Dest is a USM pointer to the destination memory. + /// \param Src is a USM pointer to the source memory. + /// \param Count is a number of elements of type T to copy. + /// \return an event representing copy operation. + template event copy(T *Dest, const T *Src, size_t Count) { + return this->memcpy(Dest, Src, Count * sizeof(T)); + } + + /// Copies data from one memory region to another, both pointed by + /// USM pointers. + /// No operations is done if \param Count is zero. An exception is thrown + /// if either \param Dest or \param Src is nullptr. The behavior is undefined + /// if any of the pointer parameters is invalid. + /// + /// \param Dest is a USM pointer to the destination memory. + /// \param Src is a USM pointer to the source memory. + /// \param Count is a number of elements of type T to copy. + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing copy operation. + template + event copy(T *Dest, const T *Src, size_t Count, event DepEvent) { + return this->memcpy(Dest, Src, Count * sizeof(T), DepEvent); + } + + /// Copies data from one memory region to another, both pointed by + /// USM pointers. + /// No operations is done if \param Count is zero. An exception is thrown + /// if either \param Dest or \param Src is nullptr. The behavior is undefined + /// if any of the pointer parameters is invalid. + /// + /// \param Dest is a USM pointer to the destination memory. + /// \param Src is a USM pointer to the source memory. + /// \param Count is a number of elements of type T to copy. + /// \param DepEvents is a vector of events that specifies the kernel + /// \return an event representing copy operation. + template + event copy(T *Dest, const T *Src, size_t Count, + const vector_class &DepEvents) { + return this->memcpy(Dest, Src, Count * sizeof(T), DepEvents); + } + /// Provides additional information to the underlying runtime about how /// different allocations are used. /// @@ -360,16 +496,72 @@ class __SYCL_EXPORT queue { /// \return an event representing advice operation. event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice); + /// Provides additional information to the underlying runtime about how + /// different allocations are used. + /// + /// \param Ptr is a USM pointer to the allocation. + /// \param Length is a number of bytes in the allocation. + /// \param Advice is a device-defined advice for the specified allocation. + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing advice operation. + event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, + event DepEvent); + + /// Provides additional information to the underlying runtime about how + /// different allocations are used. + /// + /// \param Ptr is a USM pointer to the allocation. + /// \param Length is a number of bytes in the allocation. + /// \param Advice is a device-defined advice for the specified allocation. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing advice operation. + event mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, + const vector_class &DepEvents); + /// Provides hints to the runtime library that data should be made available /// on a device earlier than Unified Shared Memory would normally require it /// to be available. /// /// \param Ptr is a USM pointer to the memory to be prefetched to the device. /// \param Count is a number of bytes to be prefetched. + /// \return an event representing prefetch operation. event prefetch(const void *Ptr, size_t Count) { return submit([=](handler &CGH) { CGH.prefetch(Ptr, Count); }); } + /// Provides hints to the runtime library that data should be made available + /// on a device earlier than Unified Shared Memory would normally require it + /// to be available. + /// + /// \param Ptr is a USM pointer to the memory to be prefetched to the device. + /// \param Count is a number of bytes to be prefetched. + /// \param DepEvent is an event that specifies the kernel dependencies. + /// \return an event representing prefetch operation. + event prefetch(const void *Ptr, size_t Count, event DepEvent) { + return submit([=](handler &CGH) { + CGH.depends_on(DepEvent); + CGH.prefetch(Ptr, Count); + }); + } + + /// Provides hints to the runtime library that data should be made available + /// on a device earlier than Unified Shared Memory would normally require it + /// to be available. + /// + /// \param Ptr is a USM pointer to the memory to be prefetched to the device. + /// \param Count is a number of bytes to be prefetched. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing prefetch operation. + event prefetch(const void *Ptr, size_t Count, + const vector_class &DepEvents) { + return submit([=](handler &CGH) { + CGH.depends_on(DepEvents); + CGH.prefetch(Ptr, Count); + }); + } + /// single_task version with a kernel represented as a lambda. /// /// \param KernelFunc is the Kernel functor or lambda diff --git a/sycl/include/CL/sycl/usm.hpp b/sycl/include/CL/sycl/usm.hpp index ba9e417b98499..ba91029144fb5 100644 --- a/sycl/include/CL/sycl/usm.hpp +++ b/sycl/include/CL/sycl/usm.hpp @@ -20,13 +20,24 @@ namespace sycl { /// __SYCL_EXPORT void *malloc_device(size_t size, const device &dev, const context &ctxt); +__SYCL_EXPORT void *malloc_device(size_t size, const device &dev, + const context &ctxt, + const property_list &propList); __SYCL_EXPORT void *malloc_device(size_t size, const queue &q); +__SYCL_EXPORT void *malloc_device(size_t size, const queue &q, + const property_list &propList); __SYCL_EXPORT void *aligned_alloc_device(size_t alignment, size_t size, const device &dev, const context &ctxt); +__SYCL_EXPORT void *aligned_alloc_device(size_t alignment, size_t size, + const device &dev, const context &ctxt, + const property_list &propList); __SYCL_EXPORT void *aligned_alloc_device(size_t alignment, size_t size, const queue &q); +__SYCL_EXPORT void *aligned_alloc_device(size_t alignment, size_t size, + const queue &q, + const property_list &propList); __SYCL_EXPORT void free(void *ptr, const context &ctxt); __SYCL_EXPORT void free(void *ptr, const queue &q); @@ -35,124 +46,177 @@ __SYCL_EXPORT void free(void *ptr, const queue &q); // Restricted USM /// __SYCL_EXPORT void *malloc_host(size_t size, const context &ctxt); +__SYCL_EXPORT void *malloc_host(size_t size, const context &ctxt, + const property_list &propList); __SYCL_EXPORT void *malloc_host(size_t size, const queue &q); +__SYCL_EXPORT void *malloc_host(size_t size, const queue &q, + const property_list &propList); __SYCL_EXPORT void *malloc_shared(size_t size, const device &dev, const context &ctxt); +__SYCL_EXPORT void *malloc_shared(size_t size, const device &dev, + const context &ctxt, + const property_list &propList); __SYCL_EXPORT void *malloc_shared(size_t size, const queue &q); +__SYCL_EXPORT void *malloc_shared(size_t size, const queue &q, + const property_list &propList); __SYCL_EXPORT void *aligned_alloc_host(size_t alignment, size_t size, const context &ctxt); +__SYCL_EXPORT void *aligned_alloc_host(size_t alignment, size_t size, + const context &ctxt, + const property_list &propList); __SYCL_EXPORT void *aligned_alloc_host(size_t alignment, size_t size, const queue &q); +__SYCL_EXPORT void *aligned_alloc_host(size_t alignment, size_t size, + const queue &q, + const property_list &propList); __SYCL_EXPORT void *aligned_alloc_shared(size_t alignment, size_t size, const device &dev, const context &ctxt); +__SYCL_EXPORT void *aligned_alloc_shared(size_t alignment, size_t size, + const device &dev, const context &ctxt, + const property_list &propList); __SYCL_EXPORT void *aligned_alloc_shared(size_t alignment, size_t size, const queue &q); +__SYCL_EXPORT void *aligned_alloc_shared(size_t alignment, size_t size, + const queue &q, + const property_list &propList); /// // single form /// __SYCL_EXPORT void *malloc(size_t size, const device &dev, const context &ctxt, usm::alloc kind); +__SYCL_EXPORT void *malloc(size_t size, const device &dev, const context &ctxt, + usm::alloc kind, const property_list &propList); __SYCL_EXPORT void *malloc(size_t size, const queue &q, usm::alloc kind); +__SYCL_EXPORT void *malloc(size_t size, const queue &q, usm::alloc kind, + const property_list &propList); __SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, const device &dev, const context &ctxt, usm::alloc kind); +__SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, + const device &dev, const context &ctxt, + usm::alloc kind, + const property_list &propList); __SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, const queue &q, usm::alloc kind); +__SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, const queue &q, + usm::alloc kind, + const property_list &propList); /// // Template forms /// template -T *malloc_device(size_t Count, const device &Dev, const context &Ctxt) { - return static_cast(malloc_device(Count * sizeof(T), Dev, Ctxt)); +T *malloc_device(size_t Count, const device &Dev, const context &Ctxt, + const property_list &PropList = {}) { + return static_cast( + malloc_device(Count * sizeof(T), Dev, Ctxt, PropList)); } -template T *malloc_device(size_t Count, const queue &Q) { - return malloc_device(Count, Q.get_device(), Q.get_context()); +template +T *malloc_device(size_t Count, const queue &Q, + const property_list &PropList = {}) { + return malloc_device(Count, Q.get_device(), Q.get_context(), PropList); } template T *aligned_alloc_device(size_t Alignment, size_t Count, const device &Dev, - const context &Ctxt) { + const context &Ctxt, + const property_list &PropList = {}) { return static_cast( - aligned_alloc_device(Alignment, Count * sizeof(T), Dev, Ctxt)); + aligned_alloc_device(Alignment, Count * sizeof(T), Dev, Ctxt, PropList)); } template -T *aligned_alloc_device(size_t Alignment, size_t Count, const queue &Q) { +T *aligned_alloc_device(size_t Alignment, size_t Count, const queue &Q, + const property_list &PropList = {}) { return aligned_alloc_device(Alignment, Count, Q.get_device(), - Q.get_context()); + Q.get_context(), PropList); } -template T *malloc_host(size_t Count, const context &Ctxt) { - return static_cast(malloc_host(Count * sizeof(T), Ctxt)); +template +T *malloc_host(size_t Count, const context &Ctxt, + const property_list &PropList = {}) { + return static_cast(malloc_host(Count * sizeof(T), Ctxt, PropList)); } -template T *malloc_host(size_t Count, const queue &Q) { - return malloc_host(Count, Q.get_context()); +template +T *malloc_host(size_t Count, const queue &Q, + const property_list &PropList = {}) { + return malloc_host(Count, Q.get_context(), PropList); } template -T *malloc_shared(size_t Count, const device &Dev, const context &Ctxt) { - return static_cast(malloc_shared(Count * sizeof(T), Dev, Ctxt)); +T *malloc_shared(size_t Count, const device &Dev, const context &Ctxt, + const property_list &PropList = {}) { + return static_cast( + malloc_shared(Count * sizeof(T), Dev, Ctxt, PropList)); } -template T *malloc_shared(size_t Count, const queue &Q) { - return malloc_shared(Count, Q.get_device(), Q.get_context()); +template +T *malloc_shared(size_t Count, const queue &Q, + const property_list &PropList = {}) { + return malloc_shared(Count, Q.get_device(), Q.get_context(), PropList); } template -T *aligned_alloc_host(size_t Alignment, size_t Count, const context &Ctxt) { +T *aligned_alloc_host(size_t Alignment, size_t Count, const context &Ctxt, + const property_list &PropList = {}) { return static_cast( - aligned_alloc_host(Alignment, Count * sizeof(T), Ctxt)); + aligned_alloc_host(Alignment, Count * sizeof(T), Ctxt, PropList)); } template -T *aligned_alloc_host(size_t Alignment, size_t Count, const queue &Q) { - return aligned_alloc_host(Alignment, Count, Q.get_context()); +T *aligned_alloc_host(size_t Alignment, size_t Count, const queue &Q, + const property_list &PropList = {}) { + return aligned_alloc_host(Alignment, Count, Q.get_context(), PropList); } template T *aligned_alloc_shared(size_t Alignment, size_t Count, const device &Dev, - const context &Ctxt) { + const context &Ctxt, + const property_list &PropList = {}) { return static_cast( - aligned_alloc_shared(Alignment, Count * sizeof(T), Dev, Ctxt)); + aligned_alloc_shared(Alignment, Count * sizeof(T), Dev, Ctxt, PropList)); } template -T *aligned_alloc_shared(size_t Alignment, size_t Count, const queue &Q) { +T *aligned_alloc_shared(size_t Alignment, size_t Count, const queue &Q, + const property_list &PropList = {}) { return aligned_alloc_shared(Alignment, Count, Q.get_device(), - Q.get_context()); + Q.get_context(), PropList); } template -T *malloc(size_t Count, const device &Dev, const context &Ctxt, - usm::alloc Kind) { - return static_cast(malloc(Count * sizeof(T), Dev, Ctxt, Kind)); +T *malloc(size_t Count, const device &Dev, const context &Ctxt, usm::alloc Kind, + const property_list &PropList = {}) { + return static_cast(malloc(Count * sizeof(T), Dev, Ctxt, Kind, PropList)); } -template T *malloc(size_t Count, const queue &Q, usm::alloc Kind) { - return malloc(Count, Q.get_device(), Q.get_context(), Kind); +template +T *malloc(size_t Count, const queue &Q, usm::alloc Kind, + const property_list &PropList = {}) { + return malloc(Count, Q.get_device(), Q.get_context(), Kind, PropList); } template T *aligned_alloc(size_t Alignment, size_t Count, const device &Dev, - const context &Ctxt, usm::alloc Kind) { + const context &Ctxt, usm::alloc Kind, + const property_list &PropList = {}) { return static_cast( - aligned_alloc(Alignment, Count * sizeof(T), Dev, Ctxt, Kind)); + aligned_alloc(Alignment, Count * sizeof(T), Dev, Ctxt, Kind, PropList)); } template T *aligned_alloc(size_t Alignment, size_t Count, const queue &Q, - usm::alloc Kind) { + usm::alloc Kind, const property_list &PropList = {}) { return aligned_alloc(Alignment, Count, Q.get_device(), Q.get_context(), - Kind); + Kind, PropList); } // Pointer queries diff --git a/sycl/include/CL/sycl/usm/usm_allocator.hpp b/sycl/include/CL/sycl/usm/usm_allocator.hpp index 5c25673841926..9bcc63d521e13 100644 --- a/sycl/include/CL/sycl/usm/usm_allocator.hpp +++ b/sycl/include/CL/sycl/usm/usm_allocator.hpp @@ -23,7 +23,8 @@ namespace sycl { // Forward declarations. __SYCL_EXPORT void *aligned_alloc(size_t alignment, size_t size, const device &dev, const context &ctxt, - usm::alloc kind); + usm::alloc kind, + const property_list &propList); __SYCL_EXPORT void free(void *ptr, const context &ctxt); template @@ -44,26 +45,31 @@ class usm_allocator { "usm_allocator does not support AllocKind == usm::alloc::device"); usm_allocator() noexcept = delete; - usm_allocator(const context &Ctxt, const device &Dev) noexcept - : MContext(Ctxt), MDevice(Dev) {} - usm_allocator(const queue &Q) noexcept - : MContext(Q.get_context()), MDevice(Q.get_device()) {} + usm_allocator(const context &Ctxt, const device &Dev, + const property_list &PropList = {}) noexcept + : MContext(Ctxt), MDevice(Dev), MPropList(PropList) {} + usm_allocator(const queue &Q, const property_list &PropList = {}) noexcept + : MContext(Q.get_context()), MDevice(Q.get_device()), + MPropList(PropList) {} usm_allocator(const usm_allocator &) noexcept = default; usm_allocator(usm_allocator &&) noexcept = default; usm_allocator &operator=(const usm_allocator &Other) { MContext = Other.MContext; MDevice = Other.MDevice; + MPropList = Other.MPropList; return *this; } usm_allocator &operator=(usm_allocator &&Other) { MContext = std::move(Other.MContext); MDevice = std::move(Other.MDevice); + MPropList = std::move(Other.MPropList); return *this; } template usm_allocator(const usm_allocator &Other) noexcept - : MContext(Other.MContext), MDevice(Other.MDevice) {} + : MContext(Other.MContext), MDevice(Other.MDevice), + MPropList(Other.MPropList) {} /// Allocates memory. /// @@ -72,7 +78,7 @@ class usm_allocator { auto Result = reinterpret_cast( aligned_alloc(getAlignment(), NumberOfElements * sizeof(value_type), - MDevice, MContext, AllocKind)); + MDevice, MContext, AllocKind, MPropList)); if (!Result) { throw memory_allocation_error(); } @@ -111,6 +117,7 @@ class usm_allocator { context MContext; device MDevice; + property_list MPropList; }; } // namespace sycl diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 85305d397987d..172c19be83344 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -251,8 +251,21 @@ bool device_impl::has(aspect Aspect) const { return get_info(); case aspect::usm_host_allocations: return get_info(); + case aspect::usm_atomic_host_allocations: + return is_host() || + (get_device_info< + pi_usm_capabilities, + info::device::usm_host_allocations>::get(MDevice, getPlugin()) & + PI_USM_ATOMIC_ACCESS); case aspect::usm_shared_allocations: return get_info(); + case aspect::usm_atomic_shared_allocations: + return is_host() || + (get_device_info< + pi_usm_capabilities, + info::device::usm_shared_allocations>::get(MDevice, + getPlugin()) & + PI_USM_ATOMIC_ACCESS); case aspect::usm_restricted_shared_allocations: return get_info(); case aspect::usm_system_allocator: diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index a7a0098a7c83f..ef5a944892fc3 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -697,6 +697,19 @@ void MemoryManager::prefetch_usm(void *Mem, QueueImplPtr Queue, size_t Length, } } +void MemoryManager::advise_usm(const void *Mem, QueueImplPtr Queue, + size_t Length, pi_mem_advice Advice, + std::vector /*DepEvents*/, + RT::PiEvent &OutEvent) { + sycl::context Context = Queue->get_context(); + + if (!Context.is_host()) { + const detail::plugin &Plugin = Queue->getPlugin(); + Plugin.call(Queue->getHandleRef(), Mem, + Length, Advice, &OutEvent); + } +} + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index 52c49cf26f0ea..73f1943d638e3 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -51,10 +51,11 @@ prepareUSMEvent(const shared_ptr_class &QueueImpl, } event queue_impl::memset(const shared_ptr_class &Self, - void *Ptr, int Value, size_t Count) { + void *Ptr, int Value, size_t Count, + const vector_class &DepEvents) { RT::PiEvent NativeEvent{}; - MemoryManager::fill_usm(Ptr, Self, Count, Value, /*DepEvents*/ {}, - NativeEvent); + MemoryManager::fill_usm(Ptr, Self, Count, Value, + getOrWaitEvents(DepEvents, MContext), NativeEvent); if (MContext->is_host()) return event(); @@ -65,10 +66,11 @@ event queue_impl::memset(const shared_ptr_class &Self, } event queue_impl::memcpy(const shared_ptr_class &Self, - void *Dest, const void *Src, size_t Count) { + void *Dest, const void *Src, size_t Count, + const vector_class &DepEvents) { RT::PiEvent NativeEvent{}; - MemoryManager::copy_usm(Src, Self, Count, Dest, /*DepEvents*/ {}, - NativeEvent); + MemoryManager::copy_usm(Src, Self, Count, Dest, + getOrWaitEvents(DepEvents, MContext), NativeEvent); if (MContext->is_host()) return event(); @@ -80,16 +82,14 @@ event queue_impl::memcpy(const shared_ptr_class &Self, event queue_impl::mem_advise(const shared_ptr_class &Self, const void *Ptr, size_t Length, - pi_mem_advice Advice) { - if (MContext->is_host()) { - return event(); - } - - // non-Host device + pi_mem_advice Advice, + const vector_class &DepEvents) { RT::PiEvent NativeEvent{}; - const detail::plugin &Plugin = getPlugin(); - Plugin.call(getHandleRef(), Ptr, Length, - Advice, &NativeEvent); + MemoryManager::advise_usm(Ptr, Self, Length, Advice, + getOrWaitEvents(DepEvents, MContext), NativeEvent); + + if (MContext->is_host()) + return event(); event ResEvent = prepareUSMEvent(Self, NativeEvent); addSharedEvent(ResEvent); diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index aea55006a6fba..1690786eebfec 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -325,31 +325,41 @@ class queue_impl { /// Fills the memory pointed by a USM pointer with the value specified. /// - /// \param Impl is a shared_ptr to this queue. + /// \param Self is a shared_ptr to this queue. /// \param Ptr is a USM pointer to the memory to fill. /// \param Value is a value to be set. Value is cast as an unsigned char. /// \param Count is a number of bytes to fill. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. /// \return an event representing fill operation. event memset(const shared_ptr_class &Self, void *Ptr, int Value, - size_t Count); + size_t Count, const vector_class &DepEvents); /// Copies data from one memory region to another, both pointed by /// USM pointers. /// - /// \param Impl is a shared_ptr to this queue. + /// \param Self is a shared_ptr to this queue. /// \param Dest is a USM pointer to the destination memory. /// \param Src is a USM pointer to the source memory. /// \param Count is a number of bytes to copy. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing copy operation. event memcpy(const shared_ptr_class &Self, void *Dest, - const void *Src, size_t Count); + const void *Src, size_t Count, + const vector_class &DepEvents); /// Provides additional information to the underlying runtime about how /// different allocations are used. /// - /// \param Impl is a shared_ptr to this queue. + /// \param Self is a shared_ptr to this queue. /// \param Ptr is a USM pointer to the allocation. /// \param Length is a number of bytes in the allocation. /// \param Advice is a device-defined advice for the specified allocation. + /// \param DepEvents is a vector of events that specifies the kernel + /// dependencies. + /// \return an event representing advise operation. event mem_advise(const shared_ptr_class &Self, const void *Ptr, - size_t Length, pi_mem_advice Advice); + size_t Length, pi_mem_advice Advice, + const vector_class &DepEvents); /// Puts exception to the list of asynchronous ecxeptions. /// diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 4ea9394f2034f..a50965ffcd92d 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2036,6 +2036,13 @@ cl_int ExecCGCommand::enqueueImp() { return CL_SUCCESS; } + case CG::CGTYPE::ADVISE_USM: { + CGAdviseUSM *Advise = (CGAdviseUSM *)MCommandGroup.get(); + MemoryManager::advise_usm(Advise->getDst(), MQueue, Advise->getLength(), + Advise->getAdvice(), std::move(RawEvents), Event); + + return CL_SUCCESS; + } case CG::CGTYPE::CODEPLAY_INTEROP_TASK: { const detail::plugin &Plugin = MQueue->getPlugin(); CGInteropTask *ExecInterop = (CGInteropTask *)MCommandGroup.get(); diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index d6acee67293cf..6620e3de3ff0d 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -150,19 +150,40 @@ void *malloc_device(size_t Size, const device &Dev, const context &Ctxt) { return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device); } +void *malloc_device(size_t Size, const device &Dev, const context &Ctxt, + const property_list &) { + return malloc_device(Size, Dev, Ctxt); +} + void *malloc_device(size_t Size, const queue &Q) { return malloc_device(Size, Q.get_device(), Q.get_context()); } +void *malloc_device(size_t Size, const queue &Q, + const property_list &PropList) { + return malloc_device(Size, Q.get_device(), Q.get_context(), PropList); +} + void *aligned_alloc_device(size_t Alignment, size_t Size, const device &Dev, const context &Ctxt) { return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::device); } +void *aligned_alloc_device(size_t Alignment, size_t Size, const device &Dev, + const context &Ctxt, const property_list &) { + return aligned_alloc_device(Alignment, Size, Dev, Ctxt); +} + void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q) { return aligned_alloc_device(Alignment, Size, Q.get_device(), Q.get_context()); } +void *aligned_alloc_device(size_t Alignment, size_t Size, const queue &Q, + const property_list &PropList) { + return aligned_alloc_device(Alignment, Size, Q.get_device(), Q.get_context(), + PropList); +} + void free(void *ptr, const context &Ctxt) { return detail::usm::free(ptr, Ctxt); } @@ -176,35 +197,74 @@ void *malloc_host(size_t Size, const context &Ctxt) { return detail::usm::alignedAllocHost(0, Size, Ctxt, alloc::host); } +void *malloc_host(size_t Size, const context &Ctxt, const property_list &) { + return malloc_host(Size, Ctxt); +} + void *malloc_host(size_t Size, const queue &Q) { return malloc_host(Size, Q.get_context()); } +void *malloc_host(size_t Size, const queue &Q, const property_list &PropList) { + return malloc_host(Size, Q.get_context(), PropList); +} + void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt) { return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::shared); } +void *malloc_shared(size_t Size, const device &Dev, const context &Ctxt, + const property_list &) { + return malloc_shared(Size, Dev, Ctxt); +} + void *malloc_shared(size_t Size, const queue &Q) { return malloc_shared(Size, Q.get_device(), Q.get_context()); } +void *malloc_shared(size_t Size, const queue &Q, + const property_list &PropList) { + return malloc_shared(Size, Q.get_device(), Q.get_context(), PropList); +} + void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt) { return detail::usm::alignedAllocHost(Alignment, Size, Ctxt, alloc::host); } +void *aligned_alloc_host(size_t Alignment, size_t Size, const context &Ctxt, + const property_list &) { + return aligned_alloc_host(Alignment, Size, Ctxt); +} + void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q) { return aligned_alloc_host(Alignment, Size, Q.get_context()); -} +} + +void *aligned_alloc_host(size_t Alignment, size_t Size, const queue &Q, + const property_list &PropList) { + return aligned_alloc_host(Alignment, Size, Q.get_context(), PropList); +} void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev, const context &Ctxt) { return detail::usm::alignedAlloc(Alignment, Size, Ctxt, Dev, alloc::shared); } +void *aligned_alloc_shared(size_t Alignment, size_t Size, const device &Dev, + const context &Ctxt, const property_list &) { + return aligned_alloc_shared(Alignment, Size, Dev, Ctxt); +} + void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q) { return aligned_alloc_shared(Alignment, Size, Q.get_device(), Q.get_context()); } +void *aligned_alloc_shared(size_t Alignment, size_t Size, const queue &Q, + const property_list &PropList) { + return aligned_alloc_shared(Alignment, Size, Q.get_device(), Q.get_context(), + PropList); +} + // single form void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind) { @@ -219,10 +279,20 @@ void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind) { return RetVal; } +void *malloc(size_t Size, const device &Dev, const context &Ctxt, alloc Kind, + const property_list &) { + return malloc(Size, Dev, Ctxt, Kind); +} + void *malloc(size_t Size, const queue &Q, alloc Kind) { return malloc(Size, Q.get_device(), Q.get_context(), Kind); } +void *malloc(size_t Size, const queue &Q, alloc Kind, + const property_list &PropList) { + return malloc(Size, Q.get_device(), Q.get_context(), Kind, PropList); +} + void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev, const context &Ctxt, alloc Kind) { void *RetVal = nullptr; @@ -236,10 +306,21 @@ void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev, return RetVal; } +void *aligned_alloc(size_t Alignment, size_t Size, const device &Dev, + const context &Ctxt, alloc Kind, const property_list &) { + return aligned_alloc(Alignment, Size, Dev, Ctxt, Kind); +} + void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind) { return aligned_alloc(Alignment, Size, Q.get_device(), Q.get_context(), Kind); } +void *aligned_alloc(size_t Alignment, size_t Size, const queue &Q, alloc Kind, + const property_list &PropList) { + return aligned_alloc(Alignment, Size, Q.get_device(), Q.get_context(), Kind, + PropList); +} + // Pointer queries /// Query the allocation type from a USM pointer /// Returns alloc::host for all pointers in a host context. diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index ec94949ead375..7984f0a38bbbe 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -188,6 +188,12 @@ event handler::finalize() { std::move(MSharedPtrStorage), std::move(MRequirements), std::move(MEvents), MCodeLoc)); break; + case detail::CG::ADVISE_USM: + CommandGroup.reset(new detail::CGAdviseUSM( + MDstPtr, MLength, std::move(MArgsStorage), std::move(MAccStorage), + std::move(MSharedPtrStorage), std::move(MRequirements), + std::move(MEvents), MCodeLoc)); + break; case detail::CG::CODEPLAY_HOST_TASK: CommandGroup.reset(new detail::CGHostTask( std::move(MHostTask), MQueue, MQueue->getContextImplPtr(), @@ -499,5 +505,26 @@ void handler::prefetch(const void *Ptr, size_t Count) { MLength = Count; MCGType = detail::CG::PREFETCH_USM; } + +void handler::mem_advise(const void *Ptr, size_t Count, pi_mem_advice Advice) { + throwIfActionIsCreated(); + MDstPtr = const_cast(Ptr); + MLength = Count; + MCGType = detail::CG::ADVISE_USM; + + assert(!MSharedPtrStorage.empty()); + + std::lock_guard Lock( + detail::GlobalHandler::instance().getHandlerExtendedMembersMutex()); + + std::shared_ptr> ExtendedMembersVec = + detail::convertToExtendedMembers(MSharedPtrStorage[0]); + + detail::ExtendedMemberT EMember = { + detail::ExtendedMembersType::HANDLER_MEM_ADVICE, + std::make_shared(Advice)}; + + ExtendedMembersVec->push_back(EMember); +} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/queue.cpp b/sycl/source/queue.cpp index e470a5c845d88..5a0209d7f6fc0 100644 --- a/sycl/source/queue.cpp +++ b/sycl/source/queue.cpp @@ -79,15 +79,43 @@ bool queue::is_host() const { return impl->is_host(); } void queue::throw_asynchronous() { impl->throw_asynchronous(); } event queue::memset(void *Ptr, int Value, size_t Count) { - return impl->memset(impl, Ptr, Value, Count); + return impl->memset(impl, Ptr, Value, Count, {}); +} + +event queue::memset(void *Ptr, int Value, size_t Count, event DepEvent) { + return impl->memset(impl, Ptr, Value, Count, {DepEvent}); +} + +event queue::memset(void *Ptr, int Value, size_t Count, + const vector_class &DepEvents) { + return impl->memset(impl, Ptr, Value, Count, DepEvents); } event queue::memcpy(void *Dest, const void *Src, size_t Count) { - return impl->memcpy(impl, Dest, Src, Count); + return impl->memcpy(impl, Dest, Src, Count, {}); +} + +event queue::memcpy(void *Dest, const void *Src, size_t Count, event DepEvent) { + return impl->memcpy(impl, Dest, Src, Count, {DepEvent}); +} + +event queue::memcpy(void *Dest, const void *Src, size_t Count, + const vector_class &DepEvents) { + return impl->memcpy(impl, Dest, Src, Count, DepEvents); } event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice) { - return impl->mem_advise(impl, Ptr, Length, Advice); + return impl->mem_advise(impl, Ptr, Length, Advice, {}); +} + +event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, + event DepEvent) { + return impl->mem_advise(impl, Ptr, Length, Advice, {DepEvent}); +} + +event queue::mem_advise(const void *Ptr, size_t Length, pi_mem_advice Advice, + const vector_class &DepEvents) { + return impl->mem_advise(impl, Ptr, Length, Advice, DepEvents); } event queue::submit_impl(function_class CGH, diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 011113631ec29..9c330f9d70b9b 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3597,13 +3597,21 @@ _ZN2cl4sycl10level_zero12make_contextERKSt6vectorINS0_6deviceESaIS3_EEmb _ZN2cl4sycl10level_zero12make_programERKNS0_7contextEm _ZN2cl4sycl10level_zero13make_platformEm _ZN2cl4sycl11malloc_hostEmRKNS0_5queueE +_ZN2cl4sycl11malloc_hostEmRKNS0_5queueERKNS0_13property_listE _ZN2cl4sycl11malloc_hostEmRKNS0_7contextE +_ZN2cl4sycl11malloc_hostEmRKNS0_7contextERKNS0_13property_listE _ZN2cl4sycl13aligned_allocEmmRKNS0_5queueENS0_3usm5allocE +_ZN2cl4sycl13aligned_allocEmmRKNS0_5queueENS0_3usm5allocERKNS0_13property_listE _ZN2cl4sycl13aligned_allocEmmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocE +_ZN2cl4sycl13aligned_allocEmmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocERKNS0_13property_listE _ZN2cl4sycl13malloc_deviceEmRKNS0_5queueE +_ZN2cl4sycl13malloc_deviceEmRKNS0_5queueERKNS0_13property_listE _ZN2cl4sycl13malloc_deviceEmRKNS0_6deviceERKNS0_7contextE +_ZN2cl4sycl13malloc_deviceEmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listE _ZN2cl4sycl13malloc_sharedEmRKNS0_5queueE +_ZN2cl4sycl13malloc_sharedEmRKNS0_5queueERKNS0_13property_listE _ZN2cl4sycl13malloc_sharedEmRKNS0_6deviceERKNS0_7contextE +_ZN2cl4sycl13malloc_sharedEmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listE _ZN2cl4sycl13sycl_categoryEv _ZN2cl4sycl14exception_list5ClearEv _ZN2cl4sycl14exception_list8PushBackEONSt15__exception_ptr13exception_ptrE @@ -3611,12 +3619,18 @@ _ZN2cl4sycl14exception_list8PushBackERKNSt15__exception_ptr13exception_ptrE _ZN2cl4sycl15make_error_codeENS0_4errcE _ZN2cl4sycl16get_pointer_typeEPKvRKNS0_7contextE _ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_5queueE +_ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_5queueERKNS0_13property_listE _ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_7contextE +_ZN2cl4sycl18aligned_alloc_hostEmmRKNS0_7contextERKNS0_13property_listE _ZN2cl4sycl18get_pointer_deviceEPKvRKNS0_7contextE _ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_5queueE +_ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_5queueERKNS0_13property_listE _ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_6deviceERKNS0_7contextE +_ZN2cl4sycl20aligned_alloc_deviceEmmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listE _ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_5queueE +_ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_5queueERKNS0_13property_listE _ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_6deviceERKNS0_7contextE +_ZN2cl4sycl20aligned_alloc_sharedEmmRKNS0_6deviceERKNS0_7contextERKNS0_13property_listE _ZN2cl4sycl4freeEPvRKNS0_5queueE _ZN2cl4sycl4freeEPvRKNS0_7contextE _ZN2cl4sycl5INTEL15online_compilerILNS1_15source_languageE0EE7compileIJSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISC_EEEEES6_IhSaIhEERKSC_DpRKT_ @@ -3633,13 +3647,19 @@ _ZN2cl4sycl5eventC2EP9_cl_eventRKNS0_7contextE _ZN2cl4sycl5eventC2ESt10shared_ptrINS0_6detail10event_implEE _ZN2cl4sycl5eventC2Ev _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice +_ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_adviceNS0_5eventE +_ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_adviceRKSt6vectorINS0_5eventESaIS6_EE _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEES1_RKNS0_6detail13code_locationE _ZN2cl4sycl5queue18throw_asynchronousEv _ZN2cl4sycl5queue20wait_and_throw_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue6memcpyEPvPKvm +_ZN2cl4sycl5queue6memcpyEPvPKvmNS0_5eventE +_ZN2cl4sycl5queue6memcpyEPvPKvmRKSt6vectorINS0_5eventESaIS6_EE _ZN2cl4sycl5queue6memsetEPvim +_ZN2cl4sycl5queue6memsetEPvimNS0_5eventE +_ZN2cl4sycl5queue6memsetEPvimRKSt6vectorINS0_5eventESaIS4_EE _ZN2cl4sycl5queueC1EP17_cl_command_queueRKNS0_7contextERKSt8functionIFvNS0_14exception_listEEE _ZN2cl4sycl5queueC1ERKNS0_6deviceERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE _ZN2cl4sycl5queueC1ERKNS0_7contextERKNS0_15device_selectorERKNS0_13property_listE @@ -3750,6 +3770,7 @@ _ZN2cl4sycl6detail12sampler_implC2EP11_cl_samplerRKNS0_7contextE _ZN2cl4sycl6detail12sampler_implD1Ev _ZN2cl4sycl6detail12sampler_implD2Ev _ZN2cl4sycl6detail12split_stringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEc +_ZN2cl4sycl6detail13MemoryManager10advise_usmEPKvSt10shared_ptrINS1_10queue_implEEm14_pi_mem_adviceSt6vectorIP9_pi_eventSaISB_EERSB_ _ZN2cl4sycl6detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EERS9_ _ZN2cl4sycl6detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_ _ZN2cl4sycl6detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event @@ -3828,7 +3849,9 @@ _ZN2cl4sycl6kernelC1ESt10shared_ptrINS0_6detail11kernel_implEE _ZN2cl4sycl6kernelC2EP10_cl_kernelRKNS0_7contextE _ZN2cl4sycl6kernelC2ESt10shared_ptrINS0_6detail11kernel_implEE _ZN2cl4sycl6mallocEmRKNS0_5queueENS0_3usm5allocE +_ZN2cl4sycl6mallocEmRKNS0_5queueENS0_3usm5allocERKNS0_13property_listE _ZN2cl4sycl6mallocEmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocE +_ZN2cl4sycl6mallocEmRKNS0_6deviceERKNS0_7contextENS0_3usm5allocERKNS0_13property_listE _ZN2cl4sycl6opencl10make_queueERKNS0_7contextEm _ZN2cl4sycl6opencl11make_deviceEm _ZN2cl4sycl6opencl12make_contextEm @@ -3856,6 +3879,7 @@ _ZN2cl4sycl7contextC2ERKSt6vectorINS0_6deviceESaIS3_EERKNS0_13property_listE _ZN2cl4sycl7contextC2ERKSt6vectorINS0_6deviceESaIS3_EESt8functionIFvNS0_14exception_listEEERKNS0_13property_listE _ZN2cl4sycl7contextC2ERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE _ZN2cl4sycl7contextC2ESt10shared_ptrINS0_6detail12context_implEE +_ZN2cl4sycl7handler10mem_adviseEPKvm14_pi_mem_advice _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb _ZN2cl4sycl7handler13getKernelNameB5cxx11Ev diff --git a/sycl/test/extensions/usm/usm_alloc_utility.cpp b/sycl/test/extensions/usm/usm_alloc_utility.cpp index 4bb9ebb1f25b0..ddae20778d80f 100644 --- a/sycl/test/extensions/usm/usm_alloc_utility.cpp +++ b/sycl/test/extensions/usm/usm_alloc_utility.cpp @@ -17,6 +17,15 @@ using namespace cl::sycl; constexpr int N = 8; +static void check_and_free(int *array, const device &dev, const context &ctxt) { + // host device treats all allocations as host allocations + assert((get_pointer_type(array, ctxt) == usm::alloc::host) && + "Allocation pointer should be host type"); + assert((get_pointer_device(array, ctxt) == dev) && + "Allocation pointer should be host type"); + free(array, ctxt); +} + int main() { queue q; auto dev = q.get_device(); @@ -24,58 +33,66 @@ int main() { int *array; if (dev.get_info()) { + array = (int *)malloc(N * sizeof(int), q, usm::alloc::host); + check_and_free(array, dev, ctxt); + + array = + (int *)malloc(N * sizeof(int), q, usm::alloc::host, property_list{}); + check_and_free(array, dev, ctxt); + + array = (int *)aligned_alloc(alignof(long long), N * sizeof(int), q, + usm::alloc::host); + check_and_free(array, dev, ctxt); + + array = (int *)aligned_alloc(alignof(long long), N * sizeof(int), q, + usm::alloc::host, property_list{}); + check_and_free(array, dev, ctxt); + array = (int *)malloc_host(N * sizeof(int), q); - assert((get_pointer_type(array, ctxt) == usm::alloc::host) && - "Allocation pointer should be host type"); - assert((get_pointer_device(array, ctxt) == dev) && - "Allocation pointer should be host type"); - free(array, ctxt); + check_and_free(array, dev, ctxt); + + array = (int *)malloc_host(N * sizeof(int), q, property_list{}); + check_and_free(array, dev, ctxt); array = (int *)aligned_alloc_host(alignof(long long), N * sizeof(int), ctxt); - assert((get_pointer_type(array, ctxt) == usm::alloc::host) && - "Allocation pointer should be host type"); - assert((get_pointer_device(array, ctxt) == dev) && - "Allocation pointer should be host type"); - free(array, ctxt); + check_and_free(array, dev, ctxt); + + array = (int *)aligned_alloc_host(alignof(long long), N * sizeof(int), ctxt, + property_list{}); + check_and_free(array, dev, ctxt); } if (dev.get_info()) { array = (int *)malloc_shared(N * sizeof(int), q); - // host device treats all allocations as host allocations - assert((get_pointer_type(array, ctxt) == usm::alloc::host) && - "Allocation pointer should be host type"); - assert((get_pointer_device(array, ctxt) == dev) && - "Allocation pointer should be host type"); - free(array, ctxt); + check_and_free(array, dev, ctxt); + + array = (int *)malloc_shared(N * sizeof(int), q, property_list{}); + check_and_free(array, dev, ctxt); array = (int *)aligned_alloc_shared(alignof(long long), N * sizeof(int), dev, ctxt); - // host device treats all allocations as host allocations - assert((get_pointer_type(array, ctxt) == usm::alloc::host) && - "Allocation pointer should be host type"); - assert((get_pointer_device(array, ctxt) == dev) && - "Allocation pointer should be host type"); - free(array, ctxt); + check_and_free(array, dev, ctxt); + + array = (int *)aligned_alloc_shared(alignof(long long), N * sizeof(int), + dev, ctxt, property_list{}); + check_and_free(array, dev, ctxt); } if (dev.get_info()) { array = (int *)malloc_device(N * sizeof(int), q); - // host device treats all allocations as host allocations - assert((get_pointer_type(array, ctxt) == usm::alloc::host) && - "Allocation pointer should be host type"); - assert((get_pointer_device(array, ctxt) == dev) && - "Allocation pointer should be host type"); - free(array, ctxt); + check_and_free(array, dev, ctxt); + + array = (int *)malloc_device(N * sizeof(int), q, property_list{}); + check_and_free(array, dev, ctxt); array = (int *)aligned_alloc_device(alignof(long long), N * sizeof(int), dev, ctxt); - // host device treats all allocations as host allocations - assert((get_pointer_type(array, ctxt) == usm::alloc::host) && - "Allocation pointer should be host type"); - assert((get_pointer_device(array, ctxt) == dev) && - "Allocation pointer should be host type"); - free(array, ctxt); + check_and_free(array, dev, ctxt); + + array = (int *)aligned_alloc_device(alignof(long long), N * sizeof(int), + dev, ctxt, property_list{}); + check_and_free(array, dev, ctxt); } return 0; diff --git a/sycl/test/extensions/usm/usm_allocator.cpp b/sycl/test/extensions/usm/usm_allocator.cpp index b1623e509cc1b..ac8be36109b81 100644 --- a/sycl/test/extensions/usm/usm_allocator.cpp +++ b/sycl/test/extensions/usm/usm_allocator.cpp @@ -15,8 +15,6 @@ using namespace cl::sycl; -constexpr int N = 8; - int main() { queue q; auto dev = q.get_device(); @@ -27,15 +25,18 @@ int main() { if (dev.get_info() && dev.get_info()) { usm_allocator alloc11(ctxt, dev); - usm_allocator alloc12(ctxt, dev); + usm_allocator alloc12(ctxt, dev, + property_list{}); usm_allocator alloc21(q); usm_allocator alloc22(alloc21); + usm_allocator alloc23(q, property_list{}); // usm::alloc::device is not supported by usm_allocator assert((alloc11 != alloc22) && "Allocators should NOT be equal."); assert((alloc11 == alloc12) && "Allocators should be equal."); assert((alloc21 == alloc22) && "Allocators should be equal."); + assert((alloc21 == alloc23) && "Allocators should be equal."); } } diff --git a/sycl/test/on-device/basic_tests/aspects.cpp b/sycl/test/on-device/basic_tests/aspects.cpp index 521914c763ee6..521247708a774 100644 --- a/sycl/test/on-device/basic_tests/aspects.cpp +++ b/sycl/test/on-device/basic_tests/aspects.cpp @@ -75,9 +75,15 @@ int main() { if (plt.has(aspect::usm_host_allocations)) { std::cout << " USM host allocations" << std::endl; } + if (plt.has(aspect::usm_atomic_host_allocations)) { + std::cout << " USM atomic host allocations" << std::endl; + } if (plt.has(aspect::usm_shared_allocations)) { std::cout << " USM shared allocations" << std::endl; } + if (plt.has(aspect::usm_atomic_shared_allocations)) { + std::cout << " USM atomic shared allocations" << std::endl; + } if (plt.has(aspect::usm_restricted_shared_allocations)) { std::cout << " USM restricted shared allocations" << std::endl; } From 82c915cc18717853921d9bfdcd9f95843230710a Mon Sep 17 00:00:00 2001 From: Dmitri Mokhov Date: Wed, 23 Jun 2021 10:55:04 -0500 Subject: [PATCH 2/2] Finish line with a comma. Co-authored-by: sergei <57672082+s-kanaev@users.noreply.github.com> --- sycl/include/CL/sycl/aspects.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/aspects.hpp b/sycl/include/CL/sycl/aspects.hpp index 1fe2670d1ff3a..24513ed3e515d 100644 --- a/sycl/include/CL/sycl/aspects.hpp +++ b/sycl/include/CL/sycl/aspects.hpp @@ -40,7 +40,7 @@ enum class aspect { ext_intel_max_mem_bandwidth = 24, ext_intel_mem_channel = 25, usm_atomic_host_allocations = 26, - usm_atomic_shared_allocations = 27 + usm_atomic_shared_allocations = 27, }; } // namespace sycl