From 7973c589e813a86aff308db696ee99b9e9fe6e9b Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 24 Nov 2022 07:16:53 -0800 Subject: [PATCH 01/53] [SYCL] Implement command_submit L0 Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 1 + sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 1 + sycl/plugins/level_zero/pi_level_zero.cpp | 10 ++++++++++ sycl/source/detail/event_impl.cpp | 13 ++++++++++++- sycl/source/detail/event_impl.hpp | 5 +++++ sycl/source/handler.cpp | 14 +++++++++++++- 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 26b6695c21b96..08247a89f58cc 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -289,6 +289,7 @@ typedef enum { PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER = 0x20004, + PI_DEVICE_CURRENT_TIME = 0x20005; } _pi_device_info; typedef enum { diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 4a656729fc433..a8af67f5b5421 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -1968,6 +1968,7 @@ pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn) { return PI_SUCCESS; } + pi_result piTearDown(void *) { delete reinterpret_cast( PiESimdDeviceAccess->data); diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 252e437497b45..ebfe094e653c8 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -3206,6 +3206,16 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: return PI_ERROR_INVALID_VALUE; + case PI_DEVICE_CURRENT_TIME:{ + uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; + uint64_t TimestampMaxCount = ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + uint64_t deviceClockCount, dummy; + + ZE_CALL(zeDeviceGetGlobalTimestamps, + (Device->ZeDevice, &dummy, &deviceClockCount)); + return ReturnValue((deviceClockCount & TimestampMaxCount) * ZeTimerResolution); + } + // TODO: Implement. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 9ea1eac1867ca..9f29892a71c2e 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -271,9 +271,13 @@ uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MHostEvent) { - if (MEvent) + if(getPlugin().getBackend() == backend::ext_oneapi_level_zero){ + return submitTime; + } + if (MEvent){ return get_event_profiling_info( this->getHandleRef(), this->getPlugin()); + } return 0; } if (!MHostProfilingInfo) @@ -429,6 +433,13 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } +void event_impl::setSubmissionTime(uint64_t time){ + submitTime=time; +} + uint64_t event_imp::getSubmissionTime(){ + return submitTime; + } + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index e689270c5abe8..de064e9bef18e 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -232,6 +232,10 @@ class event_impl { MSubmittedQueue = SubmittedQueue; }; + void setSubmissionTime(uint64_t time); + + uint64_t getSubmissionTime(); + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed @@ -263,6 +267,7 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; + uint64_t submitTime = 0; //Only used for level-zero plugin ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index bc3aba5d80fbc..1734e8543e202 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -93,6 +93,15 @@ event handler::finalize() { return MLastEvent; MIsFinalized = true; + auto setSubmissionTime = [&](detail::EventImplPtr event) { + auto plugin= MQueue->getPlugin(); + if(!MQueue->is_host() && plugin.getBackend() == backend::ext_oneapi_level_zero) { + uint64_t submitTime=0; + plugin.call(MQueue->getDeviceImplPtr()->getHandleRef(),sizeof(uint64_t),&submitTime,nullptr); + event->setSubmissionTime(submitTime); + } + }; + const auto &type = getType(); if (type == detail::CG::Kernel) { // If there were uses of set_specialization_constant build the kernel_bundle @@ -208,9 +217,11 @@ event handler::finalize() { PI_ERROR_INVALID_OPERATION); else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - + + setSubmissionTime(NewEvent); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } + return MLastEvent; } } @@ -312,6 +323,7 @@ event handler::finalize() { detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( std::move(CommandGroup), std::move(MQueue)); + setSubmissionTime(Event); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } From 2dd37613252d20fcf93214516e5a4bd0c64bfbfc Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 12 Dec 2022 13:56:40 -0800 Subject: [PATCH 02/53] Add PI API extension piGetDeviceAndHostTimer Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.def | 2 ++ sycl/include/sycl/detail/pi.h | 11 ++++++++-- sycl/plugins/cuda/pi_cuda.cpp | 5 +++++ .../esimd_emulator/pi_esimd_emulator.cpp | 3 +++ sycl/plugins/hip/pi_hip.cpp | 5 +++++ sycl/plugins/level_zero/pi_level_zero.cpp | 21 ++++++++++--------- sycl/plugins/opencl/pi_opencl.cpp | 5 +++++ sycl/source/detail/event_impl.cpp | 2 +- sycl/source/handler.cpp | 9 ++++---- sycl/test/abi/pi_level_zero_symbol_check.dump | 3 ++- sycl/test/abi/pi_opencl_symbol_check.dump | 3 ++- 11 files changed, 50 insertions(+), 19 deletions(-) diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 59dab0c4721a1..b3dfc81ff5797 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -140,5 +140,7 @@ _PI_API(piPluginGetLastError) _PI_API(piTearDown) +_PI_API(piGetDeviceAndHostTimer) + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 08247a89f58cc..1d6f5681a8f4e 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -53,9 +53,10 @@ // 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for // piDeviceGetInfo. // 11.15 piEventCreate creates even in the signalled state now. +// 11.16 Added piGetDeviceAndHostTimer #define _PI_H_VERSION_MAJOR 11 -#define _PI_H_VERSION_MINOR 15 +#define _PI_H_VERSION_MINOR 16 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -289,7 +290,6 @@ typedef enum { PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER = 0x20004, - PI_DEVICE_CURRENT_TIME = 0x20005; } _pi_device_info; typedef enum { @@ -1792,6 +1792,13 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// runtime must handle it or end the application. __SYCL_EXPORT pi_result piPluginGetLastError(char **message); +/// Returns the global timestamp from \param device , and syncronized host timestamp +/// +/// \param device device to query for timestamp +/// \param deviceTime pointer to store device time +/// \param hostTime pointer to store syncronized host time +__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime); + struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin // checks and writes the appropriate Function Pointers in diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 39af4e9fb09f3..13e686f4d0245 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5385,6 +5385,10 @@ pi_result cuda_piTearDown(void *) { return PI_SUCCESS; } +pi_result cuda_piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + assert(0 && "Method not implemented"); +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5529,6 +5533,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, cuda_piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, cuda_piPluginGetLastError) _PI_CL(piTearDown, cuda_piTearDown) + _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) #undef _PI_CL diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index a8af67f5b5421..607853c47c85b 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -1984,6 +1984,9 @@ pi_result piTearDown(void *) { return PI_SUCCESS; } +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + assert(0 && "Method not implemented"); +} const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 868a2099f46aa..be3c9aaaea799 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5113,6 +5113,10 @@ pi_result hip_piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + assert(0 && "Method not implemented"); +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5251,6 +5255,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, hip_piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, hip_piPluginGetLastError) _PI_CL(piTearDown, hip_piTearDown) + _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer) #undef _PI_CL diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index ebfe094e653c8..7ef57109e3024 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -3206,16 +3206,6 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16: return PI_ERROR_INVALID_VALUE; - case PI_DEVICE_CURRENT_TIME:{ - uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; - uint64_t TimestampMaxCount = ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); - uint64_t deviceClockCount, dummy; - - ZE_CALL(zeDeviceGetGlobalTimestamps, - (Device->ZeDevice, &dummy, &deviceClockCount)); - return ReturnValue((deviceClockCount & TimestampMaxCount) * ZeTimerResolution); - } - // TODO: Implement. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: @@ -9063,4 +9053,15 @@ pi_result _pi_buffer::free() { return PI_SUCCESS; } + +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + const uint64_t& ZeTimerResolution = device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = ((1ULL << device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + uint64_t deviceClockCount, dummy; + + ZE_CALL(zeDeviceGetGlobalTimestamps, + (device->ZeDevice, hostTime == nullptr ? &dummy : hostTime, &deviceClockCount)); + *deviceTime= (deviceClockCount & TimestampMaxCount) * ZeTimerResolution ; + return PI_SUCCESS; +} } // extern "C" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index f26bc5516c8c4..a98420fa3733e 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1505,6 +1505,10 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + assert(0 && "Method not implemented"); +} + const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -1638,6 +1642,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, piPluginGetLastError) _PI_CL(piTearDown, piTearDown) + _PI_CL(piGetDeviceAndHostTimer, piGetDeviceAndHostTimer) #undef _PI_CL diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 9f29892a71c2e..7e55fc9a93cb0 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -436,7 +436,7 @@ void event_impl::cleanDepEventsThroughOneLevel() { void event_impl::setSubmissionTime(uint64_t time){ submitTime=time; } - uint64_t event_imp::getSubmissionTime(){ + uint64_t event_impl::getSubmissionTime(){ return submitTime; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1734e8543e202..faf8fcd7bccf2 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -94,13 +94,14 @@ event handler::finalize() { MIsFinalized = true; auto setSubmissionTime = [&](detail::EventImplPtr event) { - auto plugin= MQueue->getPlugin(); - if(!MQueue->is_host() && plugin.getBackend() == backend::ext_oneapi_level_zero) { + if(!MQueue->is_host()){ + auto plugin=MQueue->getPlugin(); + if( plugin.getBackend() == backend::ext_oneapi_level_zero) { uint64_t submitTime=0; - plugin.call(MQueue->getDeviceImplPtr()->getHandleRef(),sizeof(uint64_t),&submitTime,nullptr); + plugin.call(MQueue->getDeviceImplPtr()->getHandleRef(),&submitTime,nullptr); event->setSubmissionTime(submitTime); } - }; + }}; const auto &type = getType(); if (type == detail::CG::Kernel) { diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 2cde4ca788830..3335257791c16 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -41,6 +41,7 @@ piEventRetain piEventSetCallback piEventSetStatus piEventsWait +piGetDeviceAndHostTimer piKernelCreate piKernelGetGroupInfo piKernelGetInfo @@ -58,6 +59,7 @@ piMemRelease piMemRetain piPlatformGetInfo piPlatformsGet +piPluginGetLastError piPluginInit piProgramBuild piProgramCompile @@ -78,7 +80,6 @@ piSamplerCreate piSamplerGetInfo piSamplerRelease piSamplerRetain -piPluginGetLastError piTearDown piclProgramCreateWithSource piextContextCreateWithNativeHandle diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index f7c2736a1432b..1c9b2c2b72537 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -12,6 +12,7 @@ piDeviceGetInfo piDevicesGet piEnqueueMemBufferMap piEventCreate +piGetDeviceAndHostTimer piKernelCreate piKernelGetGroupInfo piKernelGetSubGroupInfo @@ -20,13 +21,13 @@ piMemBufferCreate piMemBufferPartition piMemImageCreate piPlatformsGet +piPluginGetLastError piPluginInit piProgramCreate piProgramCreateWithBinary piProgramLink piQueueCreate piSamplerCreate -piPluginGetLastError piTearDown piclProgramCreateWithSource piextContextCreateWithNativeHandle From f73197de6b5ea2181f533a6829e98a425b1eaed4 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 12 Dec 2022 15:14:26 -0800 Subject: [PATCH 03/53] Formatting Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 12 ++++++++---- sycl/plugins/cuda/pi_cuda.cpp | 3 ++- .../esimd_emulator/pi_esimd_emulator.cpp | 4 ++-- sycl/plugins/hip/pi_hip.cpp | 3 ++- sycl/plugins/level_zero/pi_level_zero.cpp | 15 +++++++++------ sycl/plugins/opencl/pi_opencl.cpp | 3 ++- sycl/source/detail/event_impl.cpp | 14 +++++--------- sycl/source/detail/event_impl.hpp | 3 ++- sycl/source/handler.cpp | 18 ++++++++++-------- 9 files changed, 42 insertions(+), 33 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 65e177a4f37a8..618f94ebaf781 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1814,15 +1814,19 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// /// \return PI_SUCCESS if plugin is indicating non-fatal warning. Any other /// error code indicates that plugin considers this to be a fatal error and the -/// runtime must handle it or end the application. +/// Returns the global timestamp from \param device , and syncronized host +/// timestamp __SYCL_EXPORT pi_result piPluginGetLastError(char **message); -/// Returns the global timestamp from \param device , and syncronized host timestamp +/// Returns the global timestamp from \param device , and syncronized host +/// timestamp /// /// \param device device to query for timestamp /// \param deviceTime pointer to store device time -/// \param hostTime pointer to store syncronized host time -__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime); +/// \param hostTime pointer to store syncronized host time +__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device device, + uint64_t *deviceTime, + uint64_t *hostTime); struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 086f73e2cebf7..84b1753cb822f 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5425,7 +5425,8 @@ pi_result cuda_piTearDown(void *) { return PI_SUCCESS; } -pi_result cuda_piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ +pi_result cuda_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { assert(0 && "Method not implemented"); } diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 1ef5197dfd4c9..ee1d3598dfe78 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2001,7 +2001,6 @@ pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn) { return PI_SUCCESS; } - pi_result piTearDown(void *) { delete reinterpret_cast( PiESimdDeviceAccess->data); @@ -2017,7 +2016,8 @@ pi_result piTearDown(void *) { return PI_SUCCESS; } -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { assert(0 && "Method not implemented"); } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 9883f274c1875..1cbb9ea32de62 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5155,7 +5155,8 @@ pi_result hip_piTearDown(void *PluginParameter) { return PI_SUCCESS; } -pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ +pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { assert(0 && "Method not implemented"); } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 0fde798b84d25..10f5671975fde 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9259,15 +9259,18 @@ pi_result _pi_buffer::free() { return PI_SUCCESS; } - -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ - const uint64_t& ZeTimerResolution = device->ZeDeviceProperties->timerResolution; - const uint64_t TimestampMaxCount = ((1ULL << device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { + const uint64_t &ZeTimerResolution = + device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = + ((1ULL << device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); uint64_t deviceClockCount, dummy; ZE_CALL(zeDeviceGetGlobalTimestamps, - (device->ZeDevice, hostTime == nullptr ? &dummy : hostTime, &deviceClockCount)); - *deviceTime= (deviceClockCount & TimestampMaxCount) * ZeTimerResolution ; + (device->ZeDevice, hostTime == nullptr ? &dummy : hostTime, + &deviceClockCount)); + *deviceTime = (deviceClockCount & TimestampMaxCount) * ZeTimerResolution; return PI_SUCCESS; } } // extern "C" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index b08fffd99d839..8921d5dfa502e 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1531,7 +1531,8 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { assert(0 && "Method not implemented"); } diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index fc64fb7112562..f68e57d96e8ed 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -273,13 +273,13 @@ uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MHostEvent) { - if(getPlugin().getBackend() == backend::ext_oneapi_level_zero){ + if (getPlugin().getBackend() == backend::ext_oneapi_level_zero) { return submitTime; } - if (MEvent){ + if (MEvent) { return get_event_profiling_info( this->getHandleRef(), this->getPlugin()); - } + } return 0; } if (!MHostProfilingInfo) @@ -435,12 +435,8 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } -void event_impl::setSubmissionTime(uint64_t time){ - submitTime=time; -} - uint64_t event_impl::getSubmissionTime(){ - return submitTime; - } +void event_impl::setSubmissionTime(uint64_t time) { submitTime = time; } +uint64_t event_impl::getSubmissionTime() { return submitTime; } bool event_impl::isCompleted() { return get_info() == info::event_command_status::complete; diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 5656a93b20ca4..ff7652b910d2d 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -269,7 +269,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t submitTime = 0; //Only used for level-zero plugin + uint64_t submitTime = 0; // Only used for level-zero plugin, submission time + // of associated MCommand ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index faf8fcd7bccf2..d28076c390998 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -93,15 +93,17 @@ event handler::finalize() { return MLastEvent; MIsFinalized = true; - auto setSubmissionTime = [&](detail::EventImplPtr event) { - if(!MQueue->is_host()){ - auto plugin=MQueue->getPlugin(); - if( plugin.getBackend() == backend::ext_oneapi_level_zero) { - uint64_t submitTime=0; - plugin.call(MQueue->getDeviceImplPtr()->getHandleRef(),&submitTime,nullptr); + auto setSubmissionTime = [&](detail::EventImplPtr event) { + if (!MQueue->is_host()) { + auto plugin = MQueue->getPlugin(); + if (plugin.getBackend() == backend::ext_oneapi_level_zero) { + uint64_t submitTime = 0; + plugin.call( + MQueue->getDeviceImplPtr()->getHandleRef(), &submitTime, nullptr); event->setSubmissionTime(submitTime); } - }}; + } + }; const auto &type = getType(); if (type == detail::CG::Kernel) { @@ -218,7 +220,7 @@ event handler::finalize() { PI_ERROR_INVALID_OPERATION); else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - + setSubmissionTime(NewEvent); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } From d25bc5dcabfe23697d69e6d7e8f523575a11501a Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 12 Dec 2022 15:30:08 -0800 Subject: [PATCH 04/53] Moe formatting Signed-off-by: Rauf, Rana --- sycl/source/detail/event_impl.hpp | 2 +- sycl/source/handler.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index ff7652b910d2d..9ab551d4fe94c 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -235,7 +235,7 @@ class event_impl { void setSubmissionTime(uint64_t time); uint64_t getSubmissionTime(); - + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index d28076c390998..c178f156a7ecc 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -224,7 +224,7 @@ event handler::finalize() { setSubmissionTime(NewEvent); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } - + return MLastEvent; } } From c7d76cc9c3a77fbe25b95d44336005e335812eab Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 06:19:24 -0800 Subject: [PATCH 05/53] E Signed-off-by: Rauf, Rana --- sycl/source/detail/event_impl.hpp | 1 - sycl/source/handler.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 9ab551d4fe94c..8f6760b2f8758 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -235,7 +235,6 @@ class event_impl { void setSubmissionTime(uint64_t time); uint64_t getSubmissionTime(); - QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c178f156a7ecc..f11cae923421e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -224,7 +224,6 @@ event handler::finalize() { setSubmissionTime(NewEvent); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } - return MLastEvent; } } From 1c465da88e4d88a6f9275f01eea3536c84d27676 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 06:52:57 -0800 Subject: [PATCH 06/53] Add piGetDeviceAndHostTimer Signed-off-by: Rauf, Rana --- sycl/unittests/helpers/PiMockPlugin.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 84c776333aa94..4ba85697b320d 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -14,6 +14,7 @@ #include #include +#include #include // Helpers for dummy handles @@ -1048,3 +1049,16 @@ inline pi_result mock_piTearDown(void *PluginParameter) { return PI_SUCCESS; } inline pi_result mock_piPluginGetLastError(char **message) { return PI_SUCCESS; } + +// Returns the wall-clock timestamp of host for deviceTime and hostTime +inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, + uint64_t *deviceTime, + uint64_t *hostTime) { + using namespace std::chrono; + auto timeNanoseconds = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); + *deviceTime = timeNanoseconds; + *hostTime = timeNanoseconds; + return PI_SUCCESS; +} From ecfc6b12cab8893c3e082881fc6960565c30c2f0 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 08:27:25 -0800 Subject: [PATCH 07/53] Dummy Signed-off-by: Rauf, Rana --- sycl/unittests/helpers/PiMockPlugin.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 4ba85697b320d..75326b648ad89 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1054,6 +1054,7 @@ inline pi_result mock_piPluginGetLastError(char **message) { inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { + using namespace std::chrono; auto timeNanoseconds = duration_cast(steady_clock::now().time_since_epoch()) From 375ff05bc738ea56c89cf3681d86532caaf106d9 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 15:27:37 -0800 Subject: [PATCH 08/53] Apply review suggestions Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 12 ++++++------ sycl/plugins/level_zero/pi_level_zero.cpp | 21 +++++++++++++-------- sycl/source/detail/event_impl.hpp | 4 ++-- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index fe70db09f26cc..0875ef6e6a6b5 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1836,15 +1836,15 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// runtime must handle it or end the application. __SYCL_EXPORT pi_result piPluginGetLastError(char **message); -/// Returns the global timestamp from \param device , and syncronized host -/// timestamp +/// Returns the global timestamp from \param device , and synchronized host +/// timestamp. /// /// \param device device to query for timestamp /// \param deviceTime pointer to store device time -/// \param hostTime pointer to store syncronized host time -__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device device, - uint64_t *deviceTime, - uint64_t *hostTime); +/// \param hostTime pointer to store synchronized host time +__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime); struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index ebe51327f9822..87d77ce488867 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9417,18 +9417,23 @@ pi_result _pi_buffer::free() { return PI_SUCCESS; } -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, - uint64_t *hostTime) { +pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { const uint64_t &ZeTimerResolution = - device->ZeDeviceProperties->timerResolution; + Device->ZeDeviceProperties->timerResolution; const uint64_t TimestampMaxCount = - ((1ULL << device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); - uint64_t deviceClockCount, dummy; + ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + uint64_t DeviceClockCount, Dummy; + + uint64_t *&HostTimeHandle = HostTime == nullptr ? &Dummy : HostTime; ZE_CALL(zeDeviceGetGlobalTimestamps, - (device->ZeDevice, hostTime == nullptr ? &dummy : hostTime, - &deviceClockCount)); - *deviceTime = (deviceClockCount & TimestampMaxCount) * ZeTimerResolution; + (device->ZeDevice, HostTimeHandle, &DeviceClockCount)); + + if (DeviceTime != nullptr) { + + *DeviceTime = (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; + } return PI_SUCCESS; } } // extern "C" diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 8f6760b2f8758..165614d70cc35 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -268,8 +268,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t submitTime = 0; // Only used for level-zero plugin, submission time - // of associated MCommand + uint64_t MSubmitTime = 0; // Only used for level-zero plugin, submission time + // of associated MCommand ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; From 23ec9553ed081e7211951d93fe6510629cf95d40 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 16:18:18 -0800 Subject: [PATCH 09/53] Fix build errors Signed-off-by: Rauf, Rana --- sycl/plugins/level_zero/pi_level_zero.cpp | 4 +--- sycl/source/detail/event_impl.cpp | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 87d77ce488867..b4cf0204d88fe 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9425,10 +9425,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); uint64_t DeviceClockCount, Dummy; - uint64_t *&HostTimeHandle = HostTime == nullptr ? &Dummy : HostTime; - ZE_CALL(zeDeviceGetGlobalTimestamps, - (device->ZeDevice, HostTimeHandle, &DeviceClockCount)); + (Device->ZeDevice, HostTime == nullptr ? &Dummy : HostTime, &DeviceClockCount)); if (DeviceTime != nullptr) { diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index f68e57d96e8ed..ccc216ff840d3 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -274,7 +274,7 @@ event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MHostEvent) { if (getPlugin().getBackend() == backend::ext_oneapi_level_zero) { - return submitTime; + return MSubmitTime; } if (MEvent) { return get_event_profiling_info( @@ -435,8 +435,8 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } -void event_impl::setSubmissionTime(uint64_t time) { submitTime = time; } -uint64_t event_impl::getSubmissionTime() { return submitTime; } +void event_impl::setSubmissionTime(uint64_t time) { MSubmitTime = time; } +uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } bool event_impl::isCompleted() { return get_info() == info::event_command_status::complete; From 713430e1da316bf4b2aca9a12a922639286fea8d Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 15 Dec 2022 08:38:23 -0800 Subject: [PATCH 10/53] Add piGetDeviceAndHostTimer for CUDA and HIP Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 28 ++++++++++++--- .../esimd_emulator/pi_esimd_emulator.cpp | 3 +- sycl/plugins/hip/pi_hip.cpp | 21 ++++++++++- sycl/plugins/opencl/pi_opencl.cpp | 36 +++++++++++++++++-- sycl/source/detail/event_impl.cpp | 9 +---- sycl/source/handler.cpp | 12 +++---- sycl/unittests/helpers/PiMockPlugin.hpp | 6 ++-- 7 files changed, 89 insertions(+), 26 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index cf58167d9af80..85ad73f8eabb1 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -24,6 +24,7 @@ #include #include #include +#include // Forward declarations void enableCUDATracing(); @@ -2121,7 +2122,7 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ _pi_context::kind::user_defined, newContext, *devices}); } - +_pi_platform::evBase_ static std::once_flag initFlag; std::call_once( initFlag, @@ -5441,9 +5442,28 @@ pi_result cuda_piTearDown(void *) { return PI_SUCCESS; } -pi_result cuda_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, - uint64_t *hostTime) { - assert(0 && "Method not implemented"); +pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + cudaEvent_t event; + if(DeviceTime){ + PI_CHECK_ERROR(cudaEventCreateWithFlags(&event, cudaEventDefault)); + PI_CHECK_ERROR(cudaEventRecord(event)); + } + using namespace std::chrono; + if(HostTime){ + *HostTime = duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + + if(DeviceTime){ + PI_CHECK_ERROR(cudaEventSynchronize(event)); + + float elapsedTime = 0.0f; + PI_CHECK_ERROR(cudaEventElapsedTime(&elapsedTime,_pi_platform::evBase_,event)); + *DeviceTime=(uint64_t) (elapsedTime * (double)1e6); + } + + return PI_SUCCESS; } const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 1f1937be14fa2..119b4b4166910 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2030,7 +2030,8 @@ pi_result piTearDown(void *) { pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { - assert(0 && "Method not implemented"); + PiTrace("Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); + return PI_SUCCESS; } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index efb4b5b79ff45..d833762fca895 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace { // Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be @@ -5173,7 +5174,25 @@ pi_result hip_piTearDown(void *PluginParameter) { pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { - assert(0 && "Method not implemented"); + hipEvent_t event; + if(DeviceTime){ + PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); + PI_CHECK_ERROR(hipEventRecord(event)); + } + using namespace std::chrono; + if(HostTime){ + *HostTime = duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + + if(DeviceTime){ + PI_CHECK_ERROR(hipEventSynchronize(event)); + + float elapsedTime = 0.0f; + PI_CHECK_ERROR(hipEventElapsedTime(&elapsedTime,_pi_platform::evBase_,event)); + *DeviceTime=(uint64_t) (elapsedTime * (double)1e6); + } + return PI_SUCCESS; } const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 6d1d1970ae44d..b08e8d83cc216 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1547,9 +1547,39 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, - uint64_t *hostTime) { - assert(0 && "Method not implemented"); +pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + OCLV::OpenCLVersion devVer,platVer; + cl_platform_id platform; + cl_device_id deviceID= cast(Device); + + auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &platform, nullptr); + if (ret_err != CL_SUCCESS){ + return cast(ret_err); + } + + ret_err= getDeviceVersion(deviceID, devVer); + + if (ret_err != CL_SUCCESS){ + return cast(ret_err); + } + + ret_err = getPlatformVersion(platform,platVer); + + if(platVer < OCLV::V2_1 && devVer < OCLV::V2_1){ + return PI_ERROR_INVALID_OPERATION; + } + + if(HostTime){ + if(DeviceTime){ + clGetDeviceAndHostTimer(deviceID,DeviceTime,HostTime); + }else { + clGetHostTimer(deviceID,HostTime); + } + } + + return PI_SUCCESS; } const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index ccc216ff840d3..0c7e4bc4b14c9 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -273,14 +273,7 @@ uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MHostEvent) { - if (getPlugin().getBackend() == backend::ext_oneapi_level_zero) { - return MSubmitTime; - } - if (MEvent) { - return get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); - } - return 0; + return MSubmitTime; } if (!MHostProfilingInfo) throw invalid_object_error("Profiling info is not available.", diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index f11cae923421e..2e37f9ada8e10 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -94,14 +94,12 @@ event handler::finalize() { MIsFinalized = true; auto setSubmissionTime = [&](detail::EventImplPtr event) { - if (!MQueue->is_host()) { + if (!MQueue->is_host() && MQueue->MIsProfilingEnabled) { auto plugin = MQueue->getPlugin(); - if (plugin.getBackend() == backend::ext_oneapi_level_zero) { - uint64_t submitTime = 0; - plugin.call( - MQueue->getDeviceImplPtr()->getHandleRef(), &submitTime, nullptr); - event->setSubmissionTime(submitTime); - } + uint64_t submitTime = 0; + plugin.call( + MQueue->getDeviceImplPtr()->getHandleRef(), &submitTime, nullptr); + event->setSubmissionTime(submitTime); } }; diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 37ac5fba5bdf0..72371cc071954 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1065,7 +1065,9 @@ inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, auto timeNanoseconds = duration_cast(steady_clock::now().time_since_epoch()) .count(); - *deviceTime = timeNanoseconds; - *hostTime = timeNanoseconds; + if(deviceTime){ + *deviceTime = timeNanoseconds;} + if(hostTime){ + *hostTime = timeNanoseconds;} return PI_SUCCESS; } From dc509a9d68650ff7811bb737766786228422e243 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 15 Dec 2022 08:42:18 -0800 Subject: [PATCH 11/53] Formatting Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 31 ++++++++++--------- .../esimd_emulator/pi_esimd_emulator.cpp | 3 +- sycl/plugins/hip/pi_hip.cpp | 26 +++++++++------- sycl/plugins/opencl/pi_opencl.cpp | 28 ++++++++--------- sycl/unittests/helpers/PiMockPlugin.hpp | 10 +++--- 5 files changed, 52 insertions(+), 46 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 85ad73f8eabb1..b8106d9b9c1eb 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -18,13 +18,13 @@ #include #include +#include #include #include #include #include #include #include -#include // Forward declarations void enableCUDATracing(); @@ -2122,8 +2122,7 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ _pi_context::kind::user_defined, newContext, *devices}); } -_pi_platform::evBase_ - static std::once_flag initFlag; + _pi_platform::evBase_ static std::once_flag initFlag; std::call_once( initFlag, [](pi_result &err) { @@ -5445,24 +5444,26 @@ pi_result cuda_piTearDown(void *) { pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { cudaEvent_t event; - if(DeviceTime){ - PI_CHECK_ERROR(cudaEventCreateWithFlags(&event, cudaEventDefault)); - PI_CHECK_ERROR(cudaEventRecord(event)); + if (DeviceTime) { + PI_CHECK_ERROR(cudaEventCreateWithFlags(&event, cudaEventDefault)); + PI_CHECK_ERROR(cudaEventRecord(event)); } using namespace std::chrono; - if(HostTime){ - *HostTime = duration_cast(steady_clock::now().time_since_epoch()) - .count(); + if (HostTime) { + *HostTime = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); } - if(DeviceTime){ - PI_CHECK_ERROR(cudaEventSynchronize(event)); + if (DeviceTime) { + PI_CHECK_ERROR(cudaEventSynchronize(event)); - float elapsedTime = 0.0f; - PI_CHECK_ERROR(cudaEventElapsedTime(&elapsedTime,_pi_platform::evBase_,event)); - *DeviceTime=(uint64_t) (elapsedTime * (double)1e6); + float elapsedTime = 0.0f; + PI_CHECK_ERROR( + cudaEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); + *DeviceTime = (uint64_t)(elapsedTime * (double)1e6); } - + return PI_SUCCESS; } diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 119b4b4166910..8920bcb49f1b4 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2030,7 +2030,8 @@ pi_result piTearDown(void *) { pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { - PiTrace("Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); + PiTrace( + "Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); return PI_SUCCESS; } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index d833762fca895..319643074b377 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -18,13 +18,13 @@ #include #include +#include #include #include #include #include #include #include -#include namespace { // Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be @@ -5175,22 +5175,24 @@ pi_result hip_piTearDown(void *PluginParameter) { pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { hipEvent_t event; - if(DeviceTime){ - PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); - PI_CHECK_ERROR(hipEventRecord(event)); + if (DeviceTime) { + PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); + PI_CHECK_ERROR(hipEventRecord(event)); } using namespace std::chrono; - if(HostTime){ - *HostTime = duration_cast(steady_clock::now().time_since_epoch()) - .count(); + if (HostTime) { + *HostTime = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); } - if(DeviceTime){ - PI_CHECK_ERROR(hipEventSynchronize(event)); + if (DeviceTime) { + PI_CHECK_ERROR(hipEventSynchronize(event)); - float elapsedTime = 0.0f; - PI_CHECK_ERROR(hipEventElapsedTime(&elapsedTime,_pi_platform::evBase_,event)); - *DeviceTime=(uint64_t) (elapsedTime * (double)1e6); + float elapsedTime = 0.0f; + PI_CHECK_ERROR( + hipEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); + *DeviceTime = (uint64_t)(elapsedTime * (double)1e6); } return PI_SUCCESS; } diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index b08e8d83cc216..95655f9c56b97 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1549,33 +1549,33 @@ pi_result piTearDown(void *PluginParameter) { pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { - OCLV::OpenCLVersion devVer,platVer; + OCLV::OpenCLVersion devVer, platVer; cl_platform_id platform; - cl_device_id deviceID= cast(Device); + cl_device_id deviceID = cast(Device); auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &platform, nullptr); - if (ret_err != CL_SUCCESS){ + sizeof(cl_platform_id), &platform, nullptr); + if (ret_err != CL_SUCCESS) { return cast(ret_err); } - ret_err= getDeviceVersion(deviceID, devVer); + ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS){ + if (ret_err != CL_SUCCESS) { return cast(ret_err); - } + } - ret_err = getPlatformVersion(platform,platVer); + ret_err = getPlatformVersion(platform, platVer); - if(platVer < OCLV::V2_1 && devVer < OCLV::V2_1){ + if (platVer < OCLV::V2_1 && devVer < OCLV::V2_1) { return PI_ERROR_INVALID_OPERATION; } - if(HostTime){ - if(DeviceTime){ - clGetDeviceAndHostTimer(deviceID,DeviceTime,HostTime); - }else { - clGetHostTimer(deviceID,HostTime); + if (HostTime) { + if (DeviceTime) { + clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime); + } else { + clGetHostTimer(deviceID, HostTime); } } diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 72371cc071954..b6af6b2ef5348 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1065,9 +1065,11 @@ inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, auto timeNanoseconds = duration_cast(steady_clock::now().time_since_epoch()) .count(); - if(deviceTime){ - *deviceTime = timeNanoseconds;} - if(hostTime){ - *hostTime = timeNanoseconds;} + if (deviceTime) { + *deviceTime = timeNanoseconds; + } + if (hostTime) { + *hostTime = timeNanoseconds; + } return PI_SUCCESS; } From 82fbb3b0911b0439b86316a2e02df4be5859a54d Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 15 Dec 2022 15:28:48 -0800 Subject: [PATCH 12/53] Fix issues Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 15 ++++++----- .../esimd_emulator/pi_esimd_emulator.cpp | 2 +- sycl/plugins/hip/pi_hip.cpp | 4 +-- sycl/plugins/opencl/pi_opencl.cpp | 12 +++++---- sycl/source/detail/device_impl.cpp | 27 ++++++++++++++++++- sycl/source/detail/device_impl.hpp | 4 +++ sycl/source/detail/event_impl.hpp | 3 +-- sycl/source/event.cpp | 16 ++++++----- sycl/source/handler.cpp | 14 ++-------- 9 files changed, 61 insertions(+), 36 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index b8106d9b9c1eb..c0df9016af212 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2122,7 +2122,7 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ _pi_context::kind::user_defined, newContext, *devices}); } - _pi_platform::evBase_ static std::once_flag initFlag; + static std::once_flag initFlag; std::call_once( initFlag, [](pi_result &err) { @@ -5443,12 +5443,13 @@ pi_result cuda_piTearDown(void *) { pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { - cudaEvent_t event; + _pi_event::native_type event; + using namespace std::chrono; + if (DeviceTime) { - PI_CHECK_ERROR(cudaEventCreateWithFlags(&event, cudaEventDefault)); - PI_CHECK_ERROR(cudaEventRecord(event)); + PI_CHECK_ERROR(cuEventCreate(&event, CU_EVENT_DEFAULT)); + PI_CHECK_ERROR(cuEventRecord(event, 0)); } - using namespace std::chrono; if (HostTime) { *HostTime = duration_cast(steady_clock::now().time_since_epoch()) @@ -5456,11 +5457,11 @@ pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, } if (DeviceTime) { - PI_CHECK_ERROR(cudaEventSynchronize(event)); + PI_CHECK_ERROR(cuEventSynchronize(event)); float elapsedTime = 0.0f; PI_CHECK_ERROR( - cudaEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); + cuEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); *DeviceTime = (uint64_t)(elapsedTime * (double)1e6); } diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 8920bcb49f1b4..8a565087f771f 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2032,7 +2032,7 @@ pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { PiTrace( "Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); - return PI_SUCCESS; + return PI_ERROR_INVALID_OPERATION; } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 319643074b377..8cc7786189fa0 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5174,12 +5174,12 @@ pi_result hip_piTearDown(void *PluginParameter) { pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { - hipEvent_t event; + _pi_event::native_type event; + using namespace std::chrono; if (DeviceTime) { PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); PI_CHECK_ERROR(hipEventRecord(event)); } - using namespace std::chrono; if (HostTime) { *HostTime = duration_cast(steady_clock::now().time_since_epoch()) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 95655f9c56b97..f44e1fde22df3 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1553,6 +1553,7 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, cl_platform_id platform; cl_device_id deviceID = cast(Device); + //TODO: Cache OpenCL version for each device and platform auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, nullptr); if (ret_err != CL_SUCCESS) { @@ -1571,12 +1572,13 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_ERROR_INVALID_OPERATION; } - if (HostTime) { - if (DeviceTime) { - clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime); - } else { + uint64_t dummy; + + if (DeviceTime) { + clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime == nullptr ? &dummy: HostTime); + + }else if (HostTime){ clGetHostTimer(deviceID, HostTime); - } } return PI_SUCCESS; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index f039e99afd2a4..846081d5ff6a1 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -36,7 +36,7 @@ device_impl::device_impl(RT::PiDevice Device, const plugin &Plugin) device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) - : MDevice(Device), MIsHostDevice(false) { + : MDevice(Device), MIsHostDevice(false), deviceTimePair(std::make_pair(0,0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -435,6 +435,31 @@ std::string device_impl::getDeviceName() const { return MDeviceName; } +uint64_t device_impl::getTime(){ + static uint64_t timeTillRefresh= 100e9; + uint64_t hostTime; + if(MIsHostDevice){ + using namespace std::chrono; + return duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + auto plugin = getPlugin(); + RT::PiResult result = plugin.call_nocheck(MDevice, nullptr, &hostTime); + plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS : result); + + if(result == PI_ERROR_INVALID_OPERATION){ + return 0; + } + uint64_t diff= hostTime - deviceTimePair.second; + + if( diff > timeTillRefresh){ + plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); + diff=0; + } + + return deviceTimePair.first + diff; +} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 17fdd6c746367..ee65e19b8e696 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -18,6 +18,7 @@ #include #include +#include namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { @@ -237,6 +238,8 @@ class device_impl { std::string getDeviceName() const; + uint64_t getTime(); + private: explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin); @@ -248,6 +251,7 @@ class device_impl { bool MIsAssertFailSupported = false; mutable std::string MDeviceName; mutable std::once_flag MDeviceNameFlag; + std::pair deviceTimePair ; }; // class device_impl } // namespace detail diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 165614d70cc35..6644837f28e90 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -268,8 +268,7 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t MSubmitTime = 0; // Only used for level-zero plugin, submission time - // of associated MCommand + uint64_t MSubmitTime = 0; // Stores submission time of command associated with event ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 7950e70162d5e..54df3f463d7a1 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -77,6 +77,15 @@ event::get_info() const { return impl->template get_info(); } +template +typename detail::is_event_profiling_info_desc::return_type +event::get_profiling_info() const{ + if constexpr(!std::is_same_v){ + impl->wait(impl); + } + return impl->template get_profiling_info(); +} + #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ template __SYCL_EXPORT ReturnT event::get_info() const; @@ -85,12 +94,7 @@ event::get_info() const { #undef __SYCL_PARAM_TRAITS_SPEC #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template <> \ - __SYCL_EXPORT ReturnT event::get_profiling_info() \ - const { \ - impl->wait(impl); \ - return impl->get_profiling_info(); \ - } +template __SYCL_EXPORT ReturnT event::get_profiling_info() const; #include diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 2e37f9ada8e10..532f7b386fd90 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -93,16 +93,6 @@ event handler::finalize() { return MLastEvent; MIsFinalized = true; - auto setSubmissionTime = [&](detail::EventImplPtr event) { - if (!MQueue->is_host() && MQueue->MIsProfilingEnabled) { - auto plugin = MQueue->getPlugin(); - uint64_t submitTime = 0; - plugin.call( - MQueue->getDeviceImplPtr()->getHandleRef(), &submitTime, nullptr); - event->setSubmissionTime(submitTime); - } - }; - const auto &type = getType(); if (type == detail::CG::Kernel) { // If there were uses of set_specialization_constant build the kernel_bundle @@ -219,7 +209,7 @@ event handler::finalize() { else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - setSubmissionTime(NewEvent); + NewEvent->setSubmissionTime(std::move(MQueue->getDeviceImplPtr()->getTime())); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } return MLastEvent; @@ -323,7 +313,7 @@ event handler::finalize() { detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( std::move(CommandGroup), std::move(MQueue)); - setSubmissionTime(Event); + Event->setSubmissionTime(MQueue->getDeviceImplPtr()->getTime()); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } From 6cacfff61d7bf6fb61906d75b6327b46a2b00d34 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 15 Dec 2022 15:29:48 -0800 Subject: [PATCH 13/53] Formatting Signed-off-by: Rauf, Rana --- sycl/plugins/opencl/pi_opencl.cpp | 11 ++++++----- sycl/source/detail/device_impl.cpp | 31 +++++++++++++++++------------- sycl/source/detail/device_impl.hpp | 2 +- sycl/source/detail/event_impl.hpp | 3 ++- sycl/source/event.cpp | 7 ++++--- sycl/source/handler.cpp | 3 ++- 6 files changed, 33 insertions(+), 24 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index f44e1fde22df3..0e27922607424 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1553,7 +1553,7 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, cl_platform_id platform; cl_device_id deviceID = cast(Device); - //TODO: Cache OpenCL version for each device and platform + // TODO: Cache OpenCL version for each device and platform auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, nullptr); if (ret_err != CL_SUCCESS) { @@ -1575,10 +1575,11 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t dummy; if (DeviceTime) { - clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime == nullptr ? &dummy: HostTime); - - }else if (HostTime){ - clGetHostTimer(deviceID, HostTime); + clGetDeviceAndHostTimer(deviceID, DeviceTime, + HostTime == nullptr ? &dummy : HostTime); + + } else if (HostTime) { + clGetHostTimer(deviceID, HostTime); } return PI_SUCCESS; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 846081d5ff6a1..e60adaf65a8f7 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -36,7 +36,8 @@ device_impl::device_impl(RT::PiDevice Device, const plugin &Plugin) device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) - : MDevice(Device), MIsHostDevice(false), deviceTimePair(std::make_pair(0,0)) { + : MDevice(Device), MIsHostDevice(false), + deviceTimePair(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -435,26 +436,30 @@ std::string device_impl::getDeviceName() const { return MDeviceName; } -uint64_t device_impl::getTime(){ - static uint64_t timeTillRefresh= 100e9; +uint64_t device_impl::getTime() { + static uint64_t timeTillRefresh = 100e9; uint64_t hostTime; - if(MIsHostDevice){ + if (MIsHostDevice) { using namespace std::chrono; return duration_cast(steady_clock::now().time_since_epoch()) - .count(); + .count(); } auto plugin = getPlugin(); - RT::PiResult result = plugin.call_nocheck(MDevice, nullptr, &hostTime); - plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS : result); + RT::PiResult result = + plugin.call_nocheck( + MDevice, nullptr, &hostTime); + plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS + : result); - if(result == PI_ERROR_INVALID_OPERATION){ + if (result == PI_ERROR_INVALID_OPERATION) { return 0; } - uint64_t diff= hostTime - deviceTimePair.second; - - if( diff > timeTillRefresh){ - plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); - diff=0; + uint64_t diff = hostTime - deviceTimePair.second; + + if (diff > timeTillRefresh) { + plugin.call( + MDevice, &deviceTimePair.first, &deviceTimePair.second); + diff = 0; } return deviceTimePair.first + diff; diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index ee65e19b8e696..6a4d936b700d2 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -251,7 +251,7 @@ class device_impl { bool MIsAssertFailSupported = false; mutable std::string MDeviceName; mutable std::once_flag MDeviceNameFlag; - std::pair deviceTimePair ; + std::pair deviceTimePair; }; // class device_impl } // namespace detail diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 6644837f28e90..b6c7cd9a02893 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -268,7 +268,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t MSubmitTime = 0; // Stores submission time of command associated with event + uint64_t MSubmitTime = + 0; // Stores submission time of command associated with event ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 54df3f463d7a1..7892de69cce81 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -79,8 +79,8 @@ event::get_info() const { template typename detail::is_event_profiling_info_desc::return_type -event::get_profiling_info() const{ - if constexpr(!std::is_same_v){ +event::get_profiling_info() const { + if constexpr (!std::is_same_v) { impl->wait(impl); } return impl->template get_profiling_info(); @@ -94,7 +94,8 @@ event::get_profiling_info() const{ #undef __SYCL_PARAM_TRAITS_SPEC #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ -template __SYCL_EXPORT ReturnT event::get_profiling_info() const; + template __SYCL_EXPORT ReturnT \ + event::get_profiling_info() const; #include diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 532f7b386fd90..164b9a7a747b8 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -209,7 +209,8 @@ event handler::finalize() { else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - NewEvent->setSubmissionTime(std::move(MQueue->getDeviceImplPtr()->getTime())); + NewEvent->setSubmissionTime( + std::move(MQueue->getDeviceImplPtr()->getTime())); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } return MLastEvent; From 810cbe03c30b3698af31729cdeb3471d0b84ca7e Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Fri, 16 Dec 2022 11:35:42 -0800 Subject: [PATCH 14/53] Added documentation Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 7 +++---- sycl/plugins/cuda/pi_cuda.cpp | 4 +++- sycl/plugins/hip/pi_hip.cpp | 3 +++ sycl/plugins/level_zero/pi_level_zero.cpp | 5 +++-- sycl/source/detail/device_impl.cpp | 18 +++++++++++++++--- sycl/source/detail/device_impl.hpp | 2 ++ sycl/source/detail/event_impl.cpp | 11 ++++++++++- sycl/source/detail/event_impl.hpp | 5 ++++- sycl/source/handler.cpp | 4 ++-- 9 files changed, 45 insertions(+), 14 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 0875ef6e6a6b5..968e45d3c41e0 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1836,12 +1836,11 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// runtime must handle it or end the application. __SYCL_EXPORT pi_result piPluginGetLastError(char **message); -/// Returns the global timestamp from \param device , and synchronized host -/// timestamp. +/// Queries device for it's global timestamp in nanoseconds, and updates HostTime with the value of the host timer at the closest possible point in time to that at which DeviceTime was returned. /// /// \param device device to query for timestamp -/// \param deviceTime pointer to store device time -/// \param hostTime pointer to store synchronized host time +/// \param deviceTime pointer to store device timestamp in nanoseconds. Optional argument, can be nullptr +/// \param hostTime pointer to store host timestamp in nanoseconds. Optional argurment, can be nullptr __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index c0df9016af212..bb31746f38d6d 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -529,6 +529,7 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. + //TODO: Remove this and other related code for setting or getting queued/submit time result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0)); result = PI_CHECK_ERROR(cuEventRecord(evStart_, stream_)); } @@ -556,7 +557,7 @@ bool _pi_event::is_completed() const noexcept { } return true; } - +//TODO: Remove this function and other code for setting or getting queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3870,6 +3871,7 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, } switch (param_name) { +//TODO: Remove this and other related code for setting or getting queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 8cc7786189fa0..cc0566d226073 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -565,6 +565,7 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. + //TODO: Remove this and other related code for setting or getting queued/submit time PI_CHECK_ERROR(hipEventRecord(evQueued_, 0)); PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get())); } @@ -593,6 +594,7 @@ bool _pi_event::is_completed() const noexcept { return true; } +//TODO: Remove this and other code for setting or getting queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3696,6 +3698,7 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, } switch (param_name) { + //TODO: Remove this and other related code for setting or getting queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index b4cf0204d88fe..fad4cec55b052 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -6130,9 +6130,9 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, ContextEndTime *= ZeTimerResolution; return ReturnValue(ContextEndTime); } + //TODO: Remove this and other related code for setting or getting queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: - // TODO: Support these when Level Zero supported is added. return ReturnValue(uint64_t{0}); default: zePrint("piEventGetProfilingInfo: not supported ParamName\n"); @@ -9426,7 +9426,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t DeviceClockCount, Dummy; ZE_CALL(zeDeviceGetGlobalTimestamps, - (Device->ZeDevice, HostTime == nullptr ? &Dummy : HostTime, &DeviceClockCount)); + (Device->ZeDevice, HostTime == nullptr ? &Dummy : HostTime, + &DeviceClockCount)); if (DeviceTime != nullptr) { diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 846081d5ff6a1..de2dbac899512 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -435,8 +435,18 @@ std::string device_impl::getDeviceName() const { return MDeviceName; } +/* On first call this function queries for device timestamp + along with host synchronized timestamp + and stores it in memeber varaible deviceTimePair. + Succive calls to this function would just retrieve the host timestamp , + compute difference against the host timestamp in deviceTimePair + and calculate the device timestamp based on the difference. + deviceTimePair is refreshed with new device and host timestamp after a certain interval + (determined by timeTillRefresh) to account for clock skew between host and device. +*/ + uint64_t device_impl::getTime(){ - static uint64_t timeTillRefresh= 100e9; + constexpr uint64_t timeTillRefresh= 100e9; uint64_t hostTime; if(MIsHostDevice){ using namespace std::chrono; @@ -448,11 +458,13 @@ uint64_t device_impl::getTime(){ plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS : result); if(result == PI_ERROR_INVALID_OPERATION){ - return 0; + throw sycl::feature_not_supported( + "Device and/or backend does not support querying timestamp", + result); } uint64_t diff= hostTime - deviceTimePair.second; - if( diff > timeTillRefresh){ + if( diff > timeTillRefresh || diff <= 0){ plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); diff=0; } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index ee65e19b8e696..6673d2a0c2685 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -238,6 +238,8 @@ class device_impl { std::string getDeviceName() const; + /// Gets the current device timestamp + /// @throw sycl::feature_not_supported if feature is not supported on device uint64_t getTime(); private: diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 0c7e4bc4b14c9..d2fa8977b4e2d 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -428,8 +428,17 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } -void event_impl::setSubmissionTime(uint64_t time) { MSubmitTime = time; } +void event_impl::setSubmissionTime() { + if(!MSubmittedQueue.expired()){ + auto queue=MSubmittedQueue.lock(); + if(queue->MIsProfilingEnabled){ + MSubmitTime= queue->getDeviceImplPtr()->getTime(); + } + } +} + uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } + bool event_impl::isCompleted() { return get_info() == info::event_command_status::complete; diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 6644837f28e90..9e158d7fa9c15 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -232,9 +232,12 @@ class event_impl { MSubmittedQueue = SubmittedQueue; }; - void setSubmissionTime(uint64_t time); + /// Calling this function queries the current device timestamp and sets it as submission time for the command associated with this event. + void setSubmissionTime(); + /// @return Submission time for command associated with this event uint64_t getSubmissionTime(); + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 532f7b386fd90..828c2546456c9 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -209,7 +209,7 @@ event handler::finalize() { else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - NewEvent->setSubmissionTime(std::move(MQueue->getDeviceImplPtr()->getTime())); + NewEvent->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } return MLastEvent; @@ -313,7 +313,7 @@ event handler::finalize() { detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( std::move(CommandGroup), std::move(MQueue)); - Event->setSubmissionTime(MQueue->getDeviceImplPtr()->getTime()); + Event->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } From 9645592edc38fab7b7b5c4d05ffaa31f273e49ca Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Fri, 16 Dec 2022 13:59:26 -0800 Subject: [PATCH 15/53] Formatting Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 9 ++++++--- sycl/plugins/cuda/pi_cuda.cpp | 9 ++++++--- sycl/plugins/hip/pi_hip.cpp | 8 +++++--- sycl/plugins/level_zero/pi_level_zero.cpp | 3 ++- sycl/source/detail/device_impl.cpp | 22 +++++++++++----------- sycl/source/detail/device_impl.hpp | 2 +- sycl/source/detail/event_impl.cpp | 8 ++++---- sycl/source/detail/event_impl.hpp | 5 +++-- 8 files changed, 38 insertions(+), 28 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 968e45d3c41e0..7aad46f0f8b28 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1836,11 +1836,14 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// runtime must handle it or end the application. __SYCL_EXPORT pi_result piPluginGetLastError(char **message); -/// Queries device for it's global timestamp in nanoseconds, and updates HostTime with the value of the host timer at the closest possible point in time to that at which DeviceTime was returned. +/// Queries device for it's global timestamp in nanoseconds, and updates +/// HostTime with the value of the host timer at the closest possible point in +/// time to that at which DeviceTime was returned. /// /// \param device device to query for timestamp -/// \param deviceTime pointer to store device timestamp in nanoseconds. Optional argument, can be nullptr -/// \param hostTime pointer to store host timestamp in nanoseconds. Optional argurment, can be nullptr +/// \param deviceTime pointer to store device timestamp in nanoseconds. Optional +/// argument, can be nullptr \param hostTime pointer to store host timestamp in +/// nanoseconds. Optional argurment, can be nullptr __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index bb31746f38d6d..1a5290d7ff534 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -529,7 +529,8 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. - //TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0)); result = PI_CHECK_ERROR(cuEventRecord(evStart_, stream_)); } @@ -557,7 +558,8 @@ bool _pi_event::is_completed() const noexcept { } return true; } -//TODO: Remove this function and other code for setting or getting queued/submit time +// TODO: Remove this function and other code for setting or getting +// queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3871,7 +3873,8 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, } switch (param_name) { -//TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index cc0566d226073..f9c182a384798 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -565,7 +565,8 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. - //TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time PI_CHECK_ERROR(hipEventRecord(evQueued_, 0)); PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get())); } @@ -594,7 +595,7 @@ bool _pi_event::is_completed() const noexcept { return true; } -//TODO: Remove this and other code for setting or getting queued/submit time +// TODO: Remove this and other code for setting or getting queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3698,7 +3699,8 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, } switch (param_name) { - //TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index fad4cec55b052..cd4fc2ff63cb9 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -6130,7 +6130,8 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, ContextEndTime *= ZeTimerResolution; return ReturnValue(ContextEndTime); } - //TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return ReturnValue(uint64_t{0}); diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 959c24a4390d5..7707571e67203 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -436,18 +436,19 @@ std::string device_impl::getDeviceName() const { return MDeviceName; } -/* On first call this function queries for device timestamp +/* On first call this function queries for device timestamp along with host synchronized timestamp - and stores it in memeber varaible deviceTimePair. + and stores it in memeber varaible deviceTimePair. Succive calls to this function would just retrieve the host timestamp , - compute difference against the host timestamp in deviceTimePair + compute difference against the host timestamp in deviceTimePair and calculate the device timestamp based on the difference. - deviceTimePair is refreshed with new device and host timestamp after a certain interval - (determined by timeTillRefresh) to account for clock skew between host and device. -*/ + deviceTimePair is refreshed with new device and host timestamp after a + certain interval (determined by timeTillRefresh) to account for clock skew + between host and device. +*/ uint64_t device_impl::getTime(){ - constexpr uint64_t timeTillRefresh= 100e9; + constexpr uint64_t timeTillRefresh = 100e9; uint64_t hostTime; if (MIsHostDevice) { using namespace std::chrono; @@ -463,12 +464,11 @@ uint64_t device_impl::getTime(){ if(result == PI_ERROR_INVALID_OPERATION){ throw sycl::feature_not_supported( - "Device and/or backend does not support querying timestamp", - result); + "Device and/or backend does not support querying timestamp", result); } uint64_t diff= hostTime - deviceTimePair.second; - - if( diff > timeTillRefresh || diff <= 0){ + + if (diff > timeTillRefresh || diff <= 0) { plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); diff=0; } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 0d6fdfef35c53..c71fc0e0c9ac4 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -238,7 +238,7 @@ class device_impl { std::string getDeviceName() const; - /// Gets the current device timestamp + /// Gets the current device timestamp /// @throw sycl::feature_not_supported if feature is not supported on device uint64_t getTime(); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index d2fa8977b4e2d..90206e2f1fefb 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -429,10 +429,10 @@ void event_impl::cleanDepEventsThroughOneLevel() { } void event_impl::setSubmissionTime() { - if(!MSubmittedQueue.expired()){ - auto queue=MSubmittedQueue.lock(); - if(queue->MIsProfilingEnabled){ - MSubmitTime= queue->getDeviceImplPtr()->getTime(); + if (!MSubmittedQueue.expired()) { + auto queue = MSubmittedQueue.lock(); + if (queue->MIsProfilingEnabled) { + MSubmitTime = queue->getDeviceImplPtr()->getTime(); } } } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 8098fdf55c2a4..ca201cb45a880 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -232,12 +232,13 @@ class event_impl { MSubmittedQueue = SubmittedQueue; }; - /// Calling this function queries the current device timestamp and sets it as submission time for the command associated with this event. + /// Calling this function queries the current device timestamp and sets it as + /// submission time for the command associated with this event. void setSubmissionTime(); /// @return Submission time for command associated with this event uint64_t getSubmissionTime(); - + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed From 474fa1709e32689f9ee891c0dcd8a8416c4da3ac Mon Sep 17 00:00:00 2001 From: raaiq1 <106714052+raaiq1@users.noreply.github.com> Date: Mon, 19 Dec 2022 09:41:01 -0500 Subject: [PATCH 16/53] Apply suggestions from code review Co-authored-by: Romanov Vlad --- sycl/source/detail/device_impl.cpp | 2 +- sycl/source/detail/device_impl.hpp | 4 ++-- sycl/source/detail/event_impl.cpp | 10 ++++------ sycl/source/detail/event_impl.hpp | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 7707571e67203..54af17eb73ff4 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -439,7 +439,7 @@ std::string device_impl::getDeviceName() const { /* On first call this function queries for device timestamp along with host synchronized timestamp and stores it in memeber varaible deviceTimePair. - Succive calls to this function would just retrieve the host timestamp , + Successive calls to this function would just retrieve the host timestamp , compute difference against the host timestamp in deviceTimePair and calculate the device timestamp based on the difference. deviceTimePair is refreshed with new device and host timestamp after a diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index c71fc0e0c9ac4..41cb1c9fdb62c 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -240,7 +240,7 @@ class device_impl { /// Gets the current device timestamp /// @throw sycl::feature_not_supported if feature is not supported on device - uint64_t getTime(); + uint64_t getCurrentDeviceTime(); private: explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, @@ -253,7 +253,7 @@ class device_impl { bool MIsAssertFailSupported = false; mutable std::string MDeviceName; mutable std::once_flag MDeviceNameFlag; - std::pair deviceTimePair; + std::pair MDeviceHostBaseTime; }; // class device_impl } // namespace detail diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 90206e2f1fefb..dec729a757017 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -429,12 +429,10 @@ void event_impl::cleanDepEventsThroughOneLevel() { } void event_impl::setSubmissionTime() { - if (!MSubmittedQueue.expired()) { - auto queue = MSubmittedQueue.lock(); - if (queue->MIsProfilingEnabled) { - MSubmitTime = queue->getDeviceImplPtr()->getTime(); - } - } + if (!MIsProfilingEnabled) + return; + if (QueueImplPtr Queue = getSubmittedQueue()) + MSubmitTime = Queue->getDeviceImplPtr()->getTime(); } uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index ca201cb45a880..df3edb12f48b3 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -272,8 +272,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t MSubmitTime = - 0; // Stores submission time of command associated with event + // Stores submission time of command associated with event + uint64_t MSubmitTime = 0; ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; From a7b3b6041553a7afc760d177bcce8094df7523eb Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 19 Dec 2022 14:29:20 -0800 Subject: [PATCH 17/53] Added unittests Signed-off-by: Rauf, Rana --- sycl/plugins/opencl/pi_opencl.cpp | 3 +- sycl/source/detail/device_impl.cpp | 38 ++++++---- sycl/source/detail/event_impl.cpp | 9 ++- sycl/source/handler.cpp | 3 +- sycl/unittests/queue/GetProfilingInfo.cpp | 84 +++++++++++++++++++++++ 5 files changed, 119 insertions(+), 18 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 0e27922607424..22280c019a1f1 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1568,7 +1568,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, ret_err = getPlatformVersion(platform, platVer); - if (platVer < OCLV::V2_1 && devVer < OCLV::V2_1) { + if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { + setErrorMessage("OpenCL version for device and/or platform is less than 2.1",PI_ERROR_INVALID_OPERATION); return PI_ERROR_INVALID_OPERATION; } diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 54af17eb73ff4..7907ac85b3a59 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -37,7 +37,7 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) : MDevice(Device), MIsHostDevice(false), - deviceTimePair(std::make_pair(0, 0)) { + MDeviceHostBaseTime(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -438,17 +438,19 @@ std::string device_impl::getDeviceName() const { /* On first call this function queries for device timestamp along with host synchronized timestamp - and stores it in memeber varaible deviceTimePair. - Successive calls to this function would just retrieve the host timestamp , - compute difference against the host timestamp in deviceTimePair + and stores it in memeber varaible deviceTimePair. + Subsequent calls to this function would just retrieve the host timestamp , + compute difference against the host timestamp in deviceTimePair and calculate the device timestamp based on the difference. - deviceTimePair is refreshed with new device and host timestamp after a - certain interval (determined by timeTillRefresh) to account for clock skew - between host and device. -*/ + deviceTimePair is refreshed with new device and host timestamp after a certain interval + (determined by timeTillRefresh) to account for clock drift between host and device. +*/ + +uint64_t device_impl::getCurrentDeviceTime(){ + // To account for potential clock drift between host clock and device clock. + // The value set is arbitrary: 200 seconds + constexpr uint64_t timeTillRefresh= 200e9; -uint64_t device_impl::getTime(){ - constexpr uint64_t timeTillRefresh = 100e9; uint64_t hostTime; if (MIsHostDevice) { using namespace std::chrono; @@ -463,17 +465,25 @@ uint64_t device_impl::getTime(){ : result); if(result == PI_ERROR_INVALID_OPERATION){ + std::string errorMsg{}; + char* p; + plugin.call_nocheck(&p); + while (*p != '\0'){ + errorMsg +=*p; + p++; + } throw sycl::feature_not_supported( - "Device and/or backend does not support querying timestamp", result); + "Device and/or backend does not support querying timestamp: " + errorMsg, + result); } - uint64_t diff= hostTime - deviceTimePair.second; + uint64_t diff= hostTime - MDeviceHostBaseTime.second; if (diff > timeTillRefresh || diff <= 0) { - plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); + plugin.call(MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); diff=0; } - return deviceTimePair.first + diff; + return MDeviceHostBaseTime.first + diff; } } // namespace detail diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index dec729a757017..21d5b32f840db 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -431,8 +431,13 @@ void event_impl::cleanDepEventsThroughOneLevel() { void event_impl::setSubmissionTime() { if (!MIsProfilingEnabled) return; - if (QueueImplPtr Queue = getSubmittedQueue()) - MSubmitTime = Queue->getDeviceImplPtr()->getTime(); + if (QueueImplPtr Queue = getSubmittedQueue()){ + try{ + MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); + }catch(feature_not_supported & e){ + throw feature_not_supported(std::string("Unable to get command group submission time: ") + e.what(),PI_ERROR_INVALID_OPERATION); + } + } } uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 828c2546456c9..417a519acdae0 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -208,7 +208,8 @@ event handler::finalize() { PI_ERROR_INVALID_OPERATION); else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - + + NewEvent->setSubmittedQueue(MQueue); NewEvent->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index f410811e63445..c6e5a1e70e236 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -17,6 +17,7 @@ #include #include +#include #include @@ -316,3 +317,86 @@ TEST(GetProfilingInfo, check_if_now_dead_queue_property_not_set) { // The test passes without this, but keep it still, just in case. sycl::detail::getSyclObjImpl(Ctx)->getKernelProgramCache().reset(); } + +bool DeviceTimerCalled; + +pi_result redefinedPiGetDeviceAndHostTimer(pi_device Device, uint64_t* DeviceTime,uint64_t* HostTime){ + DeviceTimerCalled=true; + return PI_SUCCESS; +} + +TEST(GetProfilingInfo, check_no_command_submission_time_when_event_profiling_disabled){ + using namespace sycl; + unittest::PiMock Mock; + platform Plt= Mock.getPlatform(); + Mock.redefine(redefinedPiGetDeviceAndHostTimer); + device Dev=Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx,Dev}; + DeviceTimerCalled=false; + + event E=Queue.submit([&](handler& cgh){ + cgh.single_task>([](){}); + }); + EXPECT_FALSE(DeviceTimerCalled); + +} + +//Checks to see if command submit time is calculated before queue.submit returns. +//A host accessor is contructed before submitting the command, +//to ensure command submission time is calculated even if command may not be enqueued +//due to overlap in data dependencies between the kernel and host accessor +TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor){ + using namespace sycl; + unittest::PiMock Mock; + platform Plt= Mock.getPlatform(); + Mock.redefine(redefinedPiGetDeviceAndHostTimer); + device Dev=Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx,Dev,property::queue::enable_profiling()}; + int data[1024]; + buffer Buf{data,range<1>{1024}}; + DeviceTimerCalled=false; + + accessor host_acc= Buf.get_access(); + event E=Queue.submit([&](handler& cgh){ + accessor writeRes{Buf,cgh,read_write}; + + cgh.single_task>([](){}); + }); + + EXPECT_TRUE(DeviceTimerCalled); + +} + +pi_result redefinedFailedPiGetDeviceAndHostTimer(pi_device Device, uint64_t* DeviceTime,uint64_t* HostTime){ + return PI_ERROR_INVALID_OPERATION; +} + +pi_result redefinedPiPluginGetLastError(char **message) { + static char messageString[50]= "Plugin version not supported"; + *message=messageString; + return PI_SUCCESS; +} + +TEST(GetProfilingInfo, submission_time_exception_check){ + using namespace sycl; + unittest::PiMock Mock; + platform Plt= Mock.getPlatform(); + Mock.redefine(redefinedFailedPiGetDeviceAndHostTimer); + Mock.redefine(redefinedPiPluginGetLastError); + device Dev=Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx,Dev,property::queue::enable_profiling()}; + + try{ + event E=Queue.submit([&](handler& cgh){ + cgh.single_task>([](){}); + }); + FAIL(); + }catch(feature_not_supported &e){ + EXPECT_STREQ(e.what(),"Unable to get command group submission time: " + "Device and/or backend does not support querying timestamp: " + "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION) -59 (PI_ERROR_INVALID_OPERATION)"); + } +} From 1d4d35591c0e0616e82135d5f8f26857643e7de9 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 19 Dec 2022 14:31:29 -0800 Subject: [PATCH 18/53] Formatting Signed-off-by: Rauf, Rana --- sycl/plugins/opencl/pi_opencl.cpp | 4 +- sycl/source/detail/device_impl.cpp | 31 ++++--- sycl/source/detail/event_impl.cpp | 15 ++-- sycl/source/handler.cpp | 2 +- sycl/unittests/queue/GetProfilingInfo.cpp | 105 ++++++++++++---------- 5 files changed, 87 insertions(+), 70 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 22280c019a1f1..dadbef446a29c 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1569,7 +1569,9 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, ret_err = getPlatformVersion(platform, platVer); if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { - setErrorMessage("OpenCL version for device and/or platform is less than 2.1",PI_ERROR_INVALID_OPERATION); + setErrorMessage( + "OpenCL version for device and/or platform is less than 2.1", + PI_ERROR_INVALID_OPERATION); return PI_ERROR_INVALID_OPERATION; } diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 7907ac85b3a59..4a07dcae0cfbe 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -438,18 +438,19 @@ std::string device_impl::getDeviceName() const { /* On first call this function queries for device timestamp along with host synchronized timestamp - and stores it in memeber varaible deviceTimePair. + and stores it in memeber varaible deviceTimePair. Subsequent calls to this function would just retrieve the host timestamp , - compute difference against the host timestamp in deviceTimePair + compute difference against the host timestamp in deviceTimePair and calculate the device timestamp based on the difference. - deviceTimePair is refreshed with new device and host timestamp after a certain interval - (determined by timeTillRefresh) to account for clock drift between host and device. -*/ + deviceTimePair is refreshed with new device and host timestamp after a + certain interval (determined by timeTillRefresh) to account for clock drift + between host and device. +*/ -uint64_t device_impl::getCurrentDeviceTime(){ - // To account for potential clock drift between host clock and device clock. +uint64_t device_impl::getCurrentDeviceTime() { + // To account for potential clock drift between host clock and device clock. // The value set is arbitrary: 200 seconds - constexpr uint64_t timeTillRefresh= 200e9; + constexpr uint64_t timeTillRefresh = 200e9; uint64_t hostTime; if (MIsHostDevice) { @@ -466,20 +467,22 @@ uint64_t device_impl::getCurrentDeviceTime(){ if(result == PI_ERROR_INVALID_OPERATION){ std::string errorMsg{}; - char* p; + char *p; plugin.call_nocheck(&p); - while (*p != '\0'){ - errorMsg +=*p; + while (*p != '\0') { + errorMsg += *p; p++; } throw sycl::feature_not_supported( - "Device and/or backend does not support querying timestamp: " + errorMsg, + "Device and/or backend does not support querying timestamp: " + + errorMsg, result); } - uint64_t diff= hostTime - MDeviceHostBaseTime.second; + uint64_t diff = hostTime - MDeviceHostBaseTime.second; if (diff > timeTillRefresh || diff <= 0) { - plugin.call(MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); + plugin.call( + MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); diff=0; } diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 21d5b32f840db..ff88725b5e111 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -431,13 +431,16 @@ void event_impl::cleanDepEventsThroughOneLevel() { void event_impl::setSubmissionTime() { if (!MIsProfilingEnabled) return; - if (QueueImplPtr Queue = getSubmittedQueue()){ - try{ - MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); - }catch(feature_not_supported & e){ - throw feature_not_supported(std::string("Unable to get command group submission time: ") + e.what(),PI_ERROR_INVALID_OPERATION); - } + if (QueueImplPtr Queue = getSubmittedQueue()) { + try { + MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); + } catch (feature_not_supported &e) { + throw feature_not_supported( + std::string("Unable to get command group submission time: ") + + e.what(), + PI_ERROR_INVALID_OPERATION); } + } } uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 417a519acdae0..983858847d1de 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -208,7 +208,7 @@ event handler::finalize() { PI_ERROR_INVALID_OPERATION); else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - + NewEvent->setSubmittedQueue(MQueue); NewEvent->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(NewEvent); diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index c6e5a1e70e236..2b5526158e22e 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -320,83 +320,92 @@ TEST(GetProfilingInfo, check_if_now_dead_queue_property_not_set) { bool DeviceTimerCalled; -pi_result redefinedPiGetDeviceAndHostTimer(pi_device Device, uint64_t* DeviceTime,uint64_t* HostTime){ - DeviceTimerCalled=true; - return PI_SUCCESS; +pi_result redefinedPiGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime) { + DeviceTimerCalled = true; + return PI_SUCCESS; } -TEST(GetProfilingInfo, check_no_command_submission_time_when_event_profiling_disabled){ +TEST(GetProfilingInfo, + check_no_command_submission_time_when_event_profiling_disabled) { using namespace sycl; unittest::PiMock Mock; - platform Plt= Mock.getPlatform(); - Mock.redefine(redefinedPiGetDeviceAndHostTimer); - device Dev=Plt.get_devices()[0]; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedPiGetDeviceAndHostTimer); + device Dev = Plt.get_devices()[0]; context Ctx{Dev}; - queue Queue{Ctx,Dev}; - DeviceTimerCalled=false; + queue Queue{Ctx, Dev}; + DeviceTimerCalled = false; - event E=Queue.submit([&](handler& cgh){ - cgh.single_task>([](){}); - }); + event E = Queue.submit( + [&](handler &cgh) { cgh.single_task>([]() {}); }); EXPECT_FALSE(DeviceTimerCalled); - } -//Checks to see if command submit time is calculated before queue.submit returns. -//A host accessor is contructed before submitting the command, -//to ensure command submission time is calculated even if command may not be enqueued -//due to overlap in data dependencies between the kernel and host accessor -TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor){ +// Checks to see if command submit time is calculated before queue.submit +// returns. A host accessor is contructed before submitting the command, to +// ensure command submission time is calculated even if command may not be +// enqueued due to overlap in data dependencies between the kernel and host +// accessor +TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor) { using namespace sycl; unittest::PiMock Mock; - platform Plt= Mock.getPlatform(); - Mock.redefine(redefinedPiGetDeviceAndHostTimer); - device Dev=Plt.get_devices()[0]; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedPiGetDeviceAndHostTimer); + device Dev = Plt.get_devices()[0]; context Ctx{Dev}; - queue Queue{Ctx,Dev,property::queue::enable_profiling()}; + queue Queue{Ctx, Dev, property::queue::enable_profiling()}; int data[1024]; - buffer Buf{data,range<1>{1024}}; - DeviceTimerCalled=false; + buffer Buf{data, range<1>{1024}}; + DeviceTimerCalled = false; - accessor host_acc= Buf.get_access(); - event E=Queue.submit([&](handler& cgh){ - accessor writeRes{Buf,cgh,read_write}; + accessor host_acc = Buf.get_access(); + event E = Queue.submit([&](handler &cgh) { + accessor writeRes{Buf, cgh, read_write}; - cgh.single_task>([](){}); + cgh.single_task>([]() {}); }); EXPECT_TRUE(DeviceTimerCalled); - } -pi_result redefinedFailedPiGetDeviceAndHostTimer(pi_device Device, uint64_t* DeviceTime,uint64_t* HostTime){ - return PI_ERROR_INVALID_OPERATION; +pi_result redefinedFailedPiGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime) { + return PI_ERROR_INVALID_OPERATION; } pi_result redefinedPiPluginGetLastError(char **message) { - static char messageString[50]= "Plugin version not supported"; - *message=messageString; + static char messageString[50] = "Plugin version not supported"; + *message = messageString; return PI_SUCCESS; } -TEST(GetProfilingInfo, submission_time_exception_check){ +TEST(GetProfilingInfo, submission_time_exception_check) { using namespace sycl; unittest::PiMock Mock; - platform Plt= Mock.getPlatform(); - Mock.redefine(redefinedFailedPiGetDeviceAndHostTimer); - Mock.redefine(redefinedPiPluginGetLastError); - device Dev=Plt.get_devices()[0]; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedFailedPiGetDeviceAndHostTimer); + Mock.redefine( + redefinedPiPluginGetLastError); + device Dev = Plt.get_devices()[0]; context Ctx{Dev}; - queue Queue{Ctx,Dev,property::queue::enable_profiling()}; + queue Queue{Ctx, Dev, property::queue::enable_profiling()}; - try{ - event E=Queue.submit([&](handler& cgh){ - cgh.single_task>([](){}); - }); - FAIL(); - }catch(feature_not_supported &e){ - EXPECT_STREQ(e.what(),"Unable to get command group submission time: " - "Device and/or backend does not support querying timestamp: " - "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION) -59 (PI_ERROR_INVALID_OPERATION)"); + try { + event E = Queue.submit( + [&](handler &cgh) { cgh.single_task>([]() {}); }); + FAIL(); + } catch (feature_not_supported &e) { + EXPECT_STREQ( + e.what(), + "Unable to get command group submission time: " + "Device and/or backend does not support querying timestamp: " + "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION) -59 " + "(PI_ERROR_INVALID_OPERATION)"); } } From d086af846c9faddcc0a3d096ae2bb5194bbfc13c Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 19 Dec 2022 14:42:13 -0800 Subject: [PATCH 19/53] More formatting Signed-off-by: Rauf, Rana --- sycl/source/detail/device_impl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 4a07dcae0cfbe..b030b5f78dc9d 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -465,7 +465,7 @@ uint64_t device_impl::getCurrentDeviceTime() { plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS : result); - if(result == PI_ERROR_INVALID_OPERATION){ + if (result == PI_ERROR_INVALID_OPERATION) { std::string errorMsg{}; char *p; plugin.call_nocheck(&p); @@ -483,7 +483,7 @@ uint64_t device_impl::getCurrentDeviceTime() { if (diff > timeTillRefresh || diff <= 0) { plugin.call( MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); - diff=0; + diff = 0; } return MDeviceHostBaseTime.first + diff; From 5d0175798b5ff71524e4f38276fe21294b3e1ea8 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 19 Dec 2022 14:52:19 -0800 Subject: [PATCH 20/53] Fix HIP fail Signed-off-by: Rauf, Rana --- sycl/plugins/hip/pi_hip.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index c2ee33963012c..579adb2e345c9 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5214,8 +5214,8 @@ pi_result hip_piTearDown(void *PluginParameter) { return PI_SUCCESS; } -pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, - uint64_t *hostTime) { +pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { _pi_event::native_type event; using namespace std::chrono; if (DeviceTime) { From 26ca040ca582ba47146b1c0ffd7d0bc79b0c85dc Mon Sep 17 00:00:00 2001 From: raaiq1 <106714052+raaiq1@users.noreply.github.com> Date: Tue, 20 Dec 2022 09:48:35 -0500 Subject: [PATCH 21/53] Apply suggestions from code review Co-authored-by: smaslov-intel --- sycl/include/sycl/detail/pi.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 5a620ad995b52..f8bf98dd11e92 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1906,10 +1906,11 @@ __SYCL_EXPORT pi_result piPluginGetLastError(char **message); /// HostTime with the value of the host timer at the closest possible point in /// time to that at which DeviceTime was returned. /// -/// \param device device to query for timestamp -/// \param deviceTime pointer to store device timestamp in nanoseconds. Optional -/// argument, can be nullptr \param hostTime pointer to store host timestamp in -/// nanoseconds. Optional argurment, can be nullptr +/// \param Device device to query for timestamp +/// \param DeviceTime pointer to store device timestamp in nanoseconds. Optional +/// argument, can be nullptr +\param HostTime pointer to store host timestamp in +/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will not be written __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime); From ba6ccc8bb728d49b5c436325fd3f3b6af4af637b Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 20 Dec 2022 07:52:28 -0800 Subject: [PATCH 22/53] Add review suggestions,fix HIP issues and handle host platform Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 5 +++-- sycl/plugins/hip/pi_hip.cpp | 23 +++++++++++++++-------- sycl/plugins/hip/pi_hip.hpp | 6 ++---- sycl/plugins/opencl/pi_opencl.cpp | 3 +-- sycl/source/detail/event_impl.cpp | 8 +------- 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index f8bf98dd11e92..c99493809754b 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1909,8 +1909,9 @@ __SYCL_EXPORT pi_result piPluginGetLastError(char **message); /// \param Device device to query for timestamp /// \param DeviceTime pointer to store device timestamp in nanoseconds. Optional /// argument, can be nullptr -\param HostTime pointer to store host timestamp in -/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will not be written +/// \param HostTime pointer to store host timestamp in +/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will +/// not be written __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime); diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 579adb2e345c9..47fd2c0764d6c 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -609,7 +609,7 @@ pi_uint64 _pi_event::get_start_time() const { assert(is_started()); PI_CHECK_ERROR( - hipEventElapsedTime(&miliSeconds, context_->evBase_, evStart_)); + hipEventElapsedTime(&miliSeconds, _pi_platform::evBase_, evStart_)); return static_cast(miliSeconds * 1.0e6); } @@ -617,7 +617,8 @@ pi_uint64 _pi_event::get_end_time() const { float miliSeconds = 0.0f; assert(is_started() && is_recorded()); - PI_CHECK_ERROR(hipEventElapsedTime(&miliSeconds, context_->evBase_, evEnd_)); + PI_CHECK_ERROR( + hipEventElapsedTime(&miliSeconds, _pi_platform::evBase_, evEnd_)); return static_cast(miliSeconds * 1.0e6); } @@ -1992,10 +1993,16 @@ pi_result hip_piContextCreate(const pi_context_properties *properties, _pi_context::kind::user_defined, newContext, *devices}); } - // Use default stream to record base event counter - PI_CHECK_ERROR( - hipEventCreateWithFlags(&piContextPtr->evBase_, hipEventDefault)); - PI_CHECK_ERROR(hipEventRecord(piContextPtr->evBase_, 0)); + static std::once_flag initFlag; + std::call_once( + initFlag, + [](pi_result &err) { + // Use default stream to record base event counter + PI_CHECK_ERROR( + hipEventCreateWithFlags(&_pi_platform::evBase_, hipEventDefault)); + PI_CHECK_ERROR(hipEventRecord(_pi_platform::evBase_, 0)); + }, + errcode_ret); // For non-primary scoped contexts keep the last active on top of the stack // as `cuCtxCreate` replaces it implicitly otherwise. @@ -2025,8 +2032,6 @@ pi_result hip_piContextRelease(pi_context ctxt) { std::unique_ptr<_pi_context> context{ctxt}; - PI_CHECK_ERROR(hipEventDestroy(context->evBase_)); - if (!ctxt->is_primary()) { hipCtx_t hipCtxt = ctxt->get(); // hipCtxSynchronize is not supported for AMD platform so we can just @@ -5389,3 +5394,5 @@ pi_result piPluginInit(pi_plugin *PluginInit) { } } // extern "C" + +hipEvent_t _pi_platform::evBase_{nullptr}; \ No newline at end of file diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp index af2ff53d6fa6f..c8f3c2e65dc10 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -65,6 +65,7 @@ using _pi_stream_guard = std::unique_lock; /// when devices are used. /// struct _pi_platform { + static hipEvent_t evBase_; // HIP event used as base counter std::vector> devices_; }; @@ -146,11 +147,8 @@ struct _pi_context { _pi_device *deviceId_; std::atomic_uint32_t refCount_; - hipEvent_t evBase_; // HIP event used as base counter - _pi_context(kind k, hipCtx_t ctxt, _pi_device *devId) - : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1}, - evBase_(nullptr) { + : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1} { hip_piDeviceRetain(deviceId_); }; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index bdea86faca36f..c00e24a014acc 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1693,9 +1693,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_ERROR_INVALID_OPERATION; } - uint64_t dummy; - if (DeviceTime) { + uint64_t dummy; clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime == nullptr ? &dummy : HostTime); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 348b215420cf5..72a3111d8d30c 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -265,13 +265,7 @@ template <> uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); - if (!MHostEvent) { - return MSubmitTime; - } - if (!MHostProfilingInfo) - throw invalid_object_error("Profiling info is not available.", - PI_ERROR_PROFILING_INFO_NOT_AVAILABLE); - return MHostProfilingInfo->getStartTime(); + return MSubmitTime; } template <> From bfcc33e445eee5bca2ca44817483d0dc6a3a2dc2 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 24 Nov 2022 07:16:53 -0800 Subject: [PATCH 23/53] [SYCL] Implement command_submit L0 Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 1 + sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 1 + sycl/plugins/level_zero/pi_level_zero.cpp | 10 ++++++++++ sycl/source/detail/event_impl.cpp | 12 +++++++++++- sycl/source/detail/event_impl.hpp | 5 +++++ sycl/source/handler.cpp | 14 +++++++++++++- 6 files changed, 41 insertions(+), 2 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 2b70daa7aeeac..fc7952f147075 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -319,6 +319,7 @@ typedef enum { PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER = 0x20004, + PI_DEVICE_CURRENT_TIME = 0x20005; } _pi_device_info; typedef enum { diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 1988d8f0db53c..7e90a8996b7fb 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2034,6 +2034,7 @@ pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn) { return PI_SUCCESS; } + pi_result piTearDown(void *) { delete reinterpret_cast( PiESimdDeviceAccess->data); diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 887e1ffdf52a3..aa2acd7f5ad15 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -3244,6 +3244,16 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, return ReturnValue(bool{false}); } + case PI_DEVICE_CURRENT_TIME:{ + uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; + uint64_t TimestampMaxCount = ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + uint64_t deviceClockCount, dummy; + + ZE_CALL(zeDeviceGetGlobalTimestamps, + (Device->ZeDevice, &dummy, &deviceClockCount)); + return ReturnValue((deviceClockCount & TimestampMaxCount) * ZeTimerResolution); + } + // TODO: Implement. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index d95173c47e7fb..31a4d8249f30c 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -266,9 +266,13 @@ uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MHostEvent) { - if (MEvent) + if(getPlugin().getBackend() == backend::ext_oneapi_level_zero){ + return submitTime; + } + if (MEvent){ return get_event_profiling_info( this->getHandleRef(), this->getPlugin()); + } return 0; } if (!MHostProfilingInfo) @@ -428,6 +432,12 @@ bool event_impl::isCompleted() { return get_info() == info::event_command_status::complete; } +void event_impl::setSubmissionTime(uint64_t time){ + submitTime=time; +} + uint64_t event_imp::getSubmissionTime(){ + return submitTime; + } } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 3330654501fa4..73ec45998cec6 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -221,6 +221,10 @@ class event_impl { MSubmittedQueue = SubmittedQueue; }; + void setSubmissionTime(uint64_t time); + + uint64_t getSubmissionTime(); + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed @@ -257,6 +261,7 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; + uint64_t submitTime = 0; //Only used for level-zero plugin ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c0b5e3881818c..61aaf2345296b 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -94,6 +94,15 @@ event handler::finalize() { return MLastEvent; MIsFinalized = true; + auto setSubmissionTime = [&](detail::EventImplPtr event) { + auto plugin= MQueue->getPlugin(); + if(!MQueue->is_host() && plugin.getBackend() == backend::ext_oneapi_level_zero) { + uint64_t submitTime=0; + plugin.call(MQueue->getDeviceImplPtr()->getHandleRef(),sizeof(uint64_t),&submitTime,nullptr); + event->setSubmissionTime(submitTime); + } + }; + const auto &type = getType(); if (type == detail::CG::Kernel) { // If there were uses of set_specialization_constant build the kernel_bundle @@ -210,9 +219,11 @@ event handler::finalize() { PI_ERROR_INVALID_OPERATION); else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - + + setSubmissionTime(NewEvent); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } + return MLastEvent; } } @@ -335,6 +346,7 @@ event handler::finalize() { detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( std::move(CommandGroup), std::move(MQueue)); + setSubmissionTime(Event); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } From d353ae852e5490be7395a10d6e889e3554fb4a14 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 12 Dec 2022 13:56:40 -0800 Subject: [PATCH 24/53] Add PI API extension piGetDeviceAndHostTimer Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.def | 3 +++ sycl/include/sycl/detail/pi.h | 13 ++++++++++-- sycl/plugins/cuda/pi_cuda.cpp | 5 +++++ .../esimd_emulator/pi_esimd_emulator.cpp | 3 +++ sycl/plugins/hip/pi_hip.cpp | 5 +++++ sycl/plugins/level_zero/pi_level_zero.cpp | 21 ++++++++++--------- sycl/plugins/opencl/pi_opencl.cpp | 5 +++++ sycl/source/detail/event_impl.cpp | 2 +- sycl/source/handler.cpp | 9 ++++---- sycl/test/abi/pi_level_zero_symbol_check.dump | 1 + sycl/test/abi/pi_opencl_symbol_check.dump | 1 + 11 files changed, 51 insertions(+), 17 deletions(-) diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 0e737c0256fc9..f63ba4835486b 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -145,4 +145,7 @@ _PI_API(piextUSMEnqueueFill2D) _PI_API(piextUSMEnqueueMemset2D) _PI_API(piextUSMEnqueueMemcpy2D) +_PI_API(piGetDeviceAndHostTimer) + + #undef _PI_API diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index fc7952f147075..fbdef796e5c4b 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -53,6 +53,7 @@ // 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for // piDeviceGetInfo. // 11.15 piEventCreate creates even in the signalled state now. +<<<<<<< HEAD // 11.16 Add PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE and // PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH as an extension for // piDeviceGetInfo. @@ -74,9 +75,11 @@ // PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMSET2D_SUPPORT, and // PI_EXT_ONEAPI_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT context info query // descriptors. +// 12.22 Add piGetDeviceAndHostTimer to query device wall-clock timestamp #define _PI_H_VERSION_MAJOR 12 -#define _PI_H_VERSION_MINOR 21 +#define _PI_H_VERSION_MINOR 22 + #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -319,7 +322,6 @@ typedef enum { PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_2D = 0x20002, PI_EXT_ONEAPI_DEVICE_INFO_MAX_WORK_GROUPS_3D = 0x20003, PI_EXT_ONEAPI_DEVICE_INFO_CUDA_ASYNC_BARRIER = 0x20004, - PI_DEVICE_CURRENT_TIME = 0x20005; } _pi_device_info; typedef enum { @@ -1902,6 +1904,13 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// runtime must handle it or end the application. __SYCL_EXPORT pi_result piPluginGetLastError(char **message); +/// Returns the global timestamp from \param device , and syncronized host timestamp +/// +/// \param device device to query for timestamp +/// \param deviceTime pointer to store device time +/// \param hostTime pointer to store syncronized host time +__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime); + struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin // checks and writes the appropriate Function Pointers in diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index a8ae7a44bee38..e221db296abaa 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5486,6 +5486,10 @@ pi_result cuda_piTearDown(void *) { return PI_SUCCESS; } +pi_result cuda_piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + assert(0 && "Method not implemented"); +} + const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5634,6 +5638,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, cuda_piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, cuda_piPluginGetLastError) _PI_CL(piTearDown, cuda_piTearDown) + _PI_CL(piGetDeviceAndHostTimer, cuda_piGetDeviceAndHostTimer) #undef _PI_CL diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 7e90a8996b7fb..f90dc443b7b57 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2050,6 +2050,9 @@ pi_result piTearDown(void *) { return PI_SUCCESS; } +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + assert(0 && "Method not implemented"); +} const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index f4d316db6593a..a3908d20aaa47 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5208,6 +5208,10 @@ pi_result hip_piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + assert(0 && "Method not implemented"); +} + const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -5350,6 +5354,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, hip_piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, hip_piPluginGetLastError) _PI_CL(piTearDown, hip_piTearDown) + _PI_CL(piGetDeviceAndHostTimer, hip_piGetDeviceAndHostTimer) #undef _PI_CL diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index aa2acd7f5ad15..ae70a31ce0c00 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -3244,16 +3244,6 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, return ReturnValue(bool{false}); } - case PI_DEVICE_CURRENT_TIME:{ - uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; - uint64_t TimestampMaxCount = ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); - uint64_t deviceClockCount, dummy; - - ZE_CALL(zeDeviceGetGlobalTimestamps, - (Device->ZeDevice, &dummy, &deviceClockCount)); - return ReturnValue((deviceClockCount & TimestampMaxCount) * ZeTimerResolution); - } - // TODO: Implement. case PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: default: @@ -9364,4 +9354,15 @@ pi_result _pi_buffer::free() { return PI_SUCCESS; } + +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + const uint64_t& ZeTimerResolution = device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = ((1ULL << device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + uint64_t deviceClockCount, dummy; + + ZE_CALL(zeDeviceGetGlobalTimestamps, + (device->ZeDevice, hostTime == nullptr ? &dummy : hostTime, &deviceClockCount)); + *deviceTime= (deviceClockCount & TimestampMaxCount) * ZeTimerResolution ; + return PI_SUCCESS; +} } // extern "C" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index e71f23e0d2e4d..afe9d674598ed 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1665,6 +1665,10 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ + assert(0 && "Method not implemented"); +} + const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; pi_result piPluginInit(pi_plugin *PluginInit) { @@ -1802,6 +1806,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextKernelSetArgSampler, piextKernelSetArgSampler) _PI_CL(piPluginGetLastError, piPluginGetLastError) _PI_CL(piTearDown, piTearDown) + _PI_CL(piGetDeviceAndHostTimer, piGetDeviceAndHostTimer) #undef _PI_CL diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 31a4d8249f30c..25cce83dc5d9f 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -435,7 +435,7 @@ bool event_impl::isCompleted() { void event_impl::setSubmissionTime(uint64_t time){ submitTime=time; } - uint64_t event_imp::getSubmissionTime(){ + uint64_t event_impl::getSubmissionTime(){ return submitTime; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 61aaf2345296b..4237700cf0b9b 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -95,13 +95,14 @@ event handler::finalize() { MIsFinalized = true; auto setSubmissionTime = [&](detail::EventImplPtr event) { - auto plugin= MQueue->getPlugin(); - if(!MQueue->is_host() && plugin.getBackend() == backend::ext_oneapi_level_zero) { + if(!MQueue->is_host()){ + auto plugin=MQueue->getPlugin(); + if( plugin.getBackend() == backend::ext_oneapi_level_zero) { uint64_t submitTime=0; - plugin.call(MQueue->getDeviceImplPtr()->getHandleRef(),sizeof(uint64_t),&submitTime,nullptr); + plugin.call(MQueue->getDeviceImplPtr()->getHandleRef(),&submitTime,nullptr); event->setSubmissionTime(submitTime); } - }; + }}; const auto &type = getType(); if (type == detail::CG::Kernel) { diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 44f6d2f781ca3..54229d45356a4 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -41,6 +41,7 @@ piEventRetain piEventSetCallback piEventSetStatus piEventsWait +piGetDeviceAndHostTimer piKernelCreate piKernelGetGroupInfo piKernelGetInfo diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 1f359bf582458..e54388658985e 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -13,6 +13,7 @@ piDeviceGetInfo piDevicesGet piEnqueueMemBufferMap piEventCreate +piGetDeviceAndHostTimer piKernelCreate piKernelGetGroupInfo piKernelGetSubGroupInfo From d8711871125063fe534117113bff04b2f00f3979 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 12 Dec 2022 15:14:26 -0800 Subject: [PATCH 25/53] Formatting Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 12 ++++++++---- sycl/plugins/cuda/pi_cuda.cpp | 3 ++- .../esimd_emulator/pi_esimd_emulator.cpp | 4 ++-- sycl/plugins/hip/pi_hip.cpp | 3 ++- sycl/plugins/level_zero/pi_level_zero.cpp | 15 +++++++++------ sycl/plugins/opencl/pi_opencl.cpp | 3 ++- sycl/source/detail/event_impl.cpp | 14 +++++--------- sycl/source/detail/event_impl.hpp | 3 ++- sycl/source/handler.cpp | 18 ++++++++++-------- 9 files changed, 42 insertions(+), 33 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index fbdef796e5c4b..0b6456725d304 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1901,15 +1901,19 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// /// \return PI_SUCCESS if plugin is indicating non-fatal warning. Any other /// error code indicates that plugin considers this to be a fatal error and the -/// runtime must handle it or end the application. +/// Returns the global timestamp from \param device , and syncronized host +/// timestamp __SYCL_EXPORT pi_result piPluginGetLastError(char **message); -/// Returns the global timestamp from \param device , and syncronized host timestamp +/// Returns the global timestamp from \param device , and syncronized host +/// timestamp /// /// \param device device to query for timestamp /// \param deviceTime pointer to store device time -/// \param hostTime pointer to store syncronized host time -__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime); +/// \param hostTime pointer to store syncronized host time +__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device device, + uint64_t *deviceTime, + uint64_t *hostTime); struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index e221db296abaa..b648bfdab24fe 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5486,7 +5486,8 @@ pi_result cuda_piTearDown(void *) { return PI_SUCCESS; } -pi_result cuda_piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ +pi_result cuda_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { assert(0 && "Method not implemented"); } diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index f90dc443b7b57..313e3056cf9ed 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2034,7 +2034,6 @@ pi_result piextPluginGetOpaqueData(void *, void **OpaqueDataReturn) { return PI_SUCCESS; } - pi_result piTearDown(void *) { delete reinterpret_cast( PiESimdDeviceAccess->data); @@ -2050,7 +2049,8 @@ pi_result piTearDown(void *) { return PI_SUCCESS; } -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { assert(0 && "Method not implemented"); } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index a3908d20aaa47..081c8cb991474 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5208,7 +5208,8 @@ pi_result hip_piTearDown(void *PluginParameter) { return PI_SUCCESS; } -pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ +pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { assert(0 && "Method not implemented"); } diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index ae70a31ce0c00..ac6674d184bb2 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9354,15 +9354,18 @@ pi_result _pi_buffer::free() { return PI_SUCCESS; } - -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ - const uint64_t& ZeTimerResolution = device->ZeDeviceProperties->timerResolution; - const uint64_t TimestampMaxCount = ((1ULL << device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { + const uint64_t &ZeTimerResolution = + device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxCount = + ((1ULL << device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); uint64_t deviceClockCount, dummy; ZE_CALL(zeDeviceGetGlobalTimestamps, - (device->ZeDevice, hostTime == nullptr ? &dummy : hostTime, &deviceClockCount)); - *deviceTime= (deviceClockCount & TimestampMaxCount) * ZeTimerResolution ; + (device->ZeDevice, hostTime == nullptr ? &dummy : hostTime, + &deviceClockCount)); + *deviceTime = (deviceClockCount & TimestampMaxCount) * ZeTimerResolution; return PI_SUCCESS; } } // extern "C" diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index afe9d674598ed..607e19d423a7b 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1665,7 +1665,8 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t* deviceTime, uint64_t* hostTime){ +pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, + uint64_t *hostTime) { assert(0 && "Method not implemented"); } diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 25cce83dc5d9f..b9c3d59040dff 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -266,13 +266,13 @@ uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MHostEvent) { - if(getPlugin().getBackend() == backend::ext_oneapi_level_zero){ + if (getPlugin().getBackend() == backend::ext_oneapi_level_zero) { return submitTime; } - if (MEvent){ + if (MEvent) { return get_event_profiling_info( this->getHandleRef(), this->getPlugin()); - } + } return 0; } if (!MHostProfilingInfo) @@ -428,16 +428,12 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } +void event_impl::setSubmissionTime(uint64_t time) { submitTime = time; } +uint64_t event_impl::getSubmissionTime() { return submitTime; } bool event_impl::isCompleted() { return get_info() == info::event_command_status::complete; } -void event_impl::setSubmissionTime(uint64_t time){ - submitTime=time; -} - uint64_t event_impl::getSubmissionTime(){ - return submitTime; - } } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 73ec45998cec6..238d85186e924 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -261,7 +261,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t submitTime = 0; //Only used for level-zero plugin + uint64_t submitTime = 0; // Only used for level-zero plugin, submission time + // of associated MCommand ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 4237700cf0b9b..c751c7126af1c 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -94,15 +94,17 @@ event handler::finalize() { return MLastEvent; MIsFinalized = true; - auto setSubmissionTime = [&](detail::EventImplPtr event) { - if(!MQueue->is_host()){ - auto plugin=MQueue->getPlugin(); - if( plugin.getBackend() == backend::ext_oneapi_level_zero) { - uint64_t submitTime=0; - plugin.call(MQueue->getDeviceImplPtr()->getHandleRef(),&submitTime,nullptr); + auto setSubmissionTime = [&](detail::EventImplPtr event) { + if (!MQueue->is_host()) { + auto plugin = MQueue->getPlugin(); + if (plugin.getBackend() == backend::ext_oneapi_level_zero) { + uint64_t submitTime = 0; + plugin.call( + MQueue->getDeviceImplPtr()->getHandleRef(), &submitTime, nullptr); event->setSubmissionTime(submitTime); } - }}; + } + }; const auto &type = getType(); if (type == detail::CG::Kernel) { @@ -220,7 +222,7 @@ event handler::finalize() { PI_ERROR_INVALID_OPERATION); else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - + setSubmissionTime(NewEvent); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } From 6361bc0260bf629f40afe789ffa5656020db7677 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 12 Dec 2022 15:30:08 -0800 Subject: [PATCH 26/53] Moe formatting Signed-off-by: Rauf, Rana --- sycl/source/detail/event_impl.hpp | 2 +- sycl/source/handler.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 238d85186e924..162ec53e8d60a 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -224,7 +224,7 @@ class event_impl { void setSubmissionTime(uint64_t time); uint64_t getSubmissionTime(); - + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index c751c7126af1c..1b603cff7baff 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -226,7 +226,7 @@ event handler::finalize() { setSubmissionTime(NewEvent); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } - + return MLastEvent; } } From e2fc03a0405f77b3f17ff948198358ee22da2420 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 06:19:24 -0800 Subject: [PATCH 27/53] E Signed-off-by: Rauf, Rana --- sycl/source/detail/event_impl.hpp | 1 - sycl/source/handler.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 162ec53e8d60a..208cce3cfe151 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -224,7 +224,6 @@ class event_impl { void setSubmissionTime(uint64_t time); uint64_t getSubmissionTime(); - QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1b603cff7baff..8837c0d206b86 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -226,7 +226,6 @@ event handler::finalize() { setSubmissionTime(NewEvent); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } - return MLastEvent; } } From 0473ed4ecbafe1f4d2ad2435b8af352d5d5ca9ff Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 06:52:57 -0800 Subject: [PATCH 28/53] Add piGetDeviceAndHostTimer Signed-off-by: Rauf, Rana --- sycl/unittests/helpers/PiMockPlugin.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index e08b2a015748e..8f397501c6008 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -14,6 +14,7 @@ #include #include +#include #include // Helpers for dummy handles @@ -1094,3 +1095,16 @@ inline pi_result mock_piTearDown(void *PluginParameter) { return PI_SUCCESS; } inline pi_result mock_piPluginGetLastError(char **message) { return PI_SUCCESS; } + +// Returns the wall-clock timestamp of host for deviceTime and hostTime +inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, + uint64_t *deviceTime, + uint64_t *hostTime) { + using namespace std::chrono; + auto timeNanoseconds = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); + *deviceTime = timeNanoseconds; + *hostTime = timeNanoseconds; + return PI_SUCCESS; +} From 0d9021ffeab837ed561cc02e05eba368bf68ed5f Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 08:27:25 -0800 Subject: [PATCH 29/53] Dummy Signed-off-by: Rauf, Rana --- sycl/unittests/helpers/PiMockPlugin.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 8f397501c6008..1991b57071461 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1100,6 +1100,7 @@ inline pi_result mock_piPluginGetLastError(char **message) { inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { + using namespace std::chrono; auto timeNanoseconds = duration_cast(steady_clock::now().time_since_epoch()) From 4be8dc0898e1b566924598f5c1939671a29c10ac Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 15:27:37 -0800 Subject: [PATCH 30/53] Apply review suggestions Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 12 ++++++------ sycl/plugins/level_zero/pi_level_zero.cpp | 21 +++++++++++++-------- sycl/source/detail/event_impl.hpp | 4 ++-- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 0b6456725d304..611af0be03724 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1905,15 +1905,15 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// timestamp __SYCL_EXPORT pi_result piPluginGetLastError(char **message); -/// Returns the global timestamp from \param device , and syncronized host -/// timestamp +/// Returns the global timestamp from \param device , and synchronized host +/// timestamp. /// /// \param device device to query for timestamp /// \param deviceTime pointer to store device time -/// \param hostTime pointer to store syncronized host time -__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device device, - uint64_t *deviceTime, - uint64_t *hostTime); +/// \param hostTime pointer to store synchronized host time +__SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime); struct _pi_plugin { // PI version supported by host passed to the plugin. The Plugin diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index ac6674d184bb2..f5f3b32e3eddc 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9354,18 +9354,23 @@ pi_result _pi_buffer::free() { return PI_SUCCESS; } -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, - uint64_t *hostTime) { +pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { const uint64_t &ZeTimerResolution = - device->ZeDeviceProperties->timerResolution; + Device->ZeDeviceProperties->timerResolution; const uint64_t TimestampMaxCount = - ((1ULL << device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); - uint64_t deviceClockCount, dummy; + ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + uint64_t DeviceClockCount, Dummy; + + uint64_t *&HostTimeHandle = HostTime == nullptr ? &Dummy : HostTime; ZE_CALL(zeDeviceGetGlobalTimestamps, - (device->ZeDevice, hostTime == nullptr ? &dummy : hostTime, - &deviceClockCount)); - *deviceTime = (deviceClockCount & TimestampMaxCount) * ZeTimerResolution; + (device->ZeDevice, HostTimeHandle, &DeviceClockCount)); + + if (DeviceTime != nullptr) { + + *DeviceTime = (DeviceClockCount & TimestampMaxCount) * ZeTimerResolution; + } return PI_SUCCESS; } } // extern "C" diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 208cce3cfe151..e34dcff7affb6 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -260,8 +260,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t submitTime = 0; // Only used for level-zero plugin, submission time - // of associated MCommand + uint64_t MSubmitTime = 0; // Only used for level-zero plugin, submission time + // of associated MCommand ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; From 3700114eee0ae6864814ae31994b4ffc5d7d5cd7 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 13 Dec 2022 16:18:18 -0800 Subject: [PATCH 31/53] Fix build errors Signed-off-by: Rauf, Rana --- sycl/plugins/level_zero/pi_level_zero.cpp | 4 +--- sycl/source/detail/event_impl.cpp | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index f5f3b32e3eddc..8701f64badf8e 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -9362,10 +9362,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); uint64_t DeviceClockCount, Dummy; - uint64_t *&HostTimeHandle = HostTime == nullptr ? &Dummy : HostTime; - ZE_CALL(zeDeviceGetGlobalTimestamps, - (device->ZeDevice, HostTimeHandle, &DeviceClockCount)); + (Device->ZeDevice, HostTime == nullptr ? &Dummy : HostTime, &DeviceClockCount)); if (DeviceTime != nullptr) { diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index b9c3d59040dff..336c37c56f545 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -267,7 +267,7 @@ event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MHostEvent) { if (getPlugin().getBackend() == backend::ext_oneapi_level_zero) { - return submitTime; + return MSubmitTime; } if (MEvent) { return get_event_profiling_info( @@ -428,8 +428,8 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } -void event_impl::setSubmissionTime(uint64_t time) { submitTime = time; } -uint64_t event_impl::getSubmissionTime() { return submitTime; } +void event_impl::setSubmissionTime(uint64_t time) { MSubmitTime = time; } +uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } bool event_impl::isCompleted() { return get_info() == info::event_command_status::complete; From 5ee56ef4eb9e46be228eef1776d4f6553f60c221 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 15 Dec 2022 08:38:23 -0800 Subject: [PATCH 32/53] Add piGetDeviceAndHostTimer for CUDA and HIP Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 28 ++++++++++++--- .../esimd_emulator/pi_esimd_emulator.cpp | 3 +- sycl/plugins/hip/pi_hip.cpp | 21 ++++++++++- sycl/plugins/opencl/pi_opencl.cpp | 36 +++++++++++++++++-- sycl/source/detail/event_impl.cpp | 9 +---- sycl/source/handler.cpp | 12 +++---- sycl/unittests/helpers/PiMockPlugin.hpp | 6 ++-- 7 files changed, 89 insertions(+), 26 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index b648bfdab24fe..d26c42eab07fb 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -24,6 +24,7 @@ #include #include #include +#include // Forward declarations void enableCUDATracing(); @@ -2134,7 +2135,7 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ _pi_context::kind::user_defined, newContext, *devices}); } - +_pi_platform::evBase_ static std::once_flag initFlag; std::call_once( initFlag, @@ -5486,9 +5487,28 @@ pi_result cuda_piTearDown(void *) { return PI_SUCCESS; } -pi_result cuda_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, - uint64_t *hostTime) { - assert(0 && "Method not implemented"); +pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + cudaEvent_t event; + if(DeviceTime){ + PI_CHECK_ERROR(cudaEventCreateWithFlags(&event, cudaEventDefault)); + PI_CHECK_ERROR(cudaEventRecord(event)); + } + using namespace std::chrono; + if(HostTime){ + *HostTime = duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + + if(DeviceTime){ + PI_CHECK_ERROR(cudaEventSynchronize(event)); + + float elapsedTime = 0.0f; + PI_CHECK_ERROR(cudaEventElapsedTime(&elapsedTime,_pi_platform::evBase_,event)); + *DeviceTime=(uint64_t) (elapsedTime * (double)1e6); + } + + return PI_SUCCESS; } const char SupportedVersion[] = _PI_CUDA_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 313e3056cf9ed..523026fa04f52 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2051,7 +2051,8 @@ pi_result piTearDown(void *) { pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { - assert(0 && "Method not implemented"); + PiTrace("Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); + return PI_SUCCESS; } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 081c8cb991474..96d8cbe44540c 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -24,6 +24,7 @@ #include #include #include +#include namespace { // Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be @@ -5210,7 +5211,25 @@ pi_result hip_piTearDown(void *PluginParameter) { pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { - assert(0 && "Method not implemented"); + hipEvent_t event; + if(DeviceTime){ + PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); + PI_CHECK_ERROR(hipEventRecord(event)); + } + using namespace std::chrono; + if(HostTime){ + *HostTime = duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + + if(DeviceTime){ + PI_CHECK_ERROR(hipEventSynchronize(event)); + + float elapsedTime = 0.0f; + PI_CHECK_ERROR(hipEventElapsedTime(&elapsedTime,_pi_platform::evBase_,event)); + *DeviceTime=(uint64_t) (elapsedTime * (double)1e6); + } + return PI_SUCCESS; } const char SupportedVersion[] = _PI_HIP_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 607e19d423a7b..e605ae4c6c587 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1665,9 +1665,39 @@ pi_result piTearDown(void *PluginParameter) { return PI_SUCCESS; } -pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, - uint64_t *hostTime) { - assert(0 && "Method not implemented"); +pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { + OCLV::OpenCLVersion devVer,platVer; + cl_platform_id platform; + cl_device_id deviceID= cast(Device); + + auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &platform, nullptr); + if (ret_err != CL_SUCCESS){ + return cast(ret_err); + } + + ret_err= getDeviceVersion(deviceID, devVer); + + if (ret_err != CL_SUCCESS){ + return cast(ret_err); + } + + ret_err = getPlatformVersion(platform,platVer); + + if(platVer < OCLV::V2_1 && devVer < OCLV::V2_1){ + return PI_ERROR_INVALID_OPERATION; + } + + if(HostTime){ + if(DeviceTime){ + clGetDeviceAndHostTimer(deviceID,DeviceTime,HostTime); + }else { + clGetHostTimer(deviceID,HostTime); + } + } + + return PI_SUCCESS; } const char SupportedVersion[] = _PI_OPENCL_PLUGIN_VERSION_STRING; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 336c37c56f545..530f389046cfc 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -266,14 +266,7 @@ uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); if (!MHostEvent) { - if (getPlugin().getBackend() == backend::ext_oneapi_level_zero) { - return MSubmitTime; - } - if (MEvent) { - return get_event_profiling_info( - this->getHandleRef(), this->getPlugin()); - } - return 0; + return MSubmitTime; } if (!MHostProfilingInfo) throw invalid_object_error("Profiling info is not available.", diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 8837c0d206b86..a0eae42fd6a92 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -95,14 +95,12 @@ event handler::finalize() { MIsFinalized = true; auto setSubmissionTime = [&](detail::EventImplPtr event) { - if (!MQueue->is_host()) { + if (!MQueue->is_host() && MQueue->MIsProfilingEnabled) { auto plugin = MQueue->getPlugin(); - if (plugin.getBackend() == backend::ext_oneapi_level_zero) { - uint64_t submitTime = 0; - plugin.call( - MQueue->getDeviceImplPtr()->getHandleRef(), &submitTime, nullptr); - event->setSubmissionTime(submitTime); - } + uint64_t submitTime = 0; + plugin.call( + MQueue->getDeviceImplPtr()->getHandleRef(), &submitTime, nullptr); + event->setSubmissionTime(submitTime); } }; diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 1991b57071461..0ca5196a4c257 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1105,7 +1105,9 @@ inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, auto timeNanoseconds = duration_cast(steady_clock::now().time_since_epoch()) .count(); - *deviceTime = timeNanoseconds; - *hostTime = timeNanoseconds; + if(deviceTime){ + *deviceTime = timeNanoseconds;} + if(hostTime){ + *hostTime = timeNanoseconds;} return PI_SUCCESS; } From ba0b2db3e241d7771e1818659de4527f95288661 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 15 Dec 2022 08:42:18 -0800 Subject: [PATCH 33/53] Formatting Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 31 ++++++++++--------- .../esimd_emulator/pi_esimd_emulator.cpp | 3 +- sycl/plugins/hip/pi_hip.cpp | 26 +++++++++------- sycl/plugins/opencl/pi_opencl.cpp | 28 ++++++++--------- sycl/unittests/helpers/PiMockPlugin.hpp | 10 +++--- 5 files changed, 52 insertions(+), 46 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index d26c42eab07fb..f622950623ea2 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -18,13 +18,13 @@ #include #include +#include #include #include #include #include #include #include -#include // Forward declarations void enableCUDATracing(); @@ -2135,8 +2135,7 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ _pi_context::kind::user_defined, newContext, *devices}); } -_pi_platform::evBase_ - static std::once_flag initFlag; + _pi_platform::evBase_ static std::once_flag initFlag; std::call_once( initFlag, [](pi_result &err) { @@ -5490,24 +5489,26 @@ pi_result cuda_piTearDown(void *) { pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { cudaEvent_t event; - if(DeviceTime){ - PI_CHECK_ERROR(cudaEventCreateWithFlags(&event, cudaEventDefault)); - PI_CHECK_ERROR(cudaEventRecord(event)); + if (DeviceTime) { + PI_CHECK_ERROR(cudaEventCreateWithFlags(&event, cudaEventDefault)); + PI_CHECK_ERROR(cudaEventRecord(event)); } using namespace std::chrono; - if(HostTime){ - *HostTime = duration_cast(steady_clock::now().time_since_epoch()) - .count(); + if (HostTime) { + *HostTime = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); } - if(DeviceTime){ - PI_CHECK_ERROR(cudaEventSynchronize(event)); + if (DeviceTime) { + PI_CHECK_ERROR(cudaEventSynchronize(event)); - float elapsedTime = 0.0f; - PI_CHECK_ERROR(cudaEventElapsedTime(&elapsedTime,_pi_platform::evBase_,event)); - *DeviceTime=(uint64_t) (elapsedTime * (double)1e6); + float elapsedTime = 0.0f; + PI_CHECK_ERROR( + cudaEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); + *DeviceTime = (uint64_t)(elapsedTime * (double)1e6); } - + return PI_SUCCESS; } diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 523026fa04f52..9fdf6cb9d30b6 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2051,7 +2051,8 @@ pi_result piTearDown(void *) { pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { - PiTrace("Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); + PiTrace( + "Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); return PI_SUCCESS; } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 96d8cbe44540c..24d66ee8114a4 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -18,13 +18,13 @@ #include #include +#include #include #include #include #include #include #include -#include namespace { // Hipify doesn't support cuArrayGetDescriptor, on AMD the hipArray can just be @@ -5212,22 +5212,24 @@ pi_result hip_piTearDown(void *PluginParameter) { pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { hipEvent_t event; - if(DeviceTime){ - PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); - PI_CHECK_ERROR(hipEventRecord(event)); + if (DeviceTime) { + PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); + PI_CHECK_ERROR(hipEventRecord(event)); } using namespace std::chrono; - if(HostTime){ - *HostTime = duration_cast(steady_clock::now().time_since_epoch()) - .count(); + if (HostTime) { + *HostTime = + duration_cast(steady_clock::now().time_since_epoch()) + .count(); } - if(DeviceTime){ - PI_CHECK_ERROR(hipEventSynchronize(event)); + if (DeviceTime) { + PI_CHECK_ERROR(hipEventSynchronize(event)); - float elapsedTime = 0.0f; - PI_CHECK_ERROR(hipEventElapsedTime(&elapsedTime,_pi_platform::evBase_,event)); - *DeviceTime=(uint64_t) (elapsedTime * (double)1e6); + float elapsedTime = 0.0f; + PI_CHECK_ERROR( + hipEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); + *DeviceTime = (uint64_t)(elapsedTime * (double)1e6); } return PI_SUCCESS; } diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index e605ae4c6c587..8093d1d13fb60 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1667,33 +1667,33 @@ pi_result piTearDown(void *PluginParameter) { pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { - OCLV::OpenCLVersion devVer,platVer; + OCLV::OpenCLVersion devVer, platVer; cl_platform_id platform; - cl_device_id deviceID= cast(Device); + cl_device_id deviceID = cast(Device); auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, - sizeof(cl_platform_id), &platform, nullptr); - if (ret_err != CL_SUCCESS){ + sizeof(cl_platform_id), &platform, nullptr); + if (ret_err != CL_SUCCESS) { return cast(ret_err); } - ret_err= getDeviceVersion(deviceID, devVer); + ret_err = getDeviceVersion(deviceID, devVer); - if (ret_err != CL_SUCCESS){ + if (ret_err != CL_SUCCESS) { return cast(ret_err); - } + } - ret_err = getPlatformVersion(platform,platVer); + ret_err = getPlatformVersion(platform, platVer); - if(platVer < OCLV::V2_1 && devVer < OCLV::V2_1){ + if (platVer < OCLV::V2_1 && devVer < OCLV::V2_1) { return PI_ERROR_INVALID_OPERATION; } - if(HostTime){ - if(DeviceTime){ - clGetDeviceAndHostTimer(deviceID,DeviceTime,HostTime); - }else { - clGetHostTimer(deviceID,HostTime); + if (HostTime) { + if (DeviceTime) { + clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime); + } else { + clGetHostTimer(deviceID, HostTime); } } diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 0ca5196a4c257..c8c46da7ec357 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -1105,9 +1105,11 @@ inline pi_result mock_piGetDeviceAndHostTimer(pi_device device, auto timeNanoseconds = duration_cast(steady_clock::now().time_since_epoch()) .count(); - if(deviceTime){ - *deviceTime = timeNanoseconds;} - if(hostTime){ - *hostTime = timeNanoseconds;} + if (deviceTime) { + *deviceTime = timeNanoseconds; + } + if (hostTime) { + *hostTime = timeNanoseconds; + } return PI_SUCCESS; } From 8268fdfd35ff1c14023e5226d25bd3d7f1bfc752 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 15 Dec 2022 15:28:48 -0800 Subject: [PATCH 34/53] Fix issues Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 15 ++++++----- .../esimd_emulator/pi_esimd_emulator.cpp | 2 +- sycl/plugins/hip/pi_hip.cpp | 4 +-- sycl/plugins/opencl/pi_opencl.cpp | 12 +++++---- sycl/source/detail/device_impl.cpp | 27 ++++++++++++++++++- sycl/source/detail/device_impl.hpp | 4 +++ sycl/source/detail/event_impl.hpp | 3 +-- sycl/source/event.cpp | 16 ++++++----- sycl/source/handler.cpp | 14 ++-------- 9 files changed, 61 insertions(+), 36 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index f622950623ea2..1e116dc661454 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -2135,7 +2135,7 @@ pi_result cuda_piContextCreate(const pi_context_properties *properties, piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{ _pi_context::kind::user_defined, newContext, *devices}); } - _pi_platform::evBase_ static std::once_flag initFlag; + static std::once_flag initFlag; std::call_once( initFlag, [](pi_result &err) { @@ -5488,12 +5488,13 @@ pi_result cuda_piTearDown(void *) { pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { - cudaEvent_t event; + _pi_event::native_type event; + using namespace std::chrono; + if (DeviceTime) { - PI_CHECK_ERROR(cudaEventCreateWithFlags(&event, cudaEventDefault)); - PI_CHECK_ERROR(cudaEventRecord(event)); + PI_CHECK_ERROR(cuEventCreate(&event, CU_EVENT_DEFAULT)); + PI_CHECK_ERROR(cuEventRecord(event, 0)); } - using namespace std::chrono; if (HostTime) { *HostTime = duration_cast(steady_clock::now().time_since_epoch()) @@ -5501,11 +5502,11 @@ pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, } if (DeviceTime) { - PI_CHECK_ERROR(cudaEventSynchronize(event)); + PI_CHECK_ERROR(cuEventSynchronize(event)); float elapsedTime = 0.0f; PI_CHECK_ERROR( - cudaEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); + cuEventElapsedTime(&elapsedTime, _pi_platform::evBase_, event)); *DeviceTime = (uint64_t)(elapsedTime * (double)1e6); } diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 9fdf6cb9d30b6..401030bfea1c0 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2053,7 +2053,7 @@ pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { PiTrace( "Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); - return PI_SUCCESS; + return PI_ERROR_INVALID_OPERATION; } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 24d66ee8114a4..bad971c54fa8f 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5211,12 +5211,12 @@ pi_result hip_piTearDown(void *PluginParameter) { pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { - hipEvent_t event; + _pi_event::native_type event; + using namespace std::chrono; if (DeviceTime) { PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); PI_CHECK_ERROR(hipEventRecord(event)); } - using namespace std::chrono; if (HostTime) { *HostTime = duration_cast(steady_clock::now().time_since_epoch()) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 8093d1d13fb60..9f5dc256ca718 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1671,6 +1671,7 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, cl_platform_id platform; cl_device_id deviceID = cast(Device); + //TODO: Cache OpenCL version for each device and platform auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, nullptr); if (ret_err != CL_SUCCESS) { @@ -1689,12 +1690,13 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_ERROR_INVALID_OPERATION; } - if (HostTime) { - if (DeviceTime) { - clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime); - } else { + uint64_t dummy; + + if (DeviceTime) { + clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime == nullptr ? &dummy: HostTime); + + }else if (HostTime){ clGetHostTimer(deviceID, HostTime); - } } return PI_SUCCESS; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index f039e99afd2a4..846081d5ff6a1 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -36,7 +36,7 @@ device_impl::device_impl(RT::PiDevice Device, const plugin &Plugin) device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) - : MDevice(Device), MIsHostDevice(false) { + : MDevice(Device), MIsHostDevice(false), deviceTimePair(std::make_pair(0,0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -435,6 +435,31 @@ std::string device_impl::getDeviceName() const { return MDeviceName; } +uint64_t device_impl::getTime(){ + static uint64_t timeTillRefresh= 100e9; + uint64_t hostTime; + if(MIsHostDevice){ + using namespace std::chrono; + return duration_cast(steady_clock::now().time_since_epoch()) + .count(); + } + auto plugin = getPlugin(); + RT::PiResult result = plugin.call_nocheck(MDevice, nullptr, &hostTime); + plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS : result); + + if(result == PI_ERROR_INVALID_OPERATION){ + return 0; + } + uint64_t diff= hostTime - deviceTimePair.second; + + if( diff > timeTillRefresh){ + plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); + diff=0; + } + + return deviceTimePair.first + diff; +} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 17fdd6c746367..ee65e19b8e696 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -18,6 +18,7 @@ #include #include +#include namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { @@ -237,6 +238,8 @@ class device_impl { std::string getDeviceName() const; + uint64_t getTime(); + private: explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin); @@ -248,6 +251,7 @@ class device_impl { bool MIsAssertFailSupported = false; mutable std::string MDeviceName; mutable std::once_flag MDeviceNameFlag; + std::pair deviceTimePair ; }; // class device_impl } // namespace detail diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index e34dcff7affb6..39a0576bea7ca 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -260,8 +260,7 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t MSubmitTime = 0; // Only used for level-zero plugin, submission time - // of associated MCommand + uint64_t MSubmitTime = 0; // Stores submission time of command associated with event ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 7950e70162d5e..54df3f463d7a1 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -77,6 +77,15 @@ event::get_info() const { return impl->template get_info(); } +template +typename detail::is_event_profiling_info_desc::return_type +event::get_profiling_info() const{ + if constexpr(!std::is_same_v){ + impl->wait(impl); + } + return impl->template get_profiling_info(); +} + #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ template __SYCL_EXPORT ReturnT event::get_info() const; @@ -85,12 +94,7 @@ event::get_info() const { #undef __SYCL_PARAM_TRAITS_SPEC #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ - template <> \ - __SYCL_EXPORT ReturnT event::get_profiling_info() \ - const { \ - impl->wait(impl); \ - return impl->get_profiling_info(); \ - } +template __SYCL_EXPORT ReturnT event::get_profiling_info() const; #include diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index a0eae42fd6a92..0f5683eb99999 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -94,16 +94,6 @@ event handler::finalize() { return MLastEvent; MIsFinalized = true; - auto setSubmissionTime = [&](detail::EventImplPtr event) { - if (!MQueue->is_host() && MQueue->MIsProfilingEnabled) { - auto plugin = MQueue->getPlugin(); - uint64_t submitTime = 0; - plugin.call( - MQueue->getDeviceImplPtr()->getHandleRef(), &submitTime, nullptr); - event->setSubmissionTime(submitTime); - } - }; - const auto &type = getType(); if (type == detail::CG::Kernel) { // If there were uses of set_specialization_constant build the kernel_bundle @@ -221,7 +211,7 @@ event handler::finalize() { else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - setSubmissionTime(NewEvent); + NewEvent->setSubmissionTime(std::move(MQueue->getDeviceImplPtr()->getTime())); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } return MLastEvent; @@ -346,7 +336,7 @@ event handler::finalize() { detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( std::move(CommandGroup), std::move(MQueue)); - setSubmissionTime(Event); + Event->setSubmissionTime(MQueue->getDeviceImplPtr()->getTime()); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } From 5e2949f8961e86ae802b713cde35a816200aab0d Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Fri, 16 Dec 2022 11:35:42 -0800 Subject: [PATCH 35/53] Added documentation Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 7 +++---- sycl/plugins/cuda/pi_cuda.cpp | 4 +++- sycl/plugins/hip/pi_hip.cpp | 3 +++ sycl/plugins/level_zero/pi_level_zero.cpp | 5 +++-- sycl/source/detail/device_impl.cpp | 18 +++++++++++++++--- sycl/source/detail/device_impl.hpp | 2 ++ sycl/source/detail/event_impl.cpp | 11 ++++++++++- sycl/source/detail/event_impl.hpp | 5 ++++- sycl/source/handler.cpp | 4 ++-- 9 files changed, 45 insertions(+), 14 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 611af0be03724..7df01dc94587c 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1905,12 +1905,11 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// timestamp __SYCL_EXPORT pi_result piPluginGetLastError(char **message); -/// Returns the global timestamp from \param device , and synchronized host -/// timestamp. +/// Queries device for it's global timestamp in nanoseconds, and updates HostTime with the value of the host timer at the closest possible point in time to that at which DeviceTime was returned. /// /// \param device device to query for timestamp -/// \param deviceTime pointer to store device time -/// \param hostTime pointer to store synchronized host time +/// \param deviceTime pointer to store device timestamp in nanoseconds. Optional argument, can be nullptr +/// \param hostTime pointer to store host timestamp in nanoseconds. Optional argurment, can be nullptr __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 1e116dc661454..1cadc68c62365 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -529,6 +529,7 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. + //TODO: Remove this and other related code for setting or getting queued/submit time result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0)); result = PI_CHECK_ERROR(cuEventRecord(evStart_, stream_)); } @@ -556,7 +557,7 @@ bool _pi_event::is_completed() const noexcept { } return true; } - +//TODO: Remove this function and other code for setting or getting queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3887,6 +3888,7 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, } switch (param_name) { +//TODO: Remove this and other related code for setting or getting queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index bad971c54fa8f..d0271f75d7c54 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -565,6 +565,7 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. + //TODO: Remove this and other related code for setting or getting queued/submit time PI_CHECK_ERROR(hipEventRecord(evQueued_, 0)); PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get())); } @@ -593,6 +594,7 @@ bool _pi_event::is_completed() const noexcept { return true; } +//TODO: Remove this and other code for setting or getting queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3706,6 +3708,7 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, } switch (param_name) { + //TODO: Remove this and other related code for setting or getting queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 8701f64badf8e..94be6e672c8b2 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5986,9 +5986,9 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, ContextEndTime *= ZeTimerResolution; return ReturnValue(ContextEndTime); } + //TODO: Remove this and other related code for setting or getting queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: - // TODO: Support these when Level Zero supported is added. return ReturnValue(uint64_t{0}); default: zePrint("piEventGetProfilingInfo: not supported ParamName\n"); @@ -9363,7 +9363,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t DeviceClockCount, Dummy; ZE_CALL(zeDeviceGetGlobalTimestamps, - (Device->ZeDevice, HostTime == nullptr ? &Dummy : HostTime, &DeviceClockCount)); + (Device->ZeDevice, HostTime == nullptr ? &Dummy : HostTime, + &DeviceClockCount)); if (DeviceTime != nullptr) { diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 846081d5ff6a1..de2dbac899512 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -435,8 +435,18 @@ std::string device_impl::getDeviceName() const { return MDeviceName; } +/* On first call this function queries for device timestamp + along with host synchronized timestamp + and stores it in memeber varaible deviceTimePair. + Succive calls to this function would just retrieve the host timestamp , + compute difference against the host timestamp in deviceTimePair + and calculate the device timestamp based on the difference. + deviceTimePair is refreshed with new device and host timestamp after a certain interval + (determined by timeTillRefresh) to account for clock skew between host and device. +*/ + uint64_t device_impl::getTime(){ - static uint64_t timeTillRefresh= 100e9; + constexpr uint64_t timeTillRefresh= 100e9; uint64_t hostTime; if(MIsHostDevice){ using namespace std::chrono; @@ -448,11 +458,13 @@ uint64_t device_impl::getTime(){ plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS : result); if(result == PI_ERROR_INVALID_OPERATION){ - return 0; + throw sycl::feature_not_supported( + "Device and/or backend does not support querying timestamp", + result); } uint64_t diff= hostTime - deviceTimePair.second; - if( diff > timeTillRefresh){ + if( diff > timeTillRefresh || diff <= 0){ plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); diff=0; } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index ee65e19b8e696..6673d2a0c2685 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -238,6 +238,8 @@ class device_impl { std::string getDeviceName() const; + /// Gets the current device timestamp + /// @throw sycl::feature_not_supported if feature is not supported on device uint64_t getTime(); private: diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 530f389046cfc..74868b6006d0f 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -421,8 +421,17 @@ void event_impl::cleanDepEventsThroughOneLevel() { } } -void event_impl::setSubmissionTime(uint64_t time) { MSubmitTime = time; } +void event_impl::setSubmissionTime() { + if(!MSubmittedQueue.expired()){ + auto queue=MSubmittedQueue.lock(); + if(queue->MIsProfilingEnabled){ + MSubmitTime= queue->getDeviceImplPtr()->getTime(); + } + } +} + uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } + bool event_impl::isCompleted() { return get_info() == info::event_command_status::complete; diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 39a0576bea7ca..e25a16ca61af8 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -221,9 +221,12 @@ class event_impl { MSubmittedQueue = SubmittedQueue; }; - void setSubmissionTime(uint64_t time); + /// Calling this function queries the current device timestamp and sets it as submission time for the command associated with this event. + void setSubmissionTime(); + /// @return Submission time for command associated with this event uint64_t getSubmissionTime(); + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 0f5683eb99999..49f5ada011656 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -211,7 +211,7 @@ event handler::finalize() { else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - NewEvent->setSubmissionTime(std::move(MQueue->getDeviceImplPtr()->getTime())); + NewEvent->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } return MLastEvent; @@ -336,7 +336,7 @@ event handler::finalize() { detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( std::move(CommandGroup), std::move(MQueue)); - Event->setSubmissionTime(MQueue->getDeviceImplPtr()->getTime()); + Event->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } From 19cd2e9b6ef0eec05225ab9c99cd47128f807781 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 15 Dec 2022 15:29:48 -0800 Subject: [PATCH 36/53] Formatting Signed-off-by: Rauf, Rana --- sycl/plugins/opencl/pi_opencl.cpp | 11 ++++++----- sycl/source/detail/device_impl.cpp | 14 +++++++++----- sycl/source/detail/device_impl.hpp | 2 +- sycl/source/detail/event_impl.hpp | 3 ++- sycl/source/event.cpp | 7 ++++--- 5 files changed, 22 insertions(+), 15 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 9f5dc256ca718..b4f5cadd362cb 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1671,7 +1671,7 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, cl_platform_id platform; cl_device_id deviceID = cast(Device); - //TODO: Cache OpenCL version for each device and platform + // TODO: Cache OpenCL version for each device and platform auto ret_err = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, nullptr); if (ret_err != CL_SUCCESS) { @@ -1693,10 +1693,11 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t dummy; if (DeviceTime) { - clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime == nullptr ? &dummy: HostTime); - - }else if (HostTime){ - clGetHostTimer(deviceID, HostTime); + clGetDeviceAndHostTimer(deviceID, DeviceTime, + HostTime == nullptr ? &dummy : HostTime); + + } else if (HostTime) { + clGetHostTimer(deviceID, HostTime); } return PI_SUCCESS; diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index de2dbac899512..959c24a4390d5 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -36,7 +36,8 @@ device_impl::device_impl(RT::PiDevice Device, const plugin &Plugin) device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) - : MDevice(Device), MIsHostDevice(false), deviceTimePair(std::make_pair(0,0)) { + : MDevice(Device), MIsHostDevice(false), + deviceTimePair(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -448,14 +449,17 @@ std::string device_impl::getDeviceName() const { uint64_t device_impl::getTime(){ constexpr uint64_t timeTillRefresh= 100e9; uint64_t hostTime; - if(MIsHostDevice){ + if (MIsHostDevice) { using namespace std::chrono; return duration_cast(steady_clock::now().time_since_epoch()) - .count(); + .count(); } auto plugin = getPlugin(); - RT::PiResult result = plugin.call_nocheck(MDevice, nullptr, &hostTime); - plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS : result); + RT::PiResult result = + plugin.call_nocheck( + MDevice, nullptr, &hostTime); + plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS + : result); if(result == PI_ERROR_INVALID_OPERATION){ throw sycl::feature_not_supported( diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 6673d2a0c2685..0d6fdfef35c53 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -253,7 +253,7 @@ class device_impl { bool MIsAssertFailSupported = false; mutable std::string MDeviceName; mutable std::once_flag MDeviceNameFlag; - std::pair deviceTimePair ; + std::pair deviceTimePair; }; // class device_impl } // namespace detail diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index e25a16ca61af8..4fea0a5cb2bdc 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -263,7 +263,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t MSubmitTime = 0; // Stores submission time of command associated with event + uint64_t MSubmitTime = + 0; // Stores submission time of command associated with event ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; diff --git a/sycl/source/event.cpp b/sycl/source/event.cpp index 54df3f463d7a1..7892de69cce81 100644 --- a/sycl/source/event.cpp +++ b/sycl/source/event.cpp @@ -79,8 +79,8 @@ event::get_info() const { template typename detail::is_event_profiling_info_desc::return_type -event::get_profiling_info() const{ - if constexpr(!std::is_same_v){ +event::get_profiling_info() const { + if constexpr (!std::is_same_v) { impl->wait(impl); } return impl->template get_profiling_info(); @@ -94,7 +94,8 @@ event::get_profiling_info() const{ #undef __SYCL_PARAM_TRAITS_SPEC #define __SYCL_PARAM_TRAITS_SPEC(DescType, Desc, ReturnT, PiCode) \ -template __SYCL_EXPORT ReturnT event::get_profiling_info() const; + template __SYCL_EXPORT ReturnT \ + event::get_profiling_info() const; #include From 82ff6c786d4b4bdee1930e2a4ebdd3a51317f637 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Fri, 16 Dec 2022 13:59:26 -0800 Subject: [PATCH 37/53] Formatting Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 9 ++++++--- sycl/plugins/cuda/pi_cuda.cpp | 9 ++++++--- sycl/plugins/hip/pi_hip.cpp | 8 +++++--- sycl/plugins/level_zero/pi_level_zero.cpp | 3 ++- sycl/source/detail/device_impl.cpp | 22 +++++++++++----------- sycl/source/detail/device_impl.hpp | 2 +- sycl/source/detail/event_impl.cpp | 8 ++++---- sycl/source/detail/event_impl.hpp | 5 +++-- 8 files changed, 38 insertions(+), 28 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 7df01dc94587c..bf6511d8b84bd 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1905,11 +1905,14 @@ __SYCL_EXPORT pi_result piTearDown(void *PluginParameter); /// timestamp __SYCL_EXPORT pi_result piPluginGetLastError(char **message); -/// Queries device for it's global timestamp in nanoseconds, and updates HostTime with the value of the host timer at the closest possible point in time to that at which DeviceTime was returned. +/// Queries device for it's global timestamp in nanoseconds, and updates +/// HostTime with the value of the host timer at the closest possible point in +/// time to that at which DeviceTime was returned. /// /// \param device device to query for timestamp -/// \param deviceTime pointer to store device timestamp in nanoseconds. Optional argument, can be nullptr -/// \param hostTime pointer to store host timestamp in nanoseconds. Optional argurment, can be nullptr +/// \param deviceTime pointer to store device timestamp in nanoseconds. Optional +/// argument, can be nullptr \param hostTime pointer to store host timestamp in +/// nanoseconds. Optional argurment, can be nullptr __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime); diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 1cadc68c62365..41b4efbac7235 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -529,7 +529,8 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. - //TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0)); result = PI_CHECK_ERROR(cuEventRecord(evStart_, stream_)); } @@ -557,7 +558,8 @@ bool _pi_event::is_completed() const noexcept { } return true; } -//TODO: Remove this function and other code for setting or getting queued/submit time +// TODO: Remove this function and other code for setting or getting +// queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3888,7 +3890,8 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, } switch (param_name) { -//TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index d0271f75d7c54..c2ee33963012c 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -565,7 +565,8 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. - //TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time PI_CHECK_ERROR(hipEventRecord(evQueued_, 0)); PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get())); } @@ -594,7 +595,7 @@ bool _pi_event::is_completed() const noexcept { return true; } -//TODO: Remove this and other code for setting or getting queued/submit time +// TODO: Remove this and other code for setting or getting queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3708,7 +3709,8 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, } switch (param_name) { - //TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 94be6e672c8b2..4022f9bf20c8c 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5986,7 +5986,8 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, ContextEndTime *= ZeTimerResolution; return ReturnValue(ContextEndTime); } - //TODO: Remove this and other related code for setting or getting queued/submit time + // TODO: Remove this and other related code for setting or getting + // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return ReturnValue(uint64_t{0}); diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 959c24a4390d5..7707571e67203 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -436,18 +436,19 @@ std::string device_impl::getDeviceName() const { return MDeviceName; } -/* On first call this function queries for device timestamp +/* On first call this function queries for device timestamp along with host synchronized timestamp - and stores it in memeber varaible deviceTimePair. + and stores it in memeber varaible deviceTimePair. Succive calls to this function would just retrieve the host timestamp , - compute difference against the host timestamp in deviceTimePair + compute difference against the host timestamp in deviceTimePair and calculate the device timestamp based on the difference. - deviceTimePair is refreshed with new device and host timestamp after a certain interval - (determined by timeTillRefresh) to account for clock skew between host and device. -*/ + deviceTimePair is refreshed with new device and host timestamp after a + certain interval (determined by timeTillRefresh) to account for clock skew + between host and device. +*/ uint64_t device_impl::getTime(){ - constexpr uint64_t timeTillRefresh= 100e9; + constexpr uint64_t timeTillRefresh = 100e9; uint64_t hostTime; if (MIsHostDevice) { using namespace std::chrono; @@ -463,12 +464,11 @@ uint64_t device_impl::getTime(){ if(result == PI_ERROR_INVALID_OPERATION){ throw sycl::feature_not_supported( - "Device and/or backend does not support querying timestamp", - result); + "Device and/or backend does not support querying timestamp", result); } uint64_t diff= hostTime - deviceTimePair.second; - - if( diff > timeTillRefresh || diff <= 0){ + + if (diff > timeTillRefresh || diff <= 0) { plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); diff=0; } diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 0d6fdfef35c53..c71fc0e0c9ac4 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -238,7 +238,7 @@ class device_impl { std::string getDeviceName() const; - /// Gets the current device timestamp + /// Gets the current device timestamp /// @throw sycl::feature_not_supported if feature is not supported on device uint64_t getTime(); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 74868b6006d0f..459c8de3b8e20 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -422,10 +422,10 @@ void event_impl::cleanDepEventsThroughOneLevel() { } void event_impl::setSubmissionTime() { - if(!MSubmittedQueue.expired()){ - auto queue=MSubmittedQueue.lock(); - if(queue->MIsProfilingEnabled){ - MSubmitTime= queue->getDeviceImplPtr()->getTime(); + if (!MSubmittedQueue.expired()) { + auto queue = MSubmittedQueue.lock(); + if (queue->MIsProfilingEnabled) { + MSubmitTime = queue->getDeviceImplPtr()->getTime(); } } } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 4fea0a5cb2bdc..85e8abd7e40d1 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -221,12 +221,13 @@ class event_impl { MSubmittedQueue = SubmittedQueue; }; - /// Calling this function queries the current device timestamp and sets it as submission time for the command associated with this event. + /// Calling this function queries the current device timestamp and sets it as + /// submission time for the command associated with this event. void setSubmissionTime(); /// @return Submission time for command associated with this event uint64_t getSubmissionTime(); - + QueueImplPtr getSubmittedQueue() const { return MSubmittedQueue.lock(); }; /// Checks if an event is in a fully intialized state. Default-constructed From 451de353d86428da6799e214cab18ec7c05b17af Mon Sep 17 00:00:00 2001 From: raaiq1 <106714052+raaiq1@users.noreply.github.com> Date: Mon, 19 Dec 2022 09:41:01 -0500 Subject: [PATCH 38/53] Apply suggestions from code review Co-authored-by: Romanov Vlad --- sycl/source/detail/device_impl.cpp | 2 +- sycl/source/detail/device_impl.hpp | 4 ++-- sycl/source/detail/event_impl.cpp | 10 ++++------ sycl/source/detail/event_impl.hpp | 4 ++-- 4 files changed, 9 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 7707571e67203..54af17eb73ff4 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -439,7 +439,7 @@ std::string device_impl::getDeviceName() const { /* On first call this function queries for device timestamp along with host synchronized timestamp and stores it in memeber varaible deviceTimePair. - Succive calls to this function would just retrieve the host timestamp , + Successive calls to this function would just retrieve the host timestamp , compute difference against the host timestamp in deviceTimePair and calculate the device timestamp based on the difference. deviceTimePair is refreshed with new device and host timestamp after a diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index c71fc0e0c9ac4..41cb1c9fdb62c 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -240,7 +240,7 @@ class device_impl { /// Gets the current device timestamp /// @throw sycl::feature_not_supported if feature is not supported on device - uint64_t getTime(); + uint64_t getCurrentDeviceTime(); private: explicit device_impl(pi_native_handle InteropDevice, RT::PiDevice Device, @@ -253,7 +253,7 @@ class device_impl { bool MIsAssertFailSupported = false; mutable std::string MDeviceName; mutable std::once_flag MDeviceNameFlag; - std::pair deviceTimePair; + std::pair MDeviceHostBaseTime; }; // class device_impl } // namespace detail diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 459c8de3b8e20..3cf5ba4eb98ca 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -422,12 +422,10 @@ void event_impl::cleanDepEventsThroughOneLevel() { } void event_impl::setSubmissionTime() { - if (!MSubmittedQueue.expired()) { - auto queue = MSubmittedQueue.lock(); - if (queue->MIsProfilingEnabled) { - MSubmitTime = queue->getDeviceImplPtr()->getTime(); - } - } + if (!MIsProfilingEnabled) + return; + if (QueueImplPtr Queue = getSubmittedQueue()) + MSubmitTime = Queue->getDeviceImplPtr()->getTime(); } uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 85e8abd7e40d1..464d285acde76 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -264,8 +264,8 @@ class event_impl { bool MIsInitialized = true; bool MIsContextInitialized = false; RT::PiEvent MEvent = nullptr; - uint64_t MSubmitTime = - 0; // Stores submission time of command associated with event + // Stores submission time of command associated with event + uint64_t MSubmitTime = 0; ContextImplPtr MContext; bool MHostEvent = true; std::unique_ptr MHostProfilingInfo; From 2d6a6366a39727d39914495fbb753688f83ad839 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 19 Dec 2022 14:29:20 -0800 Subject: [PATCH 39/53] Added unittests Signed-off-by: Rauf, Rana --- sycl/plugins/opencl/pi_opencl.cpp | 3 +- sycl/source/detail/device_impl.cpp | 38 ++++++---- sycl/source/detail/event_impl.cpp | 9 ++- sycl/source/handler.cpp | 3 +- sycl/unittests/queue/GetProfilingInfo.cpp | 84 +++++++++++++++++++++++ 5 files changed, 119 insertions(+), 18 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index b4f5cadd362cb..5ae31eaa9c69d 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1686,7 +1686,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, ret_err = getPlatformVersion(platform, platVer); - if (platVer < OCLV::V2_1 && devVer < OCLV::V2_1) { + if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { + setErrorMessage("OpenCL version for device and/or platform is less than 2.1",PI_ERROR_INVALID_OPERATION); return PI_ERROR_INVALID_OPERATION; } diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 54af17eb73ff4..7907ac85b3a59 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -37,7 +37,7 @@ device_impl::device_impl(pi_native_handle InteropDeviceHandle, RT::PiDevice Device, PlatformImplPtr Platform, const plugin &Plugin) : MDevice(Device), MIsHostDevice(false), - deviceTimePair(std::make_pair(0, 0)) { + MDeviceHostBaseTime(std::make_pair(0, 0)) { bool InteroperabilityConstructor = false; if (Device == nullptr) { @@ -438,17 +438,19 @@ std::string device_impl::getDeviceName() const { /* On first call this function queries for device timestamp along with host synchronized timestamp - and stores it in memeber varaible deviceTimePair. - Successive calls to this function would just retrieve the host timestamp , - compute difference against the host timestamp in deviceTimePair + and stores it in memeber varaible deviceTimePair. + Subsequent calls to this function would just retrieve the host timestamp , + compute difference against the host timestamp in deviceTimePair and calculate the device timestamp based on the difference. - deviceTimePair is refreshed with new device and host timestamp after a - certain interval (determined by timeTillRefresh) to account for clock skew - between host and device. -*/ + deviceTimePair is refreshed with new device and host timestamp after a certain interval + (determined by timeTillRefresh) to account for clock drift between host and device. +*/ + +uint64_t device_impl::getCurrentDeviceTime(){ + // To account for potential clock drift between host clock and device clock. + // The value set is arbitrary: 200 seconds + constexpr uint64_t timeTillRefresh= 200e9; -uint64_t device_impl::getTime(){ - constexpr uint64_t timeTillRefresh = 100e9; uint64_t hostTime; if (MIsHostDevice) { using namespace std::chrono; @@ -463,17 +465,25 @@ uint64_t device_impl::getTime(){ : result); if(result == PI_ERROR_INVALID_OPERATION){ + std::string errorMsg{}; + char* p; + plugin.call_nocheck(&p); + while (*p != '\0'){ + errorMsg +=*p; + p++; + } throw sycl::feature_not_supported( - "Device and/or backend does not support querying timestamp", result); + "Device and/or backend does not support querying timestamp: " + errorMsg, + result); } - uint64_t diff= hostTime - deviceTimePair.second; + uint64_t diff= hostTime - MDeviceHostBaseTime.second; if (diff > timeTillRefresh || diff <= 0) { - plugin.call(MDevice, &deviceTimePair.first, &deviceTimePair.second); + plugin.call(MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); diff=0; } - return deviceTimePair.first + diff; + return MDeviceHostBaseTime.first + diff; } } // namespace detail diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 3cf5ba4eb98ca..5beb20cc67460 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -424,8 +424,13 @@ void event_impl::cleanDepEventsThroughOneLevel() { void event_impl::setSubmissionTime() { if (!MIsProfilingEnabled) return; - if (QueueImplPtr Queue = getSubmittedQueue()) - MSubmitTime = Queue->getDeviceImplPtr()->getTime(); + if (QueueImplPtr Queue = getSubmittedQueue()){ + try{ + MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); + }catch(feature_not_supported & e){ + throw feature_not_supported(std::string("Unable to get command group submission time: ") + e.what(),PI_ERROR_INVALID_OPERATION); + } + } } uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 49f5ada011656..b5d1f01cb4cb9 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -210,7 +210,8 @@ event handler::finalize() { PI_ERROR_INVALID_OPERATION); else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - + + NewEvent->setSubmittedQueue(MQueue); NewEvent->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index f410811e63445..c6e5a1e70e236 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -17,6 +17,7 @@ #include #include +#include #include @@ -316,3 +317,86 @@ TEST(GetProfilingInfo, check_if_now_dead_queue_property_not_set) { // The test passes without this, but keep it still, just in case. sycl::detail::getSyclObjImpl(Ctx)->getKernelProgramCache().reset(); } + +bool DeviceTimerCalled; + +pi_result redefinedPiGetDeviceAndHostTimer(pi_device Device, uint64_t* DeviceTime,uint64_t* HostTime){ + DeviceTimerCalled=true; + return PI_SUCCESS; +} + +TEST(GetProfilingInfo, check_no_command_submission_time_when_event_profiling_disabled){ + using namespace sycl; + unittest::PiMock Mock; + platform Plt= Mock.getPlatform(); + Mock.redefine(redefinedPiGetDeviceAndHostTimer); + device Dev=Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx,Dev}; + DeviceTimerCalled=false; + + event E=Queue.submit([&](handler& cgh){ + cgh.single_task>([](){}); + }); + EXPECT_FALSE(DeviceTimerCalled); + +} + +//Checks to see if command submit time is calculated before queue.submit returns. +//A host accessor is contructed before submitting the command, +//to ensure command submission time is calculated even if command may not be enqueued +//due to overlap in data dependencies between the kernel and host accessor +TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor){ + using namespace sycl; + unittest::PiMock Mock; + platform Plt= Mock.getPlatform(); + Mock.redefine(redefinedPiGetDeviceAndHostTimer); + device Dev=Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx,Dev,property::queue::enable_profiling()}; + int data[1024]; + buffer Buf{data,range<1>{1024}}; + DeviceTimerCalled=false; + + accessor host_acc= Buf.get_access(); + event E=Queue.submit([&](handler& cgh){ + accessor writeRes{Buf,cgh,read_write}; + + cgh.single_task>([](){}); + }); + + EXPECT_TRUE(DeviceTimerCalled); + +} + +pi_result redefinedFailedPiGetDeviceAndHostTimer(pi_device Device, uint64_t* DeviceTime,uint64_t* HostTime){ + return PI_ERROR_INVALID_OPERATION; +} + +pi_result redefinedPiPluginGetLastError(char **message) { + static char messageString[50]= "Plugin version not supported"; + *message=messageString; + return PI_SUCCESS; +} + +TEST(GetProfilingInfo, submission_time_exception_check){ + using namespace sycl; + unittest::PiMock Mock; + platform Plt= Mock.getPlatform(); + Mock.redefine(redefinedFailedPiGetDeviceAndHostTimer); + Mock.redefine(redefinedPiPluginGetLastError); + device Dev=Plt.get_devices()[0]; + context Ctx{Dev}; + queue Queue{Ctx,Dev,property::queue::enable_profiling()}; + + try{ + event E=Queue.submit([&](handler& cgh){ + cgh.single_task>([](){}); + }); + FAIL(); + }catch(feature_not_supported &e){ + EXPECT_STREQ(e.what(),"Unable to get command group submission time: " + "Device and/or backend does not support querying timestamp: " + "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION) -59 (PI_ERROR_INVALID_OPERATION)"); + } +} From b9c417117365222c91c37cde4045d613f0c87927 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 19 Dec 2022 14:31:29 -0800 Subject: [PATCH 40/53] Formatting Signed-off-by: Rauf, Rana --- sycl/plugins/opencl/pi_opencl.cpp | 4 +- sycl/source/detail/device_impl.cpp | 31 ++++--- sycl/source/detail/event_impl.cpp | 15 ++-- sycl/source/handler.cpp | 2 +- sycl/unittests/queue/GetProfilingInfo.cpp | 105 ++++++++++++---------- 5 files changed, 87 insertions(+), 70 deletions(-) diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 5ae31eaa9c69d..bdea86faca36f 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1687,7 +1687,9 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, ret_err = getPlatformVersion(platform, platVer); if (platVer < OCLV::V2_1 || devVer < OCLV::V2_1) { - setErrorMessage("OpenCL version for device and/or platform is less than 2.1",PI_ERROR_INVALID_OPERATION); + setErrorMessage( + "OpenCL version for device and/or platform is less than 2.1", + PI_ERROR_INVALID_OPERATION); return PI_ERROR_INVALID_OPERATION; } diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 7907ac85b3a59..4a07dcae0cfbe 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -438,18 +438,19 @@ std::string device_impl::getDeviceName() const { /* On first call this function queries for device timestamp along with host synchronized timestamp - and stores it in memeber varaible deviceTimePair. + and stores it in memeber varaible deviceTimePair. Subsequent calls to this function would just retrieve the host timestamp , - compute difference against the host timestamp in deviceTimePair + compute difference against the host timestamp in deviceTimePair and calculate the device timestamp based on the difference. - deviceTimePair is refreshed with new device and host timestamp after a certain interval - (determined by timeTillRefresh) to account for clock drift between host and device. -*/ + deviceTimePair is refreshed with new device and host timestamp after a + certain interval (determined by timeTillRefresh) to account for clock drift + between host and device. +*/ -uint64_t device_impl::getCurrentDeviceTime(){ - // To account for potential clock drift between host clock and device clock. +uint64_t device_impl::getCurrentDeviceTime() { + // To account for potential clock drift between host clock and device clock. // The value set is arbitrary: 200 seconds - constexpr uint64_t timeTillRefresh= 200e9; + constexpr uint64_t timeTillRefresh = 200e9; uint64_t hostTime; if (MIsHostDevice) { @@ -466,20 +467,22 @@ uint64_t device_impl::getCurrentDeviceTime(){ if(result == PI_ERROR_INVALID_OPERATION){ std::string errorMsg{}; - char* p; + char *p; plugin.call_nocheck(&p); - while (*p != '\0'){ - errorMsg +=*p; + while (*p != '\0') { + errorMsg += *p; p++; } throw sycl::feature_not_supported( - "Device and/or backend does not support querying timestamp: " + errorMsg, + "Device and/or backend does not support querying timestamp: " + + errorMsg, result); } - uint64_t diff= hostTime - MDeviceHostBaseTime.second; + uint64_t diff = hostTime - MDeviceHostBaseTime.second; if (diff > timeTillRefresh || diff <= 0) { - plugin.call(MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); + plugin.call( + MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); diff=0; } diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 5beb20cc67460..348b215420cf5 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -424,13 +424,16 @@ void event_impl::cleanDepEventsThroughOneLevel() { void event_impl::setSubmissionTime() { if (!MIsProfilingEnabled) return; - if (QueueImplPtr Queue = getSubmittedQueue()){ - try{ - MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); - }catch(feature_not_supported & e){ - throw feature_not_supported(std::string("Unable to get command group submission time: ") + e.what(),PI_ERROR_INVALID_OPERATION); - } + if (QueueImplPtr Queue = getSubmittedQueue()) { + try { + MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); + } catch (feature_not_supported &e) { + throw feature_not_supported( + std::string("Unable to get command group submission time: ") + + e.what(), + PI_ERROR_INVALID_OPERATION); } + } } uint64_t event_impl::getSubmissionTime() { return MSubmitTime; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b5d1f01cb4cb9..21904674f21ca 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -210,7 +210,7 @@ event handler::finalize() { PI_ERROR_INVALID_OPERATION); else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - + NewEvent->setSubmittedQueue(MQueue); NewEvent->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(NewEvent); diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index c6e5a1e70e236..2b5526158e22e 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -320,83 +320,92 @@ TEST(GetProfilingInfo, check_if_now_dead_queue_property_not_set) { bool DeviceTimerCalled; -pi_result redefinedPiGetDeviceAndHostTimer(pi_device Device, uint64_t* DeviceTime,uint64_t* HostTime){ - DeviceTimerCalled=true; - return PI_SUCCESS; +pi_result redefinedPiGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime) { + DeviceTimerCalled = true; + return PI_SUCCESS; } -TEST(GetProfilingInfo, check_no_command_submission_time_when_event_profiling_disabled){ +TEST(GetProfilingInfo, + check_no_command_submission_time_when_event_profiling_disabled) { using namespace sycl; unittest::PiMock Mock; - platform Plt= Mock.getPlatform(); - Mock.redefine(redefinedPiGetDeviceAndHostTimer); - device Dev=Plt.get_devices()[0]; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedPiGetDeviceAndHostTimer); + device Dev = Plt.get_devices()[0]; context Ctx{Dev}; - queue Queue{Ctx,Dev}; - DeviceTimerCalled=false; + queue Queue{Ctx, Dev}; + DeviceTimerCalled = false; - event E=Queue.submit([&](handler& cgh){ - cgh.single_task>([](){}); - }); + event E = Queue.submit( + [&](handler &cgh) { cgh.single_task>([]() {}); }); EXPECT_FALSE(DeviceTimerCalled); - } -//Checks to see if command submit time is calculated before queue.submit returns. -//A host accessor is contructed before submitting the command, -//to ensure command submission time is calculated even if command may not be enqueued -//due to overlap in data dependencies between the kernel and host accessor -TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor){ +// Checks to see if command submit time is calculated before queue.submit +// returns. A host accessor is contructed before submitting the command, to +// ensure command submission time is calculated even if command may not be +// enqueued due to overlap in data dependencies between the kernel and host +// accessor +TEST(GetProfilingInfo, check_command_submission_time_with_host_accessor) { using namespace sycl; unittest::PiMock Mock; - platform Plt= Mock.getPlatform(); - Mock.redefine(redefinedPiGetDeviceAndHostTimer); - device Dev=Plt.get_devices()[0]; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedPiGetDeviceAndHostTimer); + device Dev = Plt.get_devices()[0]; context Ctx{Dev}; - queue Queue{Ctx,Dev,property::queue::enable_profiling()}; + queue Queue{Ctx, Dev, property::queue::enable_profiling()}; int data[1024]; - buffer Buf{data,range<1>{1024}}; - DeviceTimerCalled=false; + buffer Buf{data, range<1>{1024}}; + DeviceTimerCalled = false; - accessor host_acc= Buf.get_access(); - event E=Queue.submit([&](handler& cgh){ - accessor writeRes{Buf,cgh,read_write}; + accessor host_acc = Buf.get_access(); + event E = Queue.submit([&](handler &cgh) { + accessor writeRes{Buf, cgh, read_write}; - cgh.single_task>([](){}); + cgh.single_task>([]() {}); }); EXPECT_TRUE(DeviceTimerCalled); - } -pi_result redefinedFailedPiGetDeviceAndHostTimer(pi_device Device, uint64_t* DeviceTime,uint64_t* HostTime){ - return PI_ERROR_INVALID_OPERATION; +pi_result redefinedFailedPiGetDeviceAndHostTimer(pi_device Device, + uint64_t *DeviceTime, + uint64_t *HostTime) { + return PI_ERROR_INVALID_OPERATION; } pi_result redefinedPiPluginGetLastError(char **message) { - static char messageString[50]= "Plugin version not supported"; - *message=messageString; + static char messageString[50] = "Plugin version not supported"; + *message = messageString; return PI_SUCCESS; } -TEST(GetProfilingInfo, submission_time_exception_check){ +TEST(GetProfilingInfo, submission_time_exception_check) { using namespace sycl; unittest::PiMock Mock; - platform Plt= Mock.getPlatform(); - Mock.redefine(redefinedFailedPiGetDeviceAndHostTimer); - Mock.redefine(redefinedPiPluginGetLastError); - device Dev=Plt.get_devices()[0]; + platform Plt = Mock.getPlatform(); + Mock.redefine( + redefinedFailedPiGetDeviceAndHostTimer); + Mock.redefine( + redefinedPiPluginGetLastError); + device Dev = Plt.get_devices()[0]; context Ctx{Dev}; - queue Queue{Ctx,Dev,property::queue::enable_profiling()}; + queue Queue{Ctx, Dev, property::queue::enable_profiling()}; - try{ - event E=Queue.submit([&](handler& cgh){ - cgh.single_task>([](){}); - }); - FAIL(); - }catch(feature_not_supported &e){ - EXPECT_STREQ(e.what(),"Unable to get command group submission time: " - "Device and/or backend does not support querying timestamp: " - "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION) -59 (PI_ERROR_INVALID_OPERATION)"); + try { + event E = Queue.submit( + [&](handler &cgh) { cgh.single_task>([]() {}); }); + FAIL(); + } catch (feature_not_supported &e) { + EXPECT_STREQ( + e.what(), + "Unable to get command group submission time: " + "Device and/or backend does not support querying timestamp: " + "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION) -59 " + "(PI_ERROR_INVALID_OPERATION)"); } } From ac695db551eeaf42711264d70d5779498916c479 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 19 Dec 2022 14:42:13 -0800 Subject: [PATCH 41/53] More formatting Signed-off-by: Rauf, Rana --- sycl/source/detail/device_impl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_impl.cpp b/sycl/source/detail/device_impl.cpp index 4a07dcae0cfbe..b030b5f78dc9d 100644 --- a/sycl/source/detail/device_impl.cpp +++ b/sycl/source/detail/device_impl.cpp @@ -465,7 +465,7 @@ uint64_t device_impl::getCurrentDeviceTime() { plugin.checkPiResult(result == PI_ERROR_INVALID_OPERATION ? PI_SUCCESS : result); - if(result == PI_ERROR_INVALID_OPERATION){ + if (result == PI_ERROR_INVALID_OPERATION) { std::string errorMsg{}; char *p; plugin.call_nocheck(&p); @@ -483,7 +483,7 @@ uint64_t device_impl::getCurrentDeviceTime() { if (diff > timeTillRefresh || diff <= 0) { plugin.call( MDevice, &MDeviceHostBaseTime.first, &MDeviceHostBaseTime.second); - diff=0; + diff = 0; } return MDeviceHostBaseTime.first + diff; From 89ffa97c9823da8bbd60cc67d4f4b9dba9a72cdb Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Mon, 19 Dec 2022 14:52:19 -0800 Subject: [PATCH 42/53] Fix HIP fail Signed-off-by: Rauf, Rana --- sycl/plugins/hip/pi_hip.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index c2ee33963012c..579adb2e345c9 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5214,8 +5214,8 @@ pi_result hip_piTearDown(void *PluginParameter) { return PI_SUCCESS; } -pi_result hip_piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, - uint64_t *hostTime) { +pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, + uint64_t *HostTime) { _pi_event::native_type event; using namespace std::chrono; if (DeviceTime) { From 554fde5872af54769276cf6bf6a56562725f922c Mon Sep 17 00:00:00 2001 From: raaiq1 <106714052+raaiq1@users.noreply.github.com> Date: Tue, 20 Dec 2022 09:48:35 -0500 Subject: [PATCH 43/53] Apply suggestions from code review Co-authored-by: smaslov-intel --- sycl/include/sycl/detail/pi.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index bf6511d8b84bd..23cbab5099754 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1909,10 +1909,11 @@ __SYCL_EXPORT pi_result piPluginGetLastError(char **message); /// HostTime with the value of the host timer at the closest possible point in /// time to that at which DeviceTime was returned. /// -/// \param device device to query for timestamp -/// \param deviceTime pointer to store device timestamp in nanoseconds. Optional -/// argument, can be nullptr \param hostTime pointer to store host timestamp in -/// nanoseconds. Optional argurment, can be nullptr +/// \param Device device to query for timestamp +/// \param DeviceTime pointer to store device timestamp in nanoseconds. Optional +/// argument, can be nullptr +\param HostTime pointer to store host timestamp in +/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will not be written __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime); From 228b22e818d0eb96cdfcc2778cc643ca03376512 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 20 Dec 2022 07:52:28 -0800 Subject: [PATCH 44/53] Add review suggestions,fix HIP issues and handle host platform Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 5 +++-- sycl/plugins/hip/pi_hip.cpp | 23 +++++++++++++++-------- sycl/plugins/hip/pi_hip.hpp | 6 ++---- sycl/plugins/opencl/pi_opencl.cpp | 3 +-- sycl/source/detail/event_impl.cpp | 8 +------- 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 23cbab5099754..aea63ff0c26f2 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -1912,8 +1912,9 @@ __SYCL_EXPORT pi_result piPluginGetLastError(char **message); /// \param Device device to query for timestamp /// \param DeviceTime pointer to store device timestamp in nanoseconds. Optional /// argument, can be nullptr -\param HostTime pointer to store host timestamp in -/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will not be written +/// \param HostTime pointer to store host timestamp in +/// nanoseconds. Optional argurment, can be nullptr in which case timestamp will +/// not be written __SYCL_EXPORT pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime); diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 579adb2e345c9..47fd2c0764d6c 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -609,7 +609,7 @@ pi_uint64 _pi_event::get_start_time() const { assert(is_started()); PI_CHECK_ERROR( - hipEventElapsedTime(&miliSeconds, context_->evBase_, evStart_)); + hipEventElapsedTime(&miliSeconds, _pi_platform::evBase_, evStart_)); return static_cast(miliSeconds * 1.0e6); } @@ -617,7 +617,8 @@ pi_uint64 _pi_event::get_end_time() const { float miliSeconds = 0.0f; assert(is_started() && is_recorded()); - PI_CHECK_ERROR(hipEventElapsedTime(&miliSeconds, context_->evBase_, evEnd_)); + PI_CHECK_ERROR( + hipEventElapsedTime(&miliSeconds, _pi_platform::evBase_, evEnd_)); return static_cast(miliSeconds * 1.0e6); } @@ -1992,10 +1993,16 @@ pi_result hip_piContextCreate(const pi_context_properties *properties, _pi_context::kind::user_defined, newContext, *devices}); } - // Use default stream to record base event counter - PI_CHECK_ERROR( - hipEventCreateWithFlags(&piContextPtr->evBase_, hipEventDefault)); - PI_CHECK_ERROR(hipEventRecord(piContextPtr->evBase_, 0)); + static std::once_flag initFlag; + std::call_once( + initFlag, + [](pi_result &err) { + // Use default stream to record base event counter + PI_CHECK_ERROR( + hipEventCreateWithFlags(&_pi_platform::evBase_, hipEventDefault)); + PI_CHECK_ERROR(hipEventRecord(_pi_platform::evBase_, 0)); + }, + errcode_ret); // For non-primary scoped contexts keep the last active on top of the stack // as `cuCtxCreate` replaces it implicitly otherwise. @@ -2025,8 +2032,6 @@ pi_result hip_piContextRelease(pi_context ctxt) { std::unique_ptr<_pi_context> context{ctxt}; - PI_CHECK_ERROR(hipEventDestroy(context->evBase_)); - if (!ctxt->is_primary()) { hipCtx_t hipCtxt = ctxt->get(); // hipCtxSynchronize is not supported for AMD platform so we can just @@ -5389,3 +5394,5 @@ pi_result piPluginInit(pi_plugin *PluginInit) { } } // extern "C" + +hipEvent_t _pi_platform::evBase_{nullptr}; \ No newline at end of file diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp index af2ff53d6fa6f..c8f3c2e65dc10 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -65,6 +65,7 @@ using _pi_stream_guard = std::unique_lock; /// when devices are used. /// struct _pi_platform { + static hipEvent_t evBase_; // HIP event used as base counter std::vector> devices_; }; @@ -146,11 +147,8 @@ struct _pi_context { _pi_device *deviceId_; std::atomic_uint32_t refCount_; - hipEvent_t evBase_; // HIP event used as base counter - _pi_context(kind k, hipCtx_t ctxt, _pi_device *devId) - : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1}, - evBase_(nullptr) { + : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1} { hip_piDeviceRetain(deviceId_); }; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index bdea86faca36f..c00e24a014acc 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -1693,9 +1693,8 @@ pi_result piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, return PI_ERROR_INVALID_OPERATION; } - uint64_t dummy; - if (DeviceTime) { + uint64_t dummy; clGetDeviceAndHostTimer(deviceID, DeviceTime, HostTime == nullptr ? &dummy : HostTime); diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 348b215420cf5..72a3111d8d30c 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -265,13 +265,7 @@ template <> uint64_t event_impl::get_profiling_info() { checkProfilingPreconditions(); - if (!MHostEvent) { - return MSubmitTime; - } - if (!MHostProfilingInfo) - throw invalid_object_error("Profiling info is not available.", - PI_ERROR_PROFILING_INFO_NOT_AVAILABLE); - return MHostProfilingInfo->getStartTime(); + return MSubmitTime; } template <> From f6426f3aefaf3704cfe2f23fa55be4871dc9cbf9 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 20 Dec 2022 10:57:14 -0800 Subject: [PATCH 45/53] Fix ESIMD fails Signed-off-by: Rauf, Rana --- sycl/include/sycl/detail/pi.h | 1 - sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 2 +- sycl/source/detail/event_impl.cpp | 5 ++--- sycl/unittests/queue/GetProfilingInfo.cpp | 5 ++--- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index aea63ff0c26f2..027bb3d7b5055 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -53,7 +53,6 @@ // 10.14 Add PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY as an extension for // piDeviceGetInfo. // 11.15 piEventCreate creates even in the signalled state now. -<<<<<<< HEAD // 11.16 Add PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE and // PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH as an extension for // piDeviceGetInfo. diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 401030bfea1c0..9fdf6cb9d30b6 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2053,7 +2053,7 @@ pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { PiTrace( "Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); - return PI_ERROR_INVALID_OPERATION; + return PI_SUCCESS; } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 72a3111d8d30c..2f48d970aacbc 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -422,10 +422,9 @@ void event_impl::setSubmissionTime() { try { MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); } catch (feature_not_supported &e) { - throw feature_not_supported( + throw sycl::exception(make_error_code(errc::profiling), std::string("Unable to get command group submission time: ") + - e.what(), - PI_ERROR_INVALID_OPERATION); + e.what()); } } } diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index 2b5526158e22e..7cc9750255baf 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -400,12 +400,11 @@ TEST(GetProfilingInfo, submission_time_exception_check) { event E = Queue.submit( [&](handler &cgh) { cgh.single_task>([]() {}); }); FAIL(); - } catch (feature_not_supported &e) { + } catch (sycl::exception &e) { EXPECT_STREQ( e.what(), "Unable to get command group submission time: " "Device and/or backend does not support querying timestamp: " - "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION) -59 " - "(PI_ERROR_INVALID_OPERATION)"); + "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION)"); } } From 26319bf70f196f05b2d8a4bed53fe1da4ed4920d Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 20 Dec 2022 11:27:24 -0800 Subject: [PATCH 46/53] Fix ESIMD fails Signed-off-by: Rauf, Rana --- sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp | 2 +- sycl/source/detail/event_impl.cpp | 5 ++--- sycl/unittests/queue/GetProfilingInfo.cpp | 5 ++--- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index 401030bfea1c0..9fdf6cb9d30b6 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -2053,7 +2053,7 @@ pi_result piGetDeviceAndHostTimer(pi_device device, uint64_t *deviceTime, uint64_t *hostTime) { PiTrace( "Warning : Querying device clock not supported under PI_ESIMD_EMULATOR"); - return PI_ERROR_INVALID_OPERATION; + return PI_SUCCESS; } const char SupportedVersion[] = _PI_ESIMD_PLUGIN_VERSION_STRING; diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 72a3111d8d30c..2f48d970aacbc 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -422,10 +422,9 @@ void event_impl::setSubmissionTime() { try { MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); } catch (feature_not_supported &e) { - throw feature_not_supported( + throw sycl::exception(make_error_code(errc::profiling), std::string("Unable to get command group submission time: ") + - e.what(), - PI_ERROR_INVALID_OPERATION); + e.what()); } } } diff --git a/sycl/unittests/queue/GetProfilingInfo.cpp b/sycl/unittests/queue/GetProfilingInfo.cpp index 2b5526158e22e..7cc9750255baf 100644 --- a/sycl/unittests/queue/GetProfilingInfo.cpp +++ b/sycl/unittests/queue/GetProfilingInfo.cpp @@ -400,12 +400,11 @@ TEST(GetProfilingInfo, submission_time_exception_check) { event E = Queue.submit( [&](handler &cgh) { cgh.single_task>([]() {}); }); FAIL(); - } catch (feature_not_supported &e) { + } catch (sycl::exception &e) { EXPECT_STREQ( e.what(), "Unable to get command group submission time: " "Device and/or backend does not support querying timestamp: " - "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION) -59 " - "(PI_ERROR_INVALID_OPERATION)"); + "Plugin version not supported -59 (PI_ERROR_INVALID_OPERATION)"); } } From 843bf6c53d799fd2d30961827af281e1d03a3a45 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Tue, 20 Dec 2022 12:41:24 -0800 Subject: [PATCH 47/53] Fix command submit query placement Signed-off-by: Rauf, Rana --- sycl/source/detail/scheduler/scheduler.cpp | 1 + sycl/source/handler.cpp | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index e2e7f5df48cee..eed28df954632 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -137,6 +137,7 @@ EventImplPtr Scheduler::addCG(std::unique_ptr CommandGroup, NewEvent = Result.NewEvent; ShouldEnqueue = Result.ShouldEnqueue; } + NewEvent->setSubmissionTime(); } if (ShouldEnqueue) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 21904674f21ca..d1cf34ba1ba97 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -162,6 +162,9 @@ event handler::finalize() { // 'Result' for single point of return pi_int32 Result = PI_ERROR_INVALID_VALUE; + NewEvent->setSubmittedQueue(MQueue); + NewEvent->setSubmissionTime(); + if (MQueue->is_host()) { MHostKernel->call(MNDRDesc, (NewEvent) ? NewEvent->getHostProfilingInfo() @@ -211,8 +214,6 @@ event handler::finalize() { else if (NewEvent->is_host() || NewEvent->getHandleRef() == nullptr) NewEvent->setComplete(); - NewEvent->setSubmittedQueue(MQueue); - NewEvent->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(NewEvent); } return MLastEvent; @@ -337,7 +338,6 @@ event handler::finalize() { detail::EventImplPtr Event = detail::Scheduler::getInstance().addCG( std::move(CommandGroup), std::move(MQueue)); - Event->setSubmissionTime(); MLastEvent = detail::createSyclObjFromImpl(Event); return MLastEvent; } From eda7e394bb5491d5dc27780ce0272ebea3d4f6e5 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Wed, 21 Dec 2022 09:23:42 -0800 Subject: [PATCH 48/53] Fix test fails Signed-off-by: Rauf, Rana --- sycl/source/detail/event_impl.cpp | 2 +- sycl/source/handler.cpp | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 2f48d970aacbc..3e2cd116cfaad 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -418,7 +418,7 @@ void event_impl::cleanDepEventsThroughOneLevel() { void event_impl::setSubmissionTime() { if (!MIsProfilingEnabled) return; - if (QueueImplPtr Queue = getSubmittedQueue()) { + if (QueueImplPtr Queue = MQueue.lock()) { try { MSubmitTime = Queue->getDeviceImplPtr()->getCurrentDeviceTime(); } catch (feature_not_supported &e) { diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index d1cf34ba1ba97..4d6d2c878fb29 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -162,9 +162,6 @@ event handler::finalize() { // 'Result' for single point of return pi_int32 Result = PI_ERROR_INVALID_VALUE; - NewEvent->setSubmittedQueue(MQueue); - NewEvent->setSubmissionTime(); - if (MQueue->is_host()) { MHostKernel->call(MNDRDesc, (NewEvent) ? NewEvent->getHostProfilingInfo() @@ -208,6 +205,8 @@ event handler::finalize() { NewEvent->setStateIncomplete(); OutEvent = &NewEvent->getHandleRef(); + NewEvent->setSubmissionTime(); + if (PI_SUCCESS != EnqueueKernel()) throw runtime_error("Enqueue process failed.", PI_ERROR_INVALID_OPERATION); From 43b24e70d292b91cf6ca3ef905382027107e5456 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Wed, 21 Dec 2022 14:03:58 -0800 Subject: [PATCH 49/53] Fix CUDA fails again Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 4 +++- sycl/plugins/cuda/pi_cuda.hpp | 6 ++++++ sycl/plugins/hip/pi_hip.cpp | 5 ++++- sycl/plugins/hip/pi_hip.hpp | 6 ++++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 41b4efbac7235..d20e51e9d7edf 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5494,13 +5494,15 @@ pi_result cuda_piTearDown(void *) { pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { _pi_event::native_type event; - using namespace std::chrono; + ScopedContext active(Device->get_context()); if (DeviceTime) { PI_CHECK_ERROR(cuEventCreate(&event, CU_EVENT_DEFAULT)); PI_CHECK_ERROR(cuEventRecord(event, 0)); } if (HostTime) { + + using namespace std::chrono; *HostTime = duration_cast(steady_clock::now().time_since_epoch()) .count(); diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 542a8bfe4368a..93176790471cf 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -88,6 +88,7 @@ struct _pi_device { native_type cuDevice_; std::atomic_uint32_t refCount_; pi_platform platform_; + pi_context context_; static constexpr pi_uint32 max_work_item_dimensions = 3u; size_t max_work_item_sizes[max_work_item_dimensions]; @@ -103,6 +104,10 @@ struct _pi_device { pi_platform get_platform() const noexcept { return platform_; }; + void set_context(pi_context ctx) { context_ = ctx; }; + + pi_context get_context() { return context_;}; + void save_max_work_item_sizes(size_t size, size_t *save_max_work_item_sizes) noexcept { memcpy(max_work_item_sizes, save_max_work_item_sizes, size); @@ -178,6 +183,7 @@ struct _pi_context { bool backend_owns = true) : kind_{k}, cuContext_{ctxt}, deviceId_{devId}, refCount_{1}, has_ownership{backend_owns} { + deviceId_->set_context(this); cuda_piDeviceRetain(deviceId_); }; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 47fd2c0764d6c..54b899da74fe0 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5222,12 +5222,15 @@ pi_result hip_piTearDown(void *PluginParameter) { pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, uint64_t *HostTime) { _pi_event::native_type event; - using namespace std::chrono; + + ScopedContext active(Device->get_context()); + if (DeviceTime) { PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); PI_CHECK_ERROR(hipEventRecord(event)); } if (HostTime) { + using namespace std::chrono; *HostTime = duration_cast(steady_clock::now().time_since_epoch()) .count(); diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp index c8f3c2e65dc10..d922992d5ea3a 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -81,6 +81,7 @@ struct _pi_device { native_type cuDevice_; std::atomic_uint32_t refCount_; pi_platform platform_; + pi_context context_; public: _pi_device(native_type cuDevice, pi_platform platform) @@ -91,6 +92,10 @@ struct _pi_device { pi_uint32 get_reference_count() const noexcept { return refCount_; } pi_platform get_platform() const noexcept { return platform_; }; + + void set_context(pi_context ctx) { context_ = ctx; }; + + pi_context get_context() { return context_;}; }; /// PI context mapping to a HIP context object. @@ -149,6 +154,7 @@ struct _pi_context { _pi_context(kind k, hipCtx_t ctxt, _pi_device *devId) : kind_{k}, hipContext_{ctxt}, deviceId_{devId}, refCount_{1} { + deviceId_->set_context(this); hip_piDeviceRetain(deviceId_); }; From e83487972675d5b96ecc8f0d0f11f4e25a4f4c1e Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Wed, 21 Dec 2022 14:05:09 -0800 Subject: [PATCH 50/53] Formatting Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 4 ++-- sycl/plugins/cuda/pi_cuda.hpp | 2 +- sycl/plugins/hip/pi_hip.cpp | 2 +- sycl/plugins/hip/pi_hip.hpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index d20e51e9d7edf..2cd27be5815f0 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -5501,8 +5501,8 @@ pi_result cuda_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, PI_CHECK_ERROR(cuEventRecord(event, 0)); } if (HostTime) { - - using namespace std::chrono; + + using namespace std::chrono; *HostTime = duration_cast(steady_clock::now().time_since_epoch()) .count(); diff --git a/sycl/plugins/cuda/pi_cuda.hpp b/sycl/plugins/cuda/pi_cuda.hpp index 93176790471cf..8514083346434 100644 --- a/sycl/plugins/cuda/pi_cuda.hpp +++ b/sycl/plugins/cuda/pi_cuda.hpp @@ -106,7 +106,7 @@ struct _pi_device { void set_context(pi_context ctx) { context_ = ctx; }; - pi_context get_context() { return context_;}; + pi_context get_context() { return context_; }; void save_max_work_item_sizes(size_t size, size_t *save_max_work_item_sizes) noexcept { diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 54b899da74fe0..31cebfe270114 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -5224,7 +5224,7 @@ pi_result hip_piGetDeviceAndHostTimer(pi_device Device, uint64_t *DeviceTime, _pi_event::native_type event; ScopedContext active(Device->get_context()); - + if (DeviceTime) { PI_CHECK_ERROR(hipEventCreateWithFlags(&event, hipEventDefault)); PI_CHECK_ERROR(hipEventRecord(event)); diff --git a/sycl/plugins/hip/pi_hip.hpp b/sycl/plugins/hip/pi_hip.hpp index d922992d5ea3a..02e96570d5d88 100644 --- a/sycl/plugins/hip/pi_hip.hpp +++ b/sycl/plugins/hip/pi_hip.hpp @@ -95,7 +95,7 @@ struct _pi_device { void set_context(pi_context ctx) { context_ = ctx; }; - pi_context get_context() { return context_;}; + pi_context get_context() { return context_; }; }; /// PI context mapping to a HIP context object. From 37f2e53b99d778a68b73c6e74471002dc8faf1bb Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 22 Dec 2022 07:23:05 -0800 Subject: [PATCH 51/53] Remove removal comments Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 5 +---- sycl/plugins/hip/pi_hip.cpp | 4 ---- sycl/plugins/level_zero/pi_level_zero.cpp | 2 -- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index 2cd27be5815f0..b2276e80c005f 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -558,8 +558,7 @@ bool _pi_event::is_completed() const noexcept { } return true; } -// TODO: Remove this function and other code for setting or getting -// queued/submit time + pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started()); @@ -3890,8 +3889,6 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, } switch (param_name) { - // TODO: Remove this and other related code for setting or getting - // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 31cebfe270114..c1c5bd8f2962b 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -565,8 +565,6 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. - // TODO: Remove this and other related code for setting or getting - // queued/submit time PI_CHECK_ERROR(hipEventRecord(evQueued_, 0)); PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get())); } @@ -3714,8 +3712,6 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, } switch (param_name) { - // TODO: Remove this and other related code for setting or getting - // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return getInfo(param_value_size, param_value, diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 4022f9bf20c8c..a43cfbdbd75df 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5986,8 +5986,6 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, ContextEndTime *= ZeTimerResolution; return ReturnValue(ContextEndTime); } - // TODO: Remove this and other related code for setting or getting - // queued/submit time case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: return ReturnValue(uint64_t{0}); From 2df30af2c82f27669f2d113ed0aa821b29b34ad4 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 22 Dec 2022 09:36:34 -0800 Subject: [PATCH 52/53] Added TODO comment Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 1 + sycl/plugins/hip/pi_hip.cpp | 1 + sycl/plugins/level_zero/pi_level_zero.cpp | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index b2276e80c005f..cd5f3a4282b9c 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -3891,6 +3891,7 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, switch (param_name) { case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: + // Note: No user for this case return getInfo(param_value_size, param_value, param_value_size_ret, event->get_queued_time()); case PI_PROFILING_INFO_COMMAND_START: diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index c1c5bd8f2962b..7128d7d6c4d74 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -3714,6 +3714,7 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, switch (param_name) { case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: + // Note: No user for this case return getInfo(param_value_size, param_value, param_value_size_ret, event->get_queued_time()); case PI_PROFILING_INFO_COMMAND_START: diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index a43cfbdbd75df..79f9cbe6791fe 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5988,6 +5988,10 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, } case PI_PROFILING_INFO_COMMAND_QUEUED: case PI_PROFILING_INFO_COMMAND_SUBMIT: + // Note: No users for this case + // TODO: Implement commmand submission time when needed, + // by recording device timestamp (using zeDeviceGetGlobalTimestamps) + // before submitting command to device return ReturnValue(uint64_t{0}); default: zePrint("piEventGetProfilingInfo: not supported ParamName\n"); From dc18419ea930d26c69a55a2bbdf29c19b51b83c9 Mon Sep 17 00:00:00 2001 From: "Rauf, Rana" Date: Thu, 22 Dec 2022 09:39:26 -0800 Subject: [PATCH 53/53] Remove bad comment Signed-off-by: Rauf, Rana --- sycl/plugins/cuda/pi_cuda.cpp | 2 -- sycl/plugins/hip/pi_hip.cpp | 1 - 2 files changed, 3 deletions(-) diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index cd5f3a4282b9c..b71fab48946ae 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -529,8 +529,6 @@ pi_result _pi_event::start() { try { if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. - // TODO: Remove this and other related code for setting or getting - // queued/submit time result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0)); result = PI_CHECK_ERROR(cuEventRecord(evStart_, stream_)); } diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index 7128d7d6c4d74..202626dcfb9b6 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -593,7 +593,6 @@ bool _pi_event::is_completed() const noexcept { return true; } -// TODO: Remove this and other code for setting or getting queued/submit time pi_uint64 _pi_event::get_queued_time() const { float miliSeconds = 0.0f; assert(is_started());