From 28de9d55fa6b603ebeed490bc5ef1a2ff5a0f429 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 13 Jan 2025 17:08:58 -0800 Subject: [PATCH 01/56] when enqueueing a command and its dependencies, and exception might be thrown. In that case, the command will have a failed EnqueueStatus. During the clean up, we don't want to reenqueue it if we know it has failed before --- sycl/source/detail/scheduler/scheduler.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index efbbb52acab73..77706f035b667 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -52,6 +52,9 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, #endif std::vector ToCleanUp; for (Command *Cmd : Record->MReadLeaves) { + if (Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) + continue; // nothing to do + EnqueueResultT Res; bool Enqueued = GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd); @@ -65,6 +68,9 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); } for (Command *Cmd : Record->MWriteLeaves) { + if (Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) + continue; // nothing to do + EnqueueResultT Res; bool Enqueued = GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd); From e24a731a902182f9af3ab15a116e36b04f07e878 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 21 Jan 2025 11:29:53 -0800 Subject: [PATCH 02/56] fix OTHER memory release path and test both --- .../source/detail/scheduler/graph_builder.cpp | 3 + sycl/source/detail/scheduler/scheduler.cpp | 14 ++-- .../test-e2e/Scheduler/DeleteCmdException.cpp | 64 +++++++++++++++++++ 3 files changed, 71 insertions(+), 10 deletions(-) create mode 100644 sycl/test-e2e/Scheduler/DeleteCmdException.cpp diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 5636309cdccc1..a944aaba6d0e9 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -486,6 +486,9 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, std::vector ToCleanUp; for (Command *Dep : Deps) { + if (Dep->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) + continue; // nothing to do + Command *ConnCmd = MemCpyCmd->addDep( DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}, ToCleanUp); if (ConnCmd) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 77706f035b667..6602ea7a3d48f 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -155,19 +155,13 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, EnqueueResultT Res; bool Enqueued; - auto CleanUp = [&]() { - if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { - if (NewEvent) { - NewEvent->setCommand(nullptr); - } - delete NewCmd; - } - }; + auto CleanUp = [&]() { cleanupCommands(ToCleanUp); }; for (Command *Cmd : AuxiliaryCmds) { - Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd, - Blocking); try { + Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, + Cmd, Blocking); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw exception(make_error_code(errc::runtime), "Auxiliary enqueue process failed."); diff --git a/sycl/test-e2e/Scheduler/DeleteCmdException.cpp b/sycl/test-e2e/Scheduler/DeleteCmdException.cpp new file mode 100644 index 0000000000000..c06820428b150 --- /dev/null +++ b/sycl/test-e2e/Scheduler/DeleteCmdException.cpp @@ -0,0 +1,64 @@ +//==------------------- DeleteCmdException.cpp ----------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: level_zero + +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out + +#include + +void test_exception(sycl::queue &q, sycl::buffer &buf, + size_t workGroupSize) { + + try { + // Illegal nd_range + auto illegal_range = sycl::nd_range<1>{sycl::range<1>{workGroupSize * 2}, + sycl::range<1>{workGroupSize + 32}}; + + // Will throw when submitted + q.submit([&](sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.parallel_for(illegal_range, [=](sycl::nd_item<1> nd_item) { + acc[nd_item.get_global_linear_id()] = 42; // will not be reached + }); + }).wait_and_throw(); + } catch (const sycl::exception &e) { + std::cout << "exception caught: " << e.code() << ":\t"; + std::cout << e.what() << std::endl; + } +} + +int main() { + sycl::queue q; + sycl::device dev = q.get_device(); + int maxWorkGroupSize = + dev.get_info(); + + constexpr size_t NumWorkItems = + 2048; // this value is arbitrary since kernel is never run. + std::vector source(NumWorkItems, 0); + { + // Buffers with their own memory will have their memory release deferred, + // while buffers backstopped by host memory will release when the buffer is + // destroyed. This means there are two different paths we need to check to + // ensure we are not leaking resources when encountering exceptions. + + // buffer with own memory + sycl::buffer buf{sycl::range<1>{NumWorkItems}}; + + // buffer backstopped by host memory + sycl::buffer buf2{source.data(), sycl::range<1>{NumWorkItems}}; + + test_exception(q, buf, maxWorkGroupSize); + + test_exception(q, buf2, maxWorkGroupSize); + } + + return 0; +} \ No newline at end of file From 626a8331ae5be9342d77c6e68583d52ed3f053dc Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 22 Jan 2025 16:55:50 -0800 Subject: [PATCH 03/56] restoring other CleanUp code which is used by queue memcpy ops --- sycl/source/detail/scheduler/scheduler.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 6602ea7a3d48f..d80b42d58a8f2 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -155,7 +155,15 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, EnqueueResultT Res; bool Enqueued; - auto CleanUp = [&]() { cleanupCommands(ToCleanUp); }; + auto CleanUp = [&]() { + if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { + if (NewEvent) { + NewEvent->setCommand(nullptr); + } + delete NewCmd; + } + cleanupCommands(ToCleanUp); + }; for (Command *Cmd : AuxiliaryCmds) { try { From 9401ae14f583be11734c7556053b27a5b8a42a9c Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 24 Jan 2025 10:08:15 -0800 Subject: [PATCH 04/56] interesting and excellent. --- sycl/source/detail/global_handler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 5669fbdaacc50..2331fe23afc26 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -287,10 +287,10 @@ void GlobalHandler::prepareSchedulerToRelease(bool Blocking) { #ifndef _WIN32 if (Blocking) drainThreadPool(); +#endif if (MScheduler.Inst) MScheduler.Inst->releaseResources(Blocking ? BlockingT::BLOCKING : BlockingT::NON_BLOCKING); -#endif } void GlobalHandler::drainThreadPool() { From a03da625a9fd32ae516928f9137952674fd2839b Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Fri, 24 Jan 2025 12:48:10 -0800 Subject: [PATCH 05/56] blind fix --- sycl/source/detail/global_handler.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 2331fe23afc26..9a9afd80f93d8 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -287,6 +287,8 @@ void GlobalHandler::prepareSchedulerToRelease(bool Blocking) { #ifndef _WIN32 if (Blocking) drainThreadPool(); +#else + Blocking = false; #endif if (MScheduler.Inst) MScheduler.Inst->releaseResources(Blocking ? BlockingT::BLOCKING From 113927daf0e52af0ab5f33bc27abd7e685b8f9ad Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Mon, 27 Jan 2025 17:08:54 -0800 Subject: [PATCH 06/56] checkpoint. cleanup needed --- sycl/source/detail/global_handler.cpp | 26 +++++++++++++--------- sycl/source/detail/scheduler/scheduler.cpp | 18 +++++++++++++++ 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 9a9afd80f93d8..dfb9a5cc38b25 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -60,10 +60,11 @@ class ObjectUsageCounter { LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); MCounter--; - GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); - if (RTGlobalObjHandler) { - RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); - } + // CP - CLEANUP NEEDED + // GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); + // if (RTGlobalObjHandler) { + // RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); + // } } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ObjectUsageCounter", e); } @@ -241,8 +242,10 @@ struct EarlyShutdownHandler { ~EarlyShutdownHandler() { try { #ifdef _WIN32 + // CP - CLEANUP NEEDED // on Windows we keep to the existing shutdown procedure - GlobalHandler::instance().releaseDefaultContexts(); + //GlobalHandler::instance().releaseDefaultContexts(); + shutdown_early(); #else shutdown_early(); #endif @@ -287,8 +290,6 @@ void GlobalHandler::prepareSchedulerToRelease(bool Blocking) { #ifndef _WIN32 if (Blocking) drainThreadPool(); -#else - Blocking = false; #endif if (MScheduler.Inst) MScheduler.Inst->releaseResources(Blocking ? BlockingT::BLOCKING @@ -300,7 +301,8 @@ void GlobalHandler::drainThreadPool() { MHostTaskThreadPool.Inst->drain(); } -#ifdef _WIN32 + // CP - CLEANUP NEEDED + //#ifdef _WIN32 // because of something not-yet-understood on Windows // threads may be shutdown once the end of main() is reached // making an orderly shutdown difficult. Fortunately, Windows @@ -311,7 +313,7 @@ void shutdown_win() { GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); Handler->unloadAdapters(); } -#else + //#else void shutdown_early() { const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); @@ -355,7 +357,7 @@ void shutdown_late() { delete Handler; Handler = nullptr; } -#endif + //#endif #ifdef _WIN32 extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, @@ -384,7 +386,9 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, #endif try { - shutdown_win(); + // CP - CLEANUP NEEDED + //shutdown_win(); // works + shutdown_late(); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in shutdown_win", e); return FALSE; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index d80b42d58a8f2..71376a32c03b9 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -279,6 +279,19 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, // No operations were performed on the mem object return true; + #ifdef _WIN32 + // CP - CLEANUP NEEDED + bool hasUserData = MemObj->hasUserDataPtr(); + //bool OkDefer = GlobalHandler::instance().isOkToDefer(); + GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); + bool OkDefer = Handler ? Handler->isOkToDefer() : false; + std::cout << "Handler: " << Handler << " hasUserData: " << hasUserData << " OkDefer: " << OkDefer << std::endl; + bool allowWait = hasUserData || OkDefer; //MemObj->hasUserDataPtr() || GlobalHandler::instance().isOkToDefer(); +#else + bool allowWait = true; + #endif + + if(allowWait) { // This only needs a shared mutex as it only involves enqueueing and // awaiting for events @@ -397,6 +410,10 @@ void Scheduler::releaseResources(BlockingT Blocking) { cleanupCommands({}); cleanupAuxiliaryResources(Blocking); + // CP - CLEANUP NEEDED + //#ifdef _WIN32 + //cleanupDeferredMemObjects(Blocking); + //#else // We need loop since sometimes we may need new objects to be added to // deferred mem objects storage during cleanup. Known example is: we cleanup // existing deferred mem objects under write lock, during this process we @@ -407,6 +424,7 @@ void Scheduler::releaseResources(BlockingT Blocking) { do { cleanupDeferredMemObjects(Blocking); } while (Blocking == BlockingT::BLOCKING && !isDeferredMemObjectsEmpty()); + //#endif } MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { From 59ae241d9facefba768d28a8f36a9b129365e4cb Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Tue, 28 Jan 2025 09:16:57 -0800 Subject: [PATCH 07/56] checkpoint --- sycl/source/detail/global_handler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index dfb9a5cc38b25..20bd6c72c6c5a 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -387,8 +387,8 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, try { // CP - CLEANUP NEEDED - //shutdown_win(); // works - shutdown_late(); + shutdown_win(); // works + //shutdown_late(); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in shutdown_win", e); return FALSE; From 5ac2f12a143469a05727574f16b279616e70ac27 Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Tue, 28 Jan 2025 11:04:54 -0800 Subject: [PATCH 08/56] another checkpoint --- sycl/source/detail/global_handler.cpp | 20 ++++++++++++++++---- sycl/source/detail/scheduler/scheduler.cpp | 8 ++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 20bd6c72c6c5a..364f72bf03d47 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -29,6 +29,10 @@ #include +// CP - remove +//#define CPOUT std::clog +#define CPOUT std::clog.rdbuf(NULL); std::clog + namespace sycl { inline namespace _V1 { namespace detail { @@ -76,8 +80,10 @@ class ObjectUsageCounter { }; std::atomic_uint ObjectUsageCounter::MCounter{0}; -GlobalHandler::GlobalHandler() = default; -GlobalHandler::~GlobalHandler() = default; + //GlobalHandler::GlobalHandler() = default; + //GlobalHandler::~GlobalHandler() = default; + GlobalHandler::GlobalHandler(){ CPOUT << "GlobalHandler constructor ---" << std::endl; } + GlobalHandler::~GlobalHandler() { CPOUT << "~GlobalHandler destructor ---" << std::endl; } void GlobalHandler::InitXPTI() { #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -246,6 +252,7 @@ struct EarlyShutdownHandler { // on Windows we keep to the existing shutdown procedure //GlobalHandler::instance().releaseDefaultContexts(); shutdown_early(); + shutdown_win(); #else shutdown_early(); #endif @@ -310,11 +317,13 @@ void GlobalHandler::drainThreadPool() { // we focus solely on unloading the adapters, so as to not // accidentally retain device handles. etc void shutdown_win() { + CPOUT << "shutdown_win() ---" << std::endl; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); Handler->unloadAdapters(); } //#else void shutdown_early() { + CPOUT << "shutdown_early() ---" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -336,6 +345,7 @@ void shutdown_early() { } void shutdown_late() { + CPOUT << "shutdown_late() --- " << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -377,7 +387,7 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, case DLL_PROCESS_DETACH: if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; - + /* #ifdef XPTI_ENABLE_INSTRUMENTATION if (xptiTraceEnabled()) return TRUE; // When doing xpti tracing, we can't safely call shutdown. @@ -387,12 +397,14 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, try { // CP - CLEANUP NEEDED - shutdown_win(); // works + //shutdown_early(); + //shutdown_win(); // works //shutdown_late(); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in shutdown_win", e); return FALSE; } + */ break; case DLL_PROCESS_ATTACH: if (PrintUrTrace) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 71376a32c03b9..80bdf267b28dd 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -282,10 +282,10 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, #ifdef _WIN32 // CP - CLEANUP NEEDED bool hasUserData = MemObj->hasUserDataPtr(); - //bool OkDefer = GlobalHandler::instance().isOkToDefer(); - GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); - bool OkDefer = Handler ? Handler->isOkToDefer() : false; - std::cout << "Handler: " << Handler << " hasUserData: " << hasUserData << " OkDefer: " << OkDefer << std::endl; + bool OkDefer = GlobalHandler::instance().isOkToDefer(); + //GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); + //bool OkDefer = Handler ? Handler->isOkToDefer() : false; + //std::cout << " hasUserData: " << hasUserData << " OkDefer: " << OkDefer << std::endl; bool allowWait = hasUserData || OkDefer; //MemObj->hasUserDataPtr() || GlobalHandler::instance().isOkToDefer(); #else bool allowWait = true; From ea2fe36ca3c5ade83a4cba2c715ca1756f51ee7a Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Tue, 28 Jan 2025 16:12:15 -0800 Subject: [PATCH 09/56] ready for more testing. Probably needs clang-format fixes. --- sycl/source/detail/global_handler.cpp | 48 ++++------------------ sycl/source/detail/scheduler/scheduler.cpp | 24 ++++------- 2 files changed, 14 insertions(+), 58 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 364f72bf03d47..f67b3eac6ab44 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -29,10 +29,6 @@ #include -// CP - remove -//#define CPOUT std::clog -#define CPOUT std::clog.rdbuf(NULL); std::clog - namespace sycl { inline namespace _V1 { namespace detail { @@ -64,11 +60,6 @@ class ObjectUsageCounter { LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); MCounter--; - // CP - CLEANUP NEEDED - // GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); - // if (RTGlobalObjHandler) { - // RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); - // } } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ObjectUsageCounter", e); } @@ -80,10 +71,8 @@ class ObjectUsageCounter { }; std::atomic_uint ObjectUsageCounter::MCounter{0}; - //GlobalHandler::GlobalHandler() = default; - //GlobalHandler::~GlobalHandler() = default; - GlobalHandler::GlobalHandler(){ CPOUT << "GlobalHandler constructor ---" << std::endl; } - GlobalHandler::~GlobalHandler() { CPOUT << "~GlobalHandler destructor ---" << std::endl; } +GlobalHandler::GlobalHandler() = default; +GlobalHandler::~GlobalHandler() = default; void GlobalHandler::InitXPTI() { #ifdef XPTI_ENABLE_INSTRUMENTATION @@ -248,10 +237,9 @@ struct EarlyShutdownHandler { ~EarlyShutdownHandler() { try { #ifdef _WIN32 - // CP - CLEANUP NEEDED // on Windows we keep to the existing shutdown procedure - //GlobalHandler::instance().releaseDefaultContexts(); - shutdown_early(); + GlobalHandler::instance().endDeferredRelease(); + GlobalHandler::instance().releaseDefaultContexts(); shutdown_win(); #else shutdown_early(); @@ -308,8 +296,7 @@ void GlobalHandler::drainThreadPool() { MHostTaskThreadPool.Inst->drain(); } - // CP - CLEANUP NEEDED - //#ifdef _WIN32 +#ifdef _WIN32 // because of something not-yet-understood on Windows // threads may be shutdown once the end of main() is reached // making an orderly shutdown difficult. Fortunately, Windows @@ -317,13 +304,11 @@ void GlobalHandler::drainThreadPool() { // we focus solely on unloading the adapters, so as to not // accidentally retain device handles. etc void shutdown_win() { - CPOUT << "shutdown_win() ---" << std::endl; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); Handler->unloadAdapters(); } - //#else +#else void shutdown_early() { - CPOUT << "shutdown_early() ---" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -345,7 +330,6 @@ void shutdown_early() { } void shutdown_late() { - CPOUT << "shutdown_late() --- " << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -367,7 +351,7 @@ void shutdown_late() { delete Handler; Handler = nullptr; } - //#endif +#endif #ifdef _WIN32 extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, @@ -387,24 +371,6 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, case DLL_PROCESS_DETACH: if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; - /* -#ifdef XPTI_ENABLE_INSTRUMENTATION - if (xptiTraceEnabled()) - return TRUE; // When doing xpti tracing, we can't safely call shutdown. - // TODO: figure out what XPTI is doing that prevents - // release. -#endif - - try { - // CP - CLEANUP NEEDED - //shutdown_early(); - //shutdown_win(); // works - //shutdown_late(); - } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in shutdown_win", e); - return FALSE; - } - */ break; case DLL_PROCESS_ATTACH: if (PrintUrTrace) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 80bdf267b28dd..ea58e7ec4fff1 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -279,20 +279,14 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, // No operations were performed on the mem object return true; - #ifdef _WIN32 - // CP - CLEANUP NEEDED - bool hasUserData = MemObj->hasUserDataPtr(); - bool OkDefer = GlobalHandler::instance().isOkToDefer(); - //GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); - //bool OkDefer = Handler ? Handler->isOkToDefer() : false; - //std::cout << " hasUserData: " << hasUserData << " OkDefer: " << OkDefer << std::endl; - bool allowWait = hasUserData || OkDefer; //MemObj->hasUserDataPtr() || GlobalHandler::instance().isOkToDefer(); +#ifdef _WIN32 + bool allowWait = MemObj->hasUserDataPtr() || + GlobalHandler::instance().isOkToDefer(); #else - bool allowWait = true; - #endif + bool allowWait = true; +#endif - if(allowWait) - { + if(allowWait) { // This only needs a shared mutex as it only involves enqueueing and // awaiting for events ReadLockT Lock = StrictLock ? ReadLockT(MGraphLock) @@ -410,10 +404,7 @@ void Scheduler::releaseResources(BlockingT Blocking) { cleanupCommands({}); cleanupAuxiliaryResources(Blocking); - // CP - CLEANUP NEEDED - //#ifdef _WIN32 - //cleanupDeferredMemObjects(Blocking); - //#else + // We need loop since sometimes we may need new objects to be added to // deferred mem objects storage during cleanup. Known example is: we cleanup // existing deferred mem objects under write lock, during this process we @@ -424,7 +415,6 @@ void Scheduler::releaseResources(BlockingT Blocking) { do { cleanupDeferredMemObjects(Blocking); } while (Blocking == BlockingT::BLOCKING && !isDeferredMemObjectsEmpty()); - //#endif } MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { From 194b47e74a979232e711523ed5063aeff1143c94 Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Tue, 28 Jan 2025 16:30:43 -0800 Subject: [PATCH 10/56] misery loves clang-format --- sycl/source/detail/scheduler/scheduler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index ea58e7ec4fff1..3b3a6808d0785 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -280,13 +280,13 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, return true; #ifdef _WIN32 - bool allowWait = MemObj->hasUserDataPtr() || - GlobalHandler::instance().isOkToDefer(); + bool allowWait = + MemObj->hasUserDataPtr() || GlobalHandler::instance().isOkToDefer(); #else bool allowWait = true; #endif - - if(allowWait) { + + if (allowWait) { // This only needs a shared mutex as it only involves enqueueing and // awaiting for events ReadLockT Lock = StrictLock ? ReadLockT(MGraphLock) From 0d064b90b0f567207c094475422239d21af5dd81 Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Wed, 29 Jan 2025 19:18:49 -0800 Subject: [PATCH 11/56] improvements. need CI to check GPU side. may need clang-format --- sycl/source/detail/global_handler.cpp | 40 ++++++++++++---------- sycl/source/detail/scheduler/scheduler.cpp | 3 ++ 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index f67b3eac6ab44..a15b1fa60b54e 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -236,12 +236,8 @@ void GlobalHandler::releaseDefaultContexts() { struct EarlyShutdownHandler { ~EarlyShutdownHandler() { try { -#ifdef _WIN32 - // on Windows we keep to the existing shutdown procedure - GlobalHandler::instance().endDeferredRelease(); - GlobalHandler::instance().releaseDefaultContexts(); - shutdown_win(); -#else + // For Linux. Windows calls from DllMain +#ifndef _WIN32 shutdown_early(); #endif } catch (std::exception &e) { @@ -296,18 +292,6 @@ void GlobalHandler::drainThreadPool() { MHostTaskThreadPool.Inst->drain(); } -#ifdef _WIN32 -// because of something not-yet-understood on Windows -// threads may be shutdown once the end of main() is reached -// making an orderly shutdown difficult. Fortunately, Windows -// itself is very aggressive about reclaiming memory. Thus, -// we focus solely on unloading the adapters, so as to not -// accidentally retain device handles. etc -void shutdown_win() { - GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); - Handler->unloadAdapters(); -} -#else void shutdown_early() { const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); @@ -351,9 +335,17 @@ void shutdown_late() { delete Handler; Handler = nullptr; } -#endif #ifdef _WIN32 +// a simple wrapper to catch and stream any exception then continue +template +void safe_call(F func) { + try { + func(); + } catch (const std::exception& e) { + std::cerr << "exception in DllMain DLL_PROCESS_DETACH " << e.what() << std::endl; + } +} extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { @@ -366,11 +358,21 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, return FALSE; } +#ifdef XPTI_ENABLE_INSTRUMENTATION + if (xptiTraceEnabled()) + return TRUE; // When doing xpti tracing, we can't safely call shutdown. + // TODO: figure out what XPTI is doing that prevents + // release. +#endif + // Perform actions based on the reason for calling. switch (fdwReason) { case DLL_PROCESS_DETACH: if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; + + safe_call([](){ shutdown_early(); }); + safe_call([](){ shutdown_late(); }); break; case DLL_PROCESS_ATTACH: if (PrintUrTrace) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 3b3a6808d0785..80ad89047cdcf 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -280,6 +280,9 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, return true; #ifdef _WIN32 + // If we are shutting down on Windows it may not be + // safe to wait on host threads, as the OS may + // abandon them. But no worries, the memory WILL be reclaimed. bool allowWait = MemObj->hasUserDataPtr() || GlobalHandler::instance().isOkToDefer(); #else From ce964b92d117ffaaa023a952bf6e97d4d5b44cf0 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 29 Jan 2025 19:31:25 -0800 Subject: [PATCH 12/56] removing shutdown_win() refs and clang-forrmattery --- sycl/doc/design/GlobalObjectsInRuntime.md | 3 +- sycl/source/detail/global_handler.cpp | 54 +++++++++++------------ sycl/source/detail/global_handler.hpp | 1 - 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index b56dd7767d108..46bcda45c42c5 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -109,8 +109,7 @@ for (adapter in initializedAdapters) { urLoaderTearDown(); ``` -Which in turn is called by either `shutdown_late()` or `shutdown_win()` -depending on platform. +Which in turn is called by `shutdown_late()`. ![](images/adapter-lifetime.jpg) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index a15b1fa60b54e..ca78dea9a4e43 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -37,7 +37,6 @@ using LockGuard = std::lock_guard; SpinLock GlobalHandler::MSyclGlobalHandlerProtector{}; // forward decl -void shutdown_win(); // TODO: win variant will go away soon void shutdown_early(); void shutdown_late(); @@ -338,13 +337,13 @@ void shutdown_late() { #ifdef _WIN32 // a simple wrapper to catch and stream any exception then continue -template -void safe_call(F func) { - try { - func(); - } catch (const std::exception& e) { - std::cerr << "exception in DllMain DLL_PROCESS_DETACH " << e.what() << std::endl; - } +template void safe_call(F func) { + try { + func(); + } catch (const std::exception &e) { + std::cerr << "exception in DllMain DLL_PROCESS_DETACH " << e.what() + << std::endl; + } } extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, @@ -365,25 +364,26 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, // release. #endif - // Perform actions based on the reason for calling. - switch (fdwReason) { - case DLL_PROCESS_DETACH: - if (PrintUrTrace) - std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; - - safe_call([](){ shutdown_early(); }); - safe_call([](){ shutdown_late(); }); - break; - case DLL_PROCESS_ATTACH: - if (PrintUrTrace) - std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; - break; - case DLL_THREAD_ATTACH: - break; - case DLL_THREAD_DETACH: - break; - } - return TRUE; // Successful DLL_PROCESS_ATTACH. + // Perform actions based on the reason for calling. + switch (fdwReason) { + case DLL_PROCESS_DETACH: + if (PrintUrTrace) + std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; + + safe_call([]() { shutdown_early(); }); + safe_call([]() { shutdown_late(); }); + break; + case DLL_PROCESS_ATTACH: + if (PrintUrTrace) + std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; + + break; + case DLL_THREAD_ATTACH: + break; + case DLL_THREAD_DETACH: + break; + } + return TRUE; // Successful DLL_PROCESS_ATTACH. } #else // Setting low priority on destructor ensures it runs after all other global diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 4b834927e3832..768b98665d87c 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -95,7 +95,6 @@ class GlobalHandler { bool OkToDefer = true; - friend void shutdown_win(); friend void shutdown_early(); friend void shutdown_late(); friend class ObjectUsageCounter; From fd0052bf65f852a81f5c510bba839b008a7a2e9e Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Thu, 30 Jan 2025 23:15:54 -0800 Subject: [PATCH 13/56] fix of dllMain test. will need clang-format --- sycl/source/detail/global_handler.cpp | 9 +++++++-- sycl/unittests/helpers/UrMock.hpp | 21 +++++++++++++-------- sycl/unittests/windows/dllmain.cpp | 13 ++++--------- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index ca78dea9a4e43..18afe64ce5d34 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -345,6 +345,7 @@ template void safe_call(F func) { << std::endl; } } +std::atomic dllRefCount = 0; extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { @@ -370,13 +371,17 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; - safe_call([]() { shutdown_early(); }); - safe_call([]() { shutdown_late(); }); + dllRefCount--; + if (dllRefCount == 0) { + safe_call([]() { shutdown_early(); }); + safe_call([]() { shutdown_late(); }); + } break; case DLL_PROCESS_ATTACH: if (PrintUrTrace) std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; + dllRefCount++; break; case DLL_THREAD_ATTACH: break; diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 53c4db96dd84d..e7afce9fd8515 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -562,18 +562,23 @@ template class UrMock { urLoaderConfigRelease(UrLoaderConfig); } + bool releaseSyclObjsOnDestruction=true; + UrMock(UrMock &&Other) = delete; UrMock(const UrMock &) = delete; UrMock &operator=(const UrMock &) = delete; ~UrMock() { - // mock::getCallbacks() is an application lifetime object, we need to reset - // these between tests - detail::GlobalHandler::instance().prepareSchedulerToRelease(true); - detail::GlobalHandler::instance().releaseDefaultContexts(); - // clear platform cache in case subsequent tests want a different backend, - // this forces platforms to be reconstructed (and thus queries about UR - // backend info to be called again) - detail::GlobalHandler::instance().getPlatformCache().clear(); + // if we are testing shutdown, these will already be released + // and GlobalHandler instance freed. + if(releaseSyclObjsOnDestruction){ + // mock::getCallbacks() is an application lifetime object, we need to reset these between tests + detail::GlobalHandler::instance().prepareSchedulerToRelease(true); + detail::GlobalHandler::instance().releaseDefaultContexts(); + // clear platform cache in case subsequent tests want a different backend, + // this forces platforms to be reconstructed (and thus queries about UR + // backend info to be called again) + detail::GlobalHandler::instance().getPlatformCache().clear(); + } mock::getCallbacks().resetCallbacks(); } diff --git a/sycl/unittests/windows/dllmain.cpp b/sycl/unittests/windows/dllmain.cpp index 79c41981f426b..b57a7f975e9a1 100644 --- a/sycl/unittests/windows/dllmain.cpp +++ b/sycl/unittests/windows/dllmain.cpp @@ -39,24 +39,19 @@ ur_result_t redefinedAdapterRelease(void *) { TEST(Windows, DllMainCall) { #ifdef _WIN32 sycl::unittest::UrMock<> Mock; + Mock.releaseSyclObjsOnDestruction = false; + sycl::platform Plt = sycl::platform(); mock::getCallbacks().set_before_callback("urAdapterRelease", &redefinedAdapterRelease); - // Teardown calls are only expected on sycl.dll library unload, not when - // process gets terminated. - // The first call to DllMain is to simulate library unload. The second one - // is to simulate process termination - fprintf(stderr, "Call DllMain for the first time\n"); + DllMain((HINSTANCE)0, DLL_PROCESS_ATTACH, (LPVOID)NULL); + fprintf(stderr, "Call DllMain detach\n"); DllMain((HINSTANCE)0, DLL_PROCESS_DETACH, (LPVOID)NULL); int TearDownCallsDone = TearDownCalls.load(); EXPECT_NE(TearDownCallsDone, 0); - fprintf(stderr, "Call DllMain for the second time\n"); - DllMain((HINSTANCE)0, DLL_PROCESS_DETACH, (LPVOID)0x01); - - EXPECT_EQ(TearDownCalls.load(), TearDownCallsDone); #endif } From e85ac9ba6f4579425abfa704eca06d72148d5405 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 30 Jan 2025 23:32:40 -0800 Subject: [PATCH 14/56] clang-formation --- sycl/unittests/helpers/UrMock.hpp | 19 ++++++++++--------- sycl/unittests/windows/dllmain.cpp | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index e7afce9fd8515..be27cc94c971a 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -562,7 +562,7 @@ template class UrMock { urLoaderConfigRelease(UrLoaderConfig); } - bool releaseSyclObjsOnDestruction=true; + bool releaseSyclObjsOnDestruction = true; UrMock(UrMock &&Other) = delete; UrMock(const UrMock &) = delete; @@ -570,14 +570,15 @@ template class UrMock { ~UrMock() { // if we are testing shutdown, these will already be released // and GlobalHandler instance freed. - if(releaseSyclObjsOnDestruction){ - // mock::getCallbacks() is an application lifetime object, we need to reset these between tests - detail::GlobalHandler::instance().prepareSchedulerToRelease(true); - detail::GlobalHandler::instance().releaseDefaultContexts(); - // clear platform cache in case subsequent tests want a different backend, - // this forces platforms to be reconstructed (and thus queries about UR - // backend info to be called again) - detail::GlobalHandler::instance().getPlatformCache().clear(); + if (releaseSyclObjsOnDestruction) { + // mock::getCallbacks() is an application lifetime object, we need to + // reset these between tests + detail::GlobalHandler::instance().prepareSchedulerToRelease(true); + detail::GlobalHandler::instance().releaseDefaultContexts(); + // clear platform cache in case subsequent tests want a different backend, + // this forces platforms to be reconstructed (and thus queries about UR + // backend info to be called again) + detail::GlobalHandler::instance().getPlatformCache().clear(); } mock::getCallbacks().resetCallbacks(); } diff --git a/sycl/unittests/windows/dllmain.cpp b/sycl/unittests/windows/dllmain.cpp index b57a7f975e9a1..abfde12444830 100644 --- a/sycl/unittests/windows/dllmain.cpp +++ b/sycl/unittests/windows/dllmain.cpp @@ -40,7 +40,7 @@ TEST(Windows, DllMainCall) { #ifdef _WIN32 sycl::unittest::UrMock<> Mock; Mock.releaseSyclObjsOnDestruction = false; - + sycl::platform Plt = sycl::platform(); mock::getCallbacks().set_before_callback("urAdapterRelease", &redefinedAdapterRelease); From 334dfcf447ea8ed06c9f7848ee813314beb0647b Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 31 Jan 2025 11:30:51 -0800 Subject: [PATCH 15/56] improve comment --- sycl/unittests/windows/dllmain.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/unittests/windows/dllmain.cpp b/sycl/unittests/windows/dllmain.cpp index abfde12444830..812a35ffc1f84 100644 --- a/sycl/unittests/windows/dllmain.cpp +++ b/sycl/unittests/windows/dllmain.cpp @@ -10,6 +10,9 @@ * This test calls DllMain on Windows. This means, the process performs actions * which are required for library unload. That said, the test requires to be a * distinct binary executable. + * Do NOT add any other test cases to this file. + * Do NOT attempt to move its one test into any other file, because the + * release of the global handler that it causes would interfere with others. */ #include From fa814cad080b10388d831e66a124a6a9c7087828 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 3 Feb 2025 18:17:01 -0800 Subject: [PATCH 16/56] changes based on timing tests and improve write up --- sycl/doc/design/GlobalObjectsInRuntime.md | 50 ++++++++++++++++++++--- sycl/source/detail/global_handler.cpp | 24 +++++++---- sycl/source/detail/global_handler.hpp | 2 +- sycl/source/detail/platform_impl.cpp | 2 +- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index 46bcda45c42c5..13fc20f87083e 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -55,9 +55,49 @@ Deinitialization is platform-specific. Upon application shutdown, the DPC++ runtime frees memory pointed by `GlobalHandler` global pointer, which triggers destruction of nested `std::unique_ptr`s. +### Shutdown Tasks and Challenges + +As the user's app ends, SYCL's primary goal is to release any UR adapters that +have been gotten, and teardown the plugins/adapters themselves. Additionally, +we need to stop deferring any new buffer releases and clean up any memory +whose release was deferred. + +To this end, the shutdown occurs in two phases: early and late. In the early +shutdown we stop deferring, tell the scheduler to prepare for release, and +try releasing the memory that has been deferred so far. Following this, if +the user has any global or static handles to sycl objects, they'll be destroyed. +Finally, the late shutdown routine is called the last of the UR handles and +adapters are let go, as is the GlobalHandler itself. + +#### Threads +The deferred memory marshalling is built on a thread pool, but there is a +challenge here in that on Windows, once the end of the users main() is reached +and their app is shutting down, the Windows OS will abandon all remaining +in-flight threads. These threads can be .join() but they simply return instantly, +the threads are not completed. Further any thread specific variables +(or thread_local static vars) will NOT have their destructors called. Note +that the standard while-loop-over-condition-var pattern will cause a hang - +we cannot "wait" on abandoned threads. +On Windows, short of adding some user called API to signal this, there is +no way to detect or avoid this. None of the "end-of-library" lifecycle events +occurs before the threads are abandoned. ( not std::atexit(), not globals or +static, or static thread_local var destruction, not DllMain(DLL_PROCESS_DETACH) ) +This means that on Windows, once we arrive at shutdown_early we cannot wait on +host events or the thread pool. + +For the deferred memory itself, there is no issue here. The Windows OS will +reclaim the memory for us. The issue of which we must be wary is placing UR +handles (and simiar) in host threads. The RAII mechanism of unique and +shared pointers will not work in any thread that is abandoned on Windows. + + ### Linux -On Linux DPC++ runtime uses `__attribute__((destructor))` property with low +On Linux, the "eary_shutdown()" is begun by the destruction of a static +StaticVarShutdownHandler object, which is initialized by +platform::get_platforms(). + +late_shutdown() timing uses `__attribute__((destructor))` property with low priority value 110. This approach does not guarantee, that `GlobalHandler` destructor is the last thing to run, as user code may contain a similar function with the same priority value. At the same time, users may specify priorities @@ -72,10 +112,10 @@ times, the memory leak may impact code performance. ### Windows -To identify shutdown moment on Windows, DPC++ runtime uses default `DllMain` -function with `DLL_PROCESS_DETACH` reason. This guarantees, that global objects -deinitialization happens right before `sycl.dll` is unloaded from process -address space. +Differing from Linux, on Windows the "early_shutdown()" is begun by the DLL `DllMain` +function with `DLL_PROCESS_DETACH` reason. + +The "late_shutdown()" is begun by the destruction of a static StaticVarShutdownHandler object, which is initialized by platform::get_platforms(). ( On linux, this is when we do "early_shutdown()". Go figure.) This is as late as we can manage, but it is later than any user application global, static, or thread_local variable destruction. ### Recommendations for DPC++ runtime developers diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 18afe64ce5d34..f149768dbcff2 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -232,22 +232,29 @@ void GlobalHandler::releaseDefaultContexts() { MPlatformToDefaultContextCache.Inst.reset(nullptr); } -struct EarlyShutdownHandler { - ~EarlyShutdownHandler() { +// Shutdown is split into two parts. shutdown_early() stops any more +// objects from being deferred and takes an initial pass at freeing them. +// shutdown_late() finishes and releases the adapters and the GlobalHandler. +// For Windows, early shutdown is called from DllMain, and late shutdown is +// here. For Linux, early shutdown is here, and late shutdown is called from +// a low priority destructor. +struct StaticVarShutdownHandler { + ~StaticVarShutdownHandler() { try { - // For Linux. Windows calls from DllMain -#ifndef _WIN32 +#ifdef _WIN32 + shutdown_late(); +#else shutdown_early(); #endif } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~EarlyShutdownHandler", - e); + __SYCL_REPORT_EXCEPTION_TO_STREAM( + "exception in ~StaticVarShutdownHandler", e); } } }; -void GlobalHandler::registerEarlyShutdownHandler() { - static EarlyShutdownHandler handler{}; +void GlobalHandler::registerStaticVarShutdownHandler() { + static StaticVarShutdownHandler handler{}; } bool GlobalHandler::isOkToDefer() const { return OkToDefer; } @@ -374,7 +381,6 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, dllRefCount--; if (dllRefCount == 0) { safe_call([]() { shutdown_early(); }); - safe_call([]() { shutdown_late(); }); } break; case DLL_PROCESS_ATTACH: diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 768b98665d87c..71e28eaf8e60b 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -73,7 +73,7 @@ class GlobalHandler { XPTIRegistry &getXPTIRegistry(); ThreadPool &getHostTaskThreadPool(); - static void registerEarlyShutdownHandler(); + static void registerStaticVarShutdownHandler(); bool isOkToDefer() const; void endDeferredRelease(); diff --git a/sycl/source/detail/platform_impl.cpp b/sycl/source/detail/platform_impl.cpp index cb9a9f0f1b97f..373414b4b3515 100644 --- a/sycl/source/detail/platform_impl.cpp +++ b/sycl/source/detail/platform_impl.cpp @@ -166,7 +166,7 @@ std::vector platform_impl::get_platforms() { // This initializes a function-local variable whose destructor is invoked as // the SYCL shared library is first being unloaded. - GlobalHandler::registerEarlyShutdownHandler(); + GlobalHandler::registerStaticVarShutdownHandler(); return Platforms; } From 724d6e06077824cc925054f0e001d6177f8877af Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Mon, 3 Feb 2025 19:31:25 -0800 Subject: [PATCH 17/56] XPTI juggling --- sycl/source/detail/global_handler.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index f149768dbcff2..824c6e668dd03 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -304,6 +304,12 @@ void shutdown_early() { if (!Handler) return; +#ifdef XPTI_ENABLE_INSTRUMENTATION && _WIN32 + if (xptiTraceEnabled()) + return; // When doing xpti tracing, we can't safely shutdown on Win. + // TODO: figure out why XPTI prevents release. +#endif + // Now that we are shutting down, we will no longer defer MemObj releases. Handler->endDeferredRelease(); @@ -324,6 +330,12 @@ void shutdown_late() { GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) return; + +#ifdef XPTI_ENABLE_INSTRUMENTATION && _WIN32 + if (xptiTraceEnabled()) + return; // When doing xpti tracing, we can't safely shutdown on Win. + // TODO: figure out why XPTI prevents release. +#endif // First, release resources, that may access adapters. Handler->MPlatformCache.Inst.reset(nullptr); @@ -365,13 +377,6 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, return FALSE; } -#ifdef XPTI_ENABLE_INSTRUMENTATION - if (xptiTraceEnabled()) - return TRUE; // When doing xpti tracing, we can't safely call shutdown. - // TODO: figure out what XPTI is doing that prevents - // release. -#endif - // Perform actions based on the reason for calling. switch (fdwReason) { case DLL_PROCESS_DETACH: From 2e618432841a9f0c2afc449c1e994b805cdb1d19 Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Mon, 3 Feb 2025 21:14:09 -0800 Subject: [PATCH 18/56] safety --- sycl/source/detail/global_handler.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 824c6e668dd03..4547aa0099e7b 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -247,8 +247,7 @@ struct StaticVarShutdownHandler { shutdown_early(); #endif } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM( - "exception in ~StaticVarShutdownHandler", e); + std::cout << "exception in ~StaticVarShutdownHandler " << e.what() << std::endl; } } }; From b396f7fd967978e8a1f5b832afdc991511d0f076 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 4 Feb 2025 10:36:14 -0800 Subject: [PATCH 19/56] reformat --- sycl/doc/design/GlobalObjectsInRuntime.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index 13fc20f87083e..a3cc87a1fffca 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -87,7 +87,7 @@ host events or the thread pool. For the deferred memory itself, there is no issue here. The Windows OS will reclaim the memory for us. The issue of which we must be wary is placing UR -handles (and simiar) in host threads. The RAII mechanism of unique and +handles (and similar) in host threads. The RAII mechanism of unique and shared pointers will not work in any thread that is abandoned on Windows. @@ -115,7 +115,11 @@ times, the memory leak may impact code performance. Differing from Linux, on Windows the "early_shutdown()" is begun by the DLL `DllMain` function with `DLL_PROCESS_DETACH` reason. -The "late_shutdown()" is begun by the destruction of a static StaticVarShutdownHandler object, which is initialized by platform::get_platforms(). ( On linux, this is when we do "early_shutdown()". Go figure.) This is as late as we can manage, but it is later than any user application global, static, or thread_local variable destruction. +The "late_shutdown()" is begun by the destruction of a +static StaticVarShutdownHandler object, which is initialized by +platform::get_platforms(). ( On linux, this is when we do "early_shutdown()". +Go figure.) This is as late as we can manage, but it is later than any user +application global, static, or thread_local variable destruction. ### Recommendations for DPC++ runtime developers From 392a1fa2e060c9b0b3fd3cd79c1aa163d55363d4 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 4 Feb 2025 10:50:16 -0800 Subject: [PATCH 20/56] clang-format --- sycl/source/detail/global_handler.cpp | 65 ++++++++++++++------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 4547aa0099e7b..72e42cdaa637b 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -247,7 +247,8 @@ struct StaticVarShutdownHandler { shutdown_early(); #endif } catch (std::exception &e) { - std::cout << "exception in ~StaticVarShutdownHandler " << e.what() << std::endl; + std::cout << "exception in ~StaticVarShutdownHandler " << e.what() + << std::endl; } } }; @@ -303,10 +304,10 @@ void shutdown_early() { if (!Handler) return; -#ifdef XPTI_ENABLE_INSTRUMENTATION && _WIN32 - if (xptiTraceEnabled()) - return; // When doing xpti tracing, we can't safely shutdown on Win. - // TODO: figure out why XPTI prevents release. +#ifdef XPTI_ENABLE_INSTRUMENTATION &&_WIN32 + if (xptiTraceEnabled()) + return; // When doing xpti tracing, we can't safely shutdown on Win. + // TODO: figure out why XPTI prevents release. #endif // Now that we are shutting down, we will no longer defer MemObj releases. @@ -329,11 +330,11 @@ void shutdown_late() { GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) return; - -#ifdef XPTI_ENABLE_INSTRUMENTATION && _WIN32 - if (xptiTraceEnabled()) - return; // When doing xpti tracing, we can't safely shutdown on Win. - // TODO: figure out why XPTI prevents release. + +#ifdef XPTI_ENABLE_INSTRUMENTATION &&_WIN32 + if (xptiTraceEnabled()) + return; // When doing xpti tracing, we can't safely shutdown on Win. + // TODO: figure out why XPTI prevents release. #endif // First, release resources, that may access adapters. @@ -376,29 +377,29 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, return FALSE; } - // Perform actions based on the reason for calling. - switch (fdwReason) { - case DLL_PROCESS_DETACH: - if (PrintUrTrace) - std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; - - dllRefCount--; - if (dllRefCount == 0) { - safe_call([]() { shutdown_early(); }); - } - break; - case DLL_PROCESS_ATTACH: - if (PrintUrTrace) - std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; - - dllRefCount++; - break; - case DLL_THREAD_ATTACH: - break; - case DLL_THREAD_DETACH: - break; + // Perform actions based on the reason for calling. + switch (fdwReason) { + case DLL_PROCESS_DETACH: + if (PrintUrTrace) + std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; + + dllRefCount--; + if (dllRefCount == 0) { + safe_call([]() { shutdown_early(); }); } - return TRUE; // Successful DLL_PROCESS_ATTACH. + break; + case DLL_PROCESS_ATTACH: + if (PrintUrTrace) + std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; + + dllRefCount++; + break; + case DLL_THREAD_ATTACH: + break; + case DLL_THREAD_DETACH: + break; + } + return TRUE; // Successful DLL_PROCESS_ATTACH. } #else // Setting low priority on destructor ensures it runs after all other global From 1bd79cbf26a5eb2e176365b0e30dd9782b2d0772 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 4 Feb 2025 12:40:44 -0800 Subject: [PATCH 21/56] clang-format has a bug, apparently --- sycl/source/detail/global_handler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 72e42cdaa637b..2177a74f63351 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -304,7 +304,7 @@ void shutdown_early() { if (!Handler) return; -#ifdef XPTI_ENABLE_INSTRUMENTATION &&_WIN32 +#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32) if (xptiTraceEnabled()) return; // When doing xpti tracing, we can't safely shutdown on Win. // TODO: figure out why XPTI prevents release. @@ -331,7 +331,7 @@ void shutdown_late() { if (!Handler) return; -#ifdef XPTI_ENABLE_INSTRUMENTATION &&_WIN32 +#if defined(XPTI_ENABLE_INSTRUMENTATION) && defined(_WIN32) if (xptiTraceEnabled()) return; // When doing xpti tracing, we can't safely shutdown on Win. // TODO: figure out why XPTI prevents release. From 32de860c2af8e842ebac84ef4b4fdcdeecde7334 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 4 Feb 2025 21:29:49 -0800 Subject: [PATCH 22/56] U is for unit tests and unified runtime --- sycl/source/detail/queue_impl.hpp | 10 ++++- sycl/unittests/helpers/UrMock.hpp | 22 ++++------ sycl/unittests/windows/CMakeLists.txt | 4 -- sycl/unittests/windows/dllmain.cpp | 60 --------------------------- 4 files changed, 17 insertions(+), 79 deletions(-) delete mode 100644 sycl/unittests/windows/CMakeLists.txt delete mode 100644 sycl/unittests/windows/dllmain.cpp diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 7a9128d75292c..7da0b71416fb2 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -264,7 +264,15 @@ class queue_impl { destructorNotification(); #endif throw_asynchronous(); - getAdapter()->call(MQueues[0]); + auto status = + getAdapter()->call_nocheck(MQueues[0]); + // if loader is already closed, it'll return a not-initialized status + // which the UR should convert to SUCCESS code. But that isn't always + // working on Windows. This is a temporary workaround until that is fixed. + if (status != UR_RESULT_SUCCESS || + status != UR_RESULT_ERROR_UNINITIALIZED) { + __SYCL_CHECK_UR_CODE_NO_EXC(status); + } } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~queue_impl", e); } diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index be27cc94c971a..53c4db96dd84d 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -562,24 +562,18 @@ template class UrMock { urLoaderConfigRelease(UrLoaderConfig); } - bool releaseSyclObjsOnDestruction = true; - UrMock(UrMock &&Other) = delete; UrMock(const UrMock &) = delete; UrMock &operator=(const UrMock &) = delete; ~UrMock() { - // if we are testing shutdown, these will already be released - // and GlobalHandler instance freed. - if (releaseSyclObjsOnDestruction) { - // mock::getCallbacks() is an application lifetime object, we need to - // reset these between tests - detail::GlobalHandler::instance().prepareSchedulerToRelease(true); - detail::GlobalHandler::instance().releaseDefaultContexts(); - // clear platform cache in case subsequent tests want a different backend, - // this forces platforms to be reconstructed (and thus queries about UR - // backend info to be called again) - detail::GlobalHandler::instance().getPlatformCache().clear(); - } + // mock::getCallbacks() is an application lifetime object, we need to reset + // these between tests + detail::GlobalHandler::instance().prepareSchedulerToRelease(true); + detail::GlobalHandler::instance().releaseDefaultContexts(); + // clear platform cache in case subsequent tests want a different backend, + // this forces platforms to be reconstructed (and thus queries about UR + // backend info to be called again) + detail::GlobalHandler::instance().getPlatformCache().clear(); mock::getCallbacks().resetCallbacks(); } diff --git a/sycl/unittests/windows/CMakeLists.txt b/sycl/unittests/windows/CMakeLists.txt deleted file mode 100644 index 6143d5de55045..0000000000000 --- a/sycl/unittests/windows/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_sycl_unittest(WindowsDllMainTest OBJECT - dllmain.cpp -) - diff --git a/sycl/unittests/windows/dllmain.cpp b/sycl/unittests/windows/dllmain.cpp deleted file mode 100644 index 812a35ffc1f84..0000000000000 --- a/sycl/unittests/windows/dllmain.cpp +++ /dev/null @@ -1,60 +0,0 @@ -//==----- dllmain.cpp --- verify behaviour of lib on process termination ---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -/* - * This test calls DllMain on Windows. This means, the process performs actions - * which are required for library unload. That said, the test requires to be a - * distinct binary executable. - * Do NOT add any other test cases to this file. - * Do NOT attempt to move its one test into any other file, because the - * release of the global handler that it causes would interfere with others. - */ - -#include -#include - -#include - -#ifdef _WIN32 -#include - -extern "C" BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, - LPVOID lpReserved); - -static std::atomic TearDownCalls{0}; - -// Before the port this was an override for LoaderTearDown, UR's mock -// functionality can't override loader functions but AdapterRelease is called -// in the runtime in the same place as LoaderTearDown -ur_result_t redefinedAdapterRelease(void *) { - fprintf(stderr, "intercepted tear down\n"); - ++TearDownCalls; - - return UR_RESULT_SUCCESS; -} -#endif - -TEST(Windows, DllMainCall) { -#ifdef _WIN32 - sycl::unittest::UrMock<> Mock; - Mock.releaseSyclObjsOnDestruction = false; - - sycl::platform Plt = sycl::platform(); - mock::getCallbacks().set_before_callback("urAdapterRelease", - &redefinedAdapterRelease); - - DllMain((HINSTANCE)0, DLL_PROCESS_ATTACH, (LPVOID)NULL); - fprintf(stderr, "Call DllMain detach\n"); - DllMain((HINSTANCE)0, DLL_PROCESS_DETACH, (LPVOID)NULL); - - int TearDownCallsDone = TearDownCalls.load(); - - EXPECT_NE(TearDownCallsDone, 0); - -#endif -} From db91abab2346795886ee7a6997688b42e541fdd5 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 4 Feb 2025 21:36:41 -0800 Subject: [PATCH 23/56] forgot to add file --- sycl/unittests/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt index 8831426784de2..b099538ac93e5 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -39,7 +39,6 @@ add_subdirectory(pipes) add_subdirectory(program_manager) add_subdirectory(assert) add_subdirectory(Extensions) -add_subdirectory(windows) add_subdirectory(event) add_subdirectory(buffer) add_subdirectory(context_device) From de45858905ee4867d0c1f08e96d07cd335789e90 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 4 Feb 2025 21:47:45 -0800 Subject: [PATCH 24/56] you can tell it's late --- sycl/source/detail/queue_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 7da0b71416fb2..5a55d4ad1eced 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -269,7 +269,7 @@ class queue_impl { // if loader is already closed, it'll return a not-initialized status // which the UR should convert to SUCCESS code. But that isn't always // working on Windows. This is a temporary workaround until that is fixed. - if (status != UR_RESULT_SUCCESS || + if (status != UR_RESULT_SUCCESS && status != UR_RESULT_ERROR_UNINITIALIZED) { __SYCL_CHECK_UR_CODE_NO_EXC(status); } From 7d08a9b9bf0ac0ae09194c45db952573dce474e9 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 6 Feb 2025 11:01:35 -0800 Subject: [PATCH 25/56] clarity --- sycl/source/detail/scheduler/scheduler.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 80ad89047cdcf..35b51c035d428 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -586,11 +586,10 @@ void Scheduler::cleanupAuxiliaryResources(BlockingT Blocking) { std::unique_lock Lock{MAuxiliaryResourcesMutex}; for (auto It = MAuxiliaryResources.begin(); It != MAuxiliaryResources.end();) { - const EventImplPtr &Event = It->first; if (Blocking == BlockingT::BLOCKING) { - Event->waitInternal(); + It->first->waitInternal(); It = MAuxiliaryResources.erase(It); - } else if (Event->isCompleted()) + } else if (It->first->isCompleted()) It = MAuxiliaryResources.erase(It); else ++It; From d8058c6f101a077e3e5f4df938753741dd09fc1d Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 7 Feb 2025 09:10:58 -0800 Subject: [PATCH 26/56] test --- sycl/source/detail/global_handler.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 2177a74f63351..823c25261b953 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -317,8 +317,10 @@ void shutdown_early() { // upon its release Handler->prepareSchedulerToRelease(true); +#ifndef _WIN32 if (Handler->MHostTaskThreadPool.Inst) Handler->MHostTaskThreadPool.Inst->finishAndWait(); +#endif // This releases OUR reference to the default context, but // other may yet have refs From 802858e0a831fc40687d54818ecec07bab4499ce Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 7 Feb 2025 11:16:04 -0800 Subject: [PATCH 27/56] ci --- sycl/source/detail/global_handler.cpp | 2 -- sycl/unittests/helpers/UrMock.hpp | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 823c25261b953..2177a74f63351 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -317,10 +317,8 @@ void shutdown_early() { // upon its release Handler->prepareSchedulerToRelease(true); -#ifndef _WIN32 if (Handler->MHostTaskThreadPool.Inst) Handler->MHostTaskThreadPool.Inst->finishAndWait(); -#endif // This releases OUR reference to the default context, but // other may yet have refs diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 53c4db96dd84d..5006c03f515d1 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -568,8 +568,10 @@ template class UrMock { ~UrMock() { // mock::getCallbacks() is an application lifetime object, we need to reset // these between tests +#ifndef _WIN32 detail::GlobalHandler::instance().prepareSchedulerToRelease(true); detail::GlobalHandler::instance().releaseDefaultContexts(); +#endif // clear platform cache in case subsequent tests want a different backend, // this forces platforms to be reconstructed (and thus queries about UR // backend info to be called again) From ea328de9305593d6cebe62c27f5d5b23b48553cd Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 7 Feb 2025 13:32:17 -0800 Subject: [PATCH 28/56] with feathers --- sycl/unittests/helpers/UrMock.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 5006c03f515d1..1650c7f14d633 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -568,10 +568,9 @@ template class UrMock { ~UrMock() { // mock::getCallbacks() is an application lifetime object, we need to reset // these between tests -#ifndef _WIN32 - detail::GlobalHandler::instance().prepareSchedulerToRelease(true); + detail::GlobalHandler::instance().releaseDefaultContexts(); -#endif + // clear platform cache in case subsequent tests want a different backend, // this forces platforms to be reconstructed (and thus queries about UR // backend info to be called again) From 61931728d49117f5d25056048908a9bf97a07f37 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 7 Feb 2025 14:00:39 -0800 Subject: [PATCH 29/56] not seeing problem locally --- sycl/unittests/helpers/UrMock.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 1650c7f14d633..a56a1815dabec 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -568,7 +568,9 @@ template class UrMock { ~UrMock() { // mock::getCallbacks() is an application lifetime object, we need to reset // these between tests - +#ifndef _WIN32 + detail::GlobalHandler::instance().prepareSchedulerToRelease(true); +#endif detail::GlobalHandler::instance().releaseDefaultContexts(); // clear platform cache in case subsequent tests want a different backend, From 46490bcc089ca2134b958cf64ccbfc915febc4a7 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 7 Feb 2025 15:05:38 -0800 Subject: [PATCH 30/56] did I say with feathers already? --- sycl/unittests/helpers/UrMock.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index a56a1815dabec..4051a13bb209f 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -568,15 +568,18 @@ template class UrMock { ~UrMock() { // mock::getCallbacks() is an application lifetime object, we need to reset // these between tests + +#ifdef _WIN32 + detail::GlobalHandler::instance().releaseDefaultContexts(); #ifndef _WIN32 detail::GlobalHandler::instance().prepareSchedulerToRelease(true); -#endif detail::GlobalHandler::instance().releaseDefaultContexts(); - // clear platform cache in case subsequent tests want a different backend, // this forces platforms to be reconstructed (and thus queries about UR // backend info to be called again) detail::GlobalHandler::instance().getPlatformCache().clear(); +#endif + mock::getCallbacks().resetCallbacks(); } From 432ecd699f55987a7ff6dcac1e3725bde4aa12c7 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 7 Feb 2025 16:19:42 -0800 Subject: [PATCH 31/56] d'oh --- sycl/unittests/helpers/UrMock.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 4051a13bb209f..144f7f7bacd78 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -571,7 +571,7 @@ template class UrMock { #ifdef _WIN32 detail::GlobalHandler::instance().releaseDefaultContexts(); -#ifndef _WIN32 +#else _WIN32 detail::GlobalHandler::instance().prepareSchedulerToRelease(true); detail::GlobalHandler::instance().releaseDefaultContexts(); // clear platform cache in case subsequent tests want a different backend, From e61ccfad9a2583335d300ba1bf73ecc0833b5fd9 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 7 Feb 2025 17:10:23 -0800 Subject: [PATCH 32/56] hm --- sycl/unittests/helpers/UrMock.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 144f7f7bacd78..3605a6007da01 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -571,7 +571,7 @@ template class UrMock { #ifdef _WIN32 detail::GlobalHandler::instance().releaseDefaultContexts(); -#else _WIN32 +#else detail::GlobalHandler::instance().prepareSchedulerToRelease(true); detail::GlobalHandler::instance().releaseDefaultContexts(); // clear platform cache in case subsequent tests want a different backend, From a20e93b02e6e4240ee840e39a2d428d6507eee6f Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Mon, 10 Feb 2025 12:12:43 -0800 Subject: [PATCH 33/56] adjust unit tests to avoid interactions. might need clang-format --- sycl/unittests/SYCL2020/KernelBundle.cpp | 9 +++++++++ .../program_manager/DynamicLinking/DynamicLinking.cpp | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/sycl/unittests/SYCL2020/KernelBundle.cpp b/sycl/unittests/SYCL2020/KernelBundle.cpp index 5ffe4ecbc341a..05617a3a2decf 100644 --- a/sycl/unittests/SYCL2020/KernelBundle.cpp +++ b/sycl/unittests/SYCL2020/KernelBundle.cpp @@ -487,6 +487,14 @@ ur_result_t redefinedDevicePartitionAfter(void *pParams) { return UR_RESULT_SUCCESS; } +#ifndef _WIN32 +// While the Mock object is created anew for each test, the SYCL +// GlobalHandler is not. It is the same across all tests in a +// single executable. Some of the mock callbacks modify the +// global platforms/devices. On Linux, we can clear these +// changes by faking shutdown. But on Windows we can't do that. +// To avoid conflicts, this test is being skipped on Windows. +// It would pass if placed in its own suite. TEST(KernelBundle, DescendentDevice) { // Mock a non-OpenCL adapter since use of descendent devices of context // members is not supported there yet. @@ -521,6 +529,7 @@ TEST(KernelBundle, DescendentDevice) { EXPECT_EQ(KernelBundle, RetKernelBundle); } +#endif TEST(KernelBundle, CheckIfBundleHasIncompatibleKernel) { sycl::unittest::UrMock<> Mock; diff --git a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp index c48a5c1626c69..d523408a347f4 100644 --- a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp +++ b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp @@ -253,6 +253,13 @@ TEST(DynamicLinking, AheadOfTime) { AOT_CASE_PRG_NATIVE * AOT_CASE_PRG_DEP_NATIVE); } +#ifndef _WIN32 +// The 'setupRuntimeLinkingMock' used by other tests results in +// changes to the global platforms/devices that will result +// in a test failure if not cleared. On Linux, the Mock's destructor +// fakes shutdown to clear them. But on Windows we can't +// do that hack. So we skip this test. It would pass +// if placed in its own test suite. TEST(DynamicLinking, AheadOfTimeUnsupported) { try { sycl::unittest::UrMock Mock; @@ -266,6 +273,7 @@ TEST(DynamicLinking, AheadOfTimeUnsupported) { "unsupported for the backend"); } } +#endif static ur_result_t redefined_urProgramCompileExp(void *pParams) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; From 3978329c4df1a48a25f71ca754d1defeb5976aac Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Mon, 10 Feb 2025 15:20:13 -0800 Subject: [PATCH 34/56] no choice --- sycl/source/detail/host_task.hpp | 8 ++++++++ sycl/source/detail/thread_pool.hpp | 2 ++ 2 files changed, 10 insertions(+) diff --git a/sycl/source/detail/host_task.hpp b/sycl/source/detail/host_task.hpp index 5f7ae11c6a0e4..a1608699f14dd 100644 --- a/sycl/source/detail/host_task.hpp +++ b/sycl/source/detail/host_task.hpp @@ -13,9 +13,11 @@ #pragma once #include +#include #include #include #include +#include namespace sycl { inline namespace _V1 { @@ -33,6 +35,9 @@ class HostTask { bool isInteropTask() const { return !!MInteropTask; } void call(HostProfilingInfo *HPI) { + std::cout << "host_task call()" << std::endl; + if(!GlobalHandler::instance().isOkToDefer()){ return; } + if (HPI) HPI->start(); MHostTask(); @@ -41,6 +46,9 @@ class HostTask { } void call(HostProfilingInfo *HPI, interop_handle handle) { + std::cout << "host_task call()" << std::endl; + if(!GlobalHandler::instance().isOkToDefer()){ return; } + if (HPI) HPI->start(); MInteropTask(handle); diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 50240e0a98b06..e7d2fc8905a8e 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -55,6 +55,7 @@ class ThreadPool { } void start() { + std::cout << "thread_pool start()" << std::endl; MLaunchedThreads.reserve(MThreadCount); MJobsInPool.store(0); @@ -82,6 +83,7 @@ class ThreadPool { } void finishAndWait() { + std::cout << "finishAndWait()" << std::endl; { std::lock_guard Lock(MJobQueueMutex); MStop = true; From eea07da908af6a4b4d946f4760e93f530d2cfd04 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 10 Feb 2025 16:18:24 -0800 Subject: [PATCH 35/56] clang-format --- sycl/source/detail/host_task.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/host_task.hpp b/sycl/source/detail/host_task.hpp index a1608699f14dd..70bb03b75a6f4 100644 --- a/sycl/source/detail/host_task.hpp +++ b/sycl/source/detail/host_task.hpp @@ -36,7 +36,9 @@ class HostTask { void call(HostProfilingInfo *HPI) { std::cout << "host_task call()" << std::endl; - if(!GlobalHandler::instance().isOkToDefer()){ return; } + if (!GlobalHandler::instance().isOkToDefer()) { + return; + } if (HPI) HPI->start(); @@ -47,7 +49,9 @@ class HostTask { void call(HostProfilingInfo *HPI, interop_handle handle) { std::cout << "host_task call()" << std::endl; - if(!GlobalHandler::instance().isOkToDefer()){ return; } + if (!GlobalHandler::instance().isOkToDefer()) { + return; + } if (HPI) HPI->start(); From 7afd0276f10a768b2f3506eb8f79fadcdd0f2e5f Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 10 Feb 2025 16:43:02 -0800 Subject: [PATCH 36/56] more clang-format --- sycl/source/detail/host_task.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/host_task.hpp b/sycl/source/detail/host_task.hpp index 70bb03b75a6f4..f9413260f20c7 100644 --- a/sycl/source/detail/host_task.hpp +++ b/sycl/source/detail/host_task.hpp @@ -17,7 +17,6 @@ #include #include #include -#include namespace sycl { inline namespace _V1 { From 6fdfa2e9660db62895803418ed7cb21f577dffb1 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 10 Feb 2025 18:13:17 -0800 Subject: [PATCH 37/56] again --- sycl/source/detail/global_handler.cpp | 2 ++ sycl/source/detail/thread_pool.hpp | 1 + 2 files changed, 3 insertions(+) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 2177a74f63351..bcc2132de447f 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -299,6 +299,7 @@ void GlobalHandler::drainThreadPool() { } void shutdown_early() { + std::cout << "shutdown_early()" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -320,6 +321,7 @@ void shutdown_early() { if (Handler->MHostTaskThreadPool.Inst) Handler->MHostTaskThreadPool.Inst->finishAndWait(); + std::cout << "finishAndWait() done" << std::endl; // This releases OUR reference to the default context, but // other may yet have refs Handler->releaseDefaultContexts(); diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index e7d2fc8905a8e..69111f389599e 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -76,6 +76,7 @@ class ThreadPool { ~ThreadPool() { try { + std::cout << "~ThreadPool()" << std::endl; finishAndWait(); } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ThreadPool", e); From fac5ffa6d1c8d4d1303a4196469cf75717bcacc6 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 10 Feb 2025 19:19:57 -0800 Subject: [PATCH 38/56] moar logging. sad. --- sycl/source/detail/global_handler.cpp | 3 +++ sycl/source/detail/thread_pool.hpp | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index bcc2132de447f..1b8338af3ac03 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -328,6 +328,7 @@ void shutdown_early() { } void shutdown_late() { + std::cout << "shutdown_late()" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -354,6 +355,8 @@ void shutdown_late() { // Release the rest of global resources. delete Handler; Handler = nullptr; + + std::cout << "shutdown_late() done" << std::endl; } #ifdef _WIN32 diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 69111f389599e..00568f0eb2eaf 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -75,12 +75,12 @@ class ThreadPool { } ~ThreadPool() { - try { - std::cout << "~ThreadPool()" << std::endl; - finishAndWait(); - } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ThreadPool", e); - } + // try { + // std::cout << "~ThreadPool()" << std::endl; + // finishAndWait(); + // } catch (std::exception &e) { + // __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ThreadPool", e); + // } } void finishAndWait() { From 134e3a9e91e848a811422ad95747b78ffbec07a8 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 10 Feb 2025 21:14:03 -0800 Subject: [PATCH 39/56] test --- sycl/source/detail/global_handler.hpp | 3 +++ sycl/unittests/helpers/UrMock.hpp | 22 +++++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 71e28eaf8e60b..596ba39cb40d3 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -31,6 +31,9 @@ using PlatformImplPtr = std::shared_ptr; using ContextImplPtr = std::shared_ptr; using AdapterPtr = std::shared_ptr; +// Forward declaration +void shutdown_early(); + /// Wrapper class for global data structures with non-trivial destructors. /// /// As user code can call SYCL Runtime functions from destructor of global diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 3605a6007da01..156efbd5d375a 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -569,16 +569,20 @@ template class UrMock { // mock::getCallbacks() is an application lifetime object, we need to reset // these between tests -#ifdef _WIN32 - detail::GlobalHandler::instance().releaseDefaultContexts(); -#else - detail::GlobalHandler::instance().prepareSchedulerToRelease(true); - detail::GlobalHandler::instance().releaseDefaultContexts(); - // clear platform cache in case subsequent tests want a different backend, - // this forces platforms to be reconstructed (and thus queries about UR - // backend info to be called again) + detail::shutdown_early(); + + // #ifdef _WIN32 + // detail::GlobalHandler::instance().releaseDefaultContexts(); + // #else + // detail::GlobalHandler::instance().prepareSchedulerToRelease(true); + // detail::GlobalHandler::instance().releaseDefaultContexts(); + // // clear platform cache in case subsequent tests want a different + // backend, + // // this forces platforms to be reconstructed (and thus queries about + // UR + // // backend info to be called again) detail::GlobalHandler::instance().getPlatformCache().clear(); -#endif + // #endif mock::getCallbacks().resetCallbacks(); } From 9f5556f33da9ea649abf961ab6a3dcec098499c6 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 11 Feb 2025 10:39:23 -0800 Subject: [PATCH 40/56] transfer --- sycl/unittests/buffer/BufferLocation.cpp | 4 ++++ sycl/unittests/buffer/BufferReleaseBase.hpp | 3 +++ sycl/unittests/helpers/UrMock.hpp | 18 +++++------------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/sycl/unittests/buffer/BufferLocation.cpp b/sycl/unittests/buffer/BufferLocation.cpp index 71c6d1fa545cd..c352fe1bbfe6d 100644 --- a/sycl/unittests/buffer/BufferLocation.cpp +++ b/sycl/unittests/buffer/BufferLocation.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -108,6 +109,9 @@ class BufferTest : public ::testing::Test { mock::getCallbacks().set_after_callback("urDeviceGetInfo", &redefinedDeviceGetInfoAfter); } +#ifdef _WIN32 + void TearDown() override { sycl::detail::shutdown_early(); } +#endif sycl::unittest::UrMock<> Mock; sycl::platform Plt; diff --git a/sycl/unittests/buffer/BufferReleaseBase.hpp b/sycl/unittests/buffer/BufferReleaseBase.hpp index a4982af3b581f..a0274d6ad8075 100644 --- a/sycl/unittests/buffer/BufferReleaseBase.hpp +++ b/sycl/unittests/buffer/BufferReleaseBase.hpp @@ -49,6 +49,9 @@ class BufferDestructionCheckCommon : public ::testing::Test { } void TearDown() override { sycl::detail::GlobalHandler::instance().attachScheduler(NULL); +#ifdef _WIN32 + sycl::detail::shutdown_early(); +#endif } template diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 156efbd5d375a..66f660defc6a3 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -569,20 +569,12 @@ template class UrMock { // mock::getCallbacks() is an application lifetime object, we need to reset // these between tests - detail::shutdown_early(); - - // #ifdef _WIN32 - // detail::GlobalHandler::instance().releaseDefaultContexts(); - // #else - // detail::GlobalHandler::instance().prepareSchedulerToRelease(true); - // detail::GlobalHandler::instance().releaseDefaultContexts(); - // // clear platform cache in case subsequent tests want a different - // backend, - // // this forces platforms to be reconstructed (and thus queries about - // UR - // // backend info to be called again) + detail::GlobalHandler::instance().prepareSchedulerToRelease(true); + detail::GlobalHandler::instance().releaseDefaultContexts(); + // clear platform cache in case subsequent tests want a different backend, + // this forces platforms to be reconstructed (and thus queries about UR + // backend info to be called again) detail::GlobalHandler::instance().getPlatformCache().clear(); - // #endif mock::getCallbacks().resetCallbacks(); } From c088821b527781a0fa00577bc453507a99de0c9f Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 11 Feb 2025 21:39:18 -0800 Subject: [PATCH 41/56] latest win reattempt --- sycl/doc/design/GlobalObjectsInRuntime.md | 23 +++++++++++++--- sycl/source/detail/global_handler.cpp | 33 +++++++++++------------ sycl/source/detail/host_task.hpp | 2 ++ sycl/source/detail/thread_pool.hpp | 3 +++ 4 files changed, 40 insertions(+), 21 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index a3cc87a1fffca..d85382cf9e402 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -62,13 +62,23 @@ have been gotten, and teardown the plugins/adapters themselves. Additionally, we need to stop deferring any new buffer releases and clean up any memory whose release was deferred. -To this end, the shutdown occurs in two phases: early and late. In the early -shutdown we stop deferring, tell the scheduler to prepare for release, and +To this end, the shutdown occurs in two phases: early and late. The purpose +for eary shutdown is primarily to stop any further deferring of memory release. +This is because the deferred memory release is based on threads and on Windows +the threads will be abandoned. So as soon as possible we want to stop deferring +memory and try to let go any that has been deferred. The purpose for late +shutdown is to hold onto the handles and adapters longer than the user's +application. We don't want to initiate late shutdown until after all the users +static and thread local vars have been destroyed, in case those destructors are +calling SYCL. + +In the early shutdown we stop deferring, tell the scheduler to prepare for release, and try releasing the memory that has been deferred so far. Following this, if the user has any global or static handles to sycl objects, they'll be destroyed. Finally, the late shutdown routine is called the last of the UR handles and adapters are let go, as is the GlobalHandler itself. + #### Threads The deferred memory marshalling is built on a thread pool, but there is a challenge here in that on Windows, once the end of the users main() is reached @@ -90,6 +100,12 @@ reclaim the memory for us. The issue of which we must be wary is placing UR handles (and similar) in host threads. The RAII mechanism of unique and shared pointers will not work in any thread that is abandoned on Windows. +One last note about threads. It is entirely the OS's discretion on when to +start or schedule a thread. If the main process is very busy then it is +possible that threads the SYCL library creates (host_tasks/thread_pool) +won't even be started until AFTER the host application main() function is done. +This is not a normal occurrence, but it can happen if there is no call to queue.wait() + ### Linux @@ -112,8 +128,7 @@ times, the memory leak may impact code performance. ### Windows -Differing from Linux, on Windows the "early_shutdown()" is begun by the DLL `DllMain` -function with `DLL_PROCESS_DETACH` reason. +Differing from Linux, on Windows the "early_shutdown()" is begun by std::atexit() The "late_shutdown()" is begun by the destruction of a static StaticVarShutdownHandler object, which is initialized by diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 1b8338af3ac03..44b676c176779 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -235,8 +235,8 @@ void GlobalHandler::releaseDefaultContexts() { // Shutdown is split into two parts. shutdown_early() stops any more // objects from being deferred and takes an initial pass at freeing them. // shutdown_late() finishes and releases the adapters and the GlobalHandler. -// For Windows, early shutdown is called from DllMain, and late shutdown is -// here. For Linux, early shutdown is here, and late shutdown is called from +// For Windows, early shutdown is called from std::atexit(), and late shutdown +// is here. For Linux, early shutdown is here, and late shutdown is called from // a low priority destructor. struct StaticVarShutdownHandler { ~StaticVarShutdownHandler() { @@ -254,6 +254,16 @@ struct StaticVarShutdownHandler { }; void GlobalHandler::registerStaticVarShutdownHandler() { +#ifdef _WIN32 + std::atexit([]() { + try { + shutdown_early(); + } catch (std::exception &e) { + std::cout << "exception in atexit/shutdown_early() " << e.what() + << std::endl; + } + }); +#endif static StaticVarShutdownHandler handler{}; } @@ -299,6 +309,7 @@ void GlobalHandler::drainThreadPool() { } void shutdown_early() { + // CP std::cout << "shutdown_early()" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); @@ -321,6 +332,7 @@ void shutdown_early() { if (Handler->MHostTaskThreadPool.Inst) Handler->MHostTaskThreadPool.Inst->finishAndWait(); + // CP std::cout << "finishAndWait() done" << std::endl; // This releases OUR reference to the default context, but // other may yet have refs @@ -328,6 +340,7 @@ void shutdown_early() { } void shutdown_late() { + // CP std::cout << "shutdown_late()" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); @@ -356,20 +369,11 @@ void shutdown_late() { delete Handler; Handler = nullptr; + // CP std::cout << "shutdown_late() done" << std::endl; } #ifdef _WIN32 -// a simple wrapper to catch and stream any exception then continue -template void safe_call(F func) { - try { - func(); - } catch (const std::exception &e) { - std::cerr << "exception in DllMain DLL_PROCESS_DETACH " << e.what() - << std::endl; - } -} -std::atomic dllRefCount = 0; extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { @@ -388,16 +392,11 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; - dllRefCount--; - if (dllRefCount == 0) { - safe_call([]() { shutdown_early(); }); - } break; case DLL_PROCESS_ATTACH: if (PrintUrTrace) std::cout << "---> DLL_PROCESS_ATTACH syclx.dll\n" << std::endl; - dllRefCount++; break; case DLL_THREAD_ATTACH: break; diff --git a/sycl/source/detail/host_task.hpp b/sycl/source/detail/host_task.hpp index f9413260f20c7..085f7d794f344 100644 --- a/sycl/source/detail/host_task.hpp +++ b/sycl/source/detail/host_task.hpp @@ -34,6 +34,7 @@ class HostTask { bool isInteropTask() const { return !!MInteropTask; } void call(HostProfilingInfo *HPI) { + // CP std::cout << "host_task call()" << std::endl; if (!GlobalHandler::instance().isOkToDefer()) { return; @@ -47,6 +48,7 @@ class HostTask { } void call(HostProfilingInfo *HPI, interop_handle handle) { + // CP std::cout << "host_task call()" << std::endl; if (!GlobalHandler::instance().isOkToDefer()) { return; diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 00568f0eb2eaf..660014ccfc6e7 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -55,6 +55,7 @@ class ThreadPool { } void start() { + // CP std::cout << "thread_pool start()" << std::endl; MLaunchedThreads.reserve(MThreadCount); @@ -75,6 +76,7 @@ class ThreadPool { } ~ThreadPool() { + // CP // try { // std::cout << "~ThreadPool()" << std::endl; // finishAndWait(); @@ -84,6 +86,7 @@ class ThreadPool { } void finishAndWait() { + // CP std::cout << "finishAndWait()" << std::endl; { std::lock_guard Lock(MJobQueueMutex); From bde54c631857addaf91ffb2e90868b7f02c034ff Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 11 Feb 2025 22:08:22 -0800 Subject: [PATCH 42/56] overlooked. should have been removed --- sycl/unittests/buffer/BufferLocation.cpp | 4 ---- sycl/unittests/buffer/BufferReleaseBase.hpp | 4 ---- 2 files changed, 8 deletions(-) diff --git a/sycl/unittests/buffer/BufferLocation.cpp b/sycl/unittests/buffer/BufferLocation.cpp index c352fe1bbfe6d..71c6d1fa545cd 100644 --- a/sycl/unittests/buffer/BufferLocation.cpp +++ b/sycl/unittests/buffer/BufferLocation.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include @@ -109,9 +108,6 @@ class BufferTest : public ::testing::Test { mock::getCallbacks().set_after_callback("urDeviceGetInfo", &redefinedDeviceGetInfoAfter); } -#ifdef _WIN32 - void TearDown() override { sycl::detail::shutdown_early(); } -#endif sycl::unittest::UrMock<> Mock; sycl::platform Plt; diff --git a/sycl/unittests/buffer/BufferReleaseBase.hpp b/sycl/unittests/buffer/BufferReleaseBase.hpp index a0274d6ad8075..322b85ffe469c 100644 --- a/sycl/unittests/buffer/BufferReleaseBase.hpp +++ b/sycl/unittests/buffer/BufferReleaseBase.hpp @@ -16,7 +16,6 @@ #include #include -#include #include #include @@ -49,9 +48,6 @@ class BufferDestructionCheckCommon : public ::testing::Test { } void TearDown() override { sycl::detail::GlobalHandler::instance().attachScheduler(NULL); -#ifdef _WIN32 - sycl::detail::shutdown_early(); -#endif } template From f2b92983d6d9bcce28684a47618cb0f4f2f07af5 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 11 Feb 2025 22:21:40 -0800 Subject: [PATCH 43/56] relocate --- sycl/source/detail/global_handler.cpp | 22 ++++++++++++---------- sycl/source/detail/global_handler.hpp | 3 --- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 44b676c176779..9e9cb243a9127 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -239,6 +239,18 @@ void GlobalHandler::releaseDefaultContexts() { // is here. For Linux, early shutdown is here, and late shutdown is called from // a low priority destructor. struct StaticVarShutdownHandler { +#ifdef _WIN32 + StaticVarShutdownHandler() { + std::atexit([]() { + try { + shutdown_early(); + } catch (std::exception &e) { + std::cout << "exception in atexit/shutdown_early() " << e.what() + << std::endl; + } + }); + } +#endif ~StaticVarShutdownHandler() { try { #ifdef _WIN32 @@ -254,16 +266,6 @@ struct StaticVarShutdownHandler { }; void GlobalHandler::registerStaticVarShutdownHandler() { -#ifdef _WIN32 - std::atexit([]() { - try { - shutdown_early(); - } catch (std::exception &e) { - std::cout << "exception in atexit/shutdown_early() " << e.what() - << std::endl; - } - }); -#endif static StaticVarShutdownHandler handler{}; } diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 596ba39cb40d3..71e28eaf8e60b 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -31,9 +31,6 @@ using PlatformImplPtr = std::shared_ptr; using ContextImplPtr = std::shared_ptr; using AdapterPtr = std::shared_ptr; -// Forward declaration -void shutdown_early(); - /// Wrapper class for global data structures with non-trivial destructors. /// /// As user code can call SYCL Runtime functions from destructor of global From a6ae675bf17250eaed16c51534e4228cdbb70674 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 11 Feb 2025 22:36:27 -0800 Subject: [PATCH 44/56] another change --- sycl/doc/design/GlobalObjectsInRuntime.md | 4 ++- sycl/source/detail/global_handler.cpp | 36 +++++++++++++++-------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index d85382cf9e402..b4b8a2ae88a4d 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -128,7 +128,9 @@ times, the memory leak may impact code performance. ### Windows -Differing from Linux, on Windows the "early_shutdown()" is begun by std::atexit() +Differing from Linux, on Windows the "early_shutdown()" is begun by +DllMain(PROCESS_DETACH), unless the sycl library is linked statically +to an app, in which case we do it immediately before late_shutdown(). The "late_shutdown()" is begun by the destruction of a static StaticVarShutdownHandler object, which is initialized by diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 9e9cb243a9127..c996802836523 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -239,23 +239,21 @@ void GlobalHandler::releaseDefaultContexts() { // is here. For Linux, early shutdown is here, and late shutdown is called from // a low priority destructor. struct StaticVarShutdownHandler { -#ifdef _WIN32 - StaticVarShutdownHandler() { - std::atexit([]() { - try { - shutdown_early(); - } catch (std::exception &e) { - std::cout << "exception in atexit/shutdown_early() " << e.what() - << std::endl; - } - }); - } -#endif + ~StaticVarShutdownHandler() { try { #ifdef _WIN32 + +#ifndef __SYCL_BUILD_SYCL_DLL + // with static linking, DllMain is not called. So we call shutdown_early() + // here. CP + std::cout << "StaticVarShutdownHandler calling shutdown_early()" + << std::endl; + shutdown_early(); +#endif + shutdown_late(); -#else +#else // _WIN32 shutdown_early(); #endif } catch (std::exception &e) { @@ -394,6 +392,18 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; +#ifdef __SYCL_BUILD_SYCL_DLL + // CP + std::cout << "DLL_PROCESS_DETACH syclx.dll calling shutdown_early()" + << std::endl; + try { + shutdown_early(); + } catch (std::exception &e) { + std::cout << "exception in DLL_PROCESS_DETACH" << e.what() << std::endl; + return FALSE; + } +#endif + break; case DLL_PROCESS_ATTACH: if (PrintUrTrace) From 92c97253069eff6f9bcec699deee7e7d4d8e72bd Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 11 Feb 2025 22:59:03 -0800 Subject: [PATCH 45/56] won't work --- sycl/doc/design/GlobalObjectsInRuntime.md | 3 +-- sycl/source/detail/global_handler.cpp | 16 +--------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index b4b8a2ae88a4d..dac988368ff67 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -129,8 +129,7 @@ times, the memory leak may impact code performance. ### Windows Differing from Linux, on Windows the "early_shutdown()" is begun by -DllMain(PROCESS_DETACH), unless the sycl library is linked statically -to an app, in which case we do it immediately before late_shutdown(). +DllMain(PROCESS_DETACH). The "late_shutdown()" is begun by the destruction of a static StaticVarShutdownHandler object, which is initialized by diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index c996802836523..dee19101342ed 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -243,17 +243,8 @@ struct StaticVarShutdownHandler { ~StaticVarShutdownHandler() { try { #ifdef _WIN32 - -#ifndef __SYCL_BUILD_SYCL_DLL - // with static linking, DllMain is not called. So we call shutdown_early() - // here. CP - std::cout << "StaticVarShutdownHandler calling shutdown_early()" - << std::endl; - shutdown_early(); -#endif - shutdown_late(); -#else // _WIN32 +#else shutdown_early(); #endif } catch (std::exception &e) { @@ -392,17 +383,12 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; -#ifdef __SYCL_BUILD_SYCL_DLL - // CP - std::cout << "DLL_PROCESS_DETACH syclx.dll calling shutdown_early()" - << std::endl; try { shutdown_early(); } catch (std::exception &e) { std::cout << "exception in DLL_PROCESS_DETACH" << e.what() << std::endl; return FALSE; } -#endif break; case DLL_PROCESS_ATTACH: From 10ac7cab7891a26e98c01540de711c46b9d06bc7 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 12 Feb 2025 10:23:39 -0800 Subject: [PATCH 46/56] elegant or kludge? a lady has her secrets --- sycl/source/detail/global_handler.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index dee19101342ed..ae8cca1df1bd2 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -39,6 +39,11 @@ SpinLock GlobalHandler::MSyclGlobalHandlerProtector{}; // forward decl void shutdown_early(); void shutdown_late(); +#ifdef _WIN32 +extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, + DWORD fdwReason, + LPVOID lpReserved); +#endif // Utility class to track references on object. // Used for GlobalHandler now and created as thread_local object on the first @@ -243,6 +248,17 @@ struct StaticVarShutdownHandler { ~StaticVarShutdownHandler() { try { #ifdef _WIN32 + // Detect module handle here. If we can't find it, then + // we are statically linked and need to call shutdown_early() as well. + HMODULE hModule = nullptr; + if (!GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, &DllMain, + &hModule) == TRUE) { + // GetModuleHandleEx failed. Statically linked. + std::cout << "StaticVarShutdownHandler calling shutdown_early()" + << std::endl; + shutdown_early(); + } + shutdown_late(); #else shutdown_early(); @@ -384,6 +400,9 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; try { + // CP + std::cout << "DllMain(PROCESS_DETACH) calling shutdown_early()" + << std::endl; shutdown_early(); } catch (std::exception &e) { std::cout << "exception in DLL_PROCESS_DETACH" << e.what() << std::endl; From cf5d5536b512144223d6aa6d2ad244c30d1631e8 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 12 Feb 2025 12:10:26 -0800 Subject: [PATCH 47/56] improvement --- sycl/source/detail/global_handler.cpp | 38 +++++++++++++++++++-------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index ae8cca1df1bd2..8e409aba7645f 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -40,9 +40,7 @@ SpinLock GlobalHandler::MSyclGlobalHandlerProtector{}; void shutdown_early(); void shutdown_late(); #ifdef _WIN32 -extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, - DWORD fdwReason, - LPVOID lpReserved); +BOOL isLinkedStatically(); #endif // Utility class to track references on object. @@ -248,14 +246,9 @@ struct StaticVarShutdownHandler { ~StaticVarShutdownHandler() { try { #ifdef _WIN32 - // Detect module handle here. If we can't find it, then - // we are statically linked and need to call shutdown_early() as well. - HMODULE hModule = nullptr; - if (!GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, &DllMain, - &hModule) == TRUE) { - // GetModuleHandleEx failed. Statically linked. - std::cout << "StaticVarShutdownHandler calling shutdown_early()" - << std::endl; + // If statically linked, DllMain will not be called. So we do its work + // here. + if (isLinkedStatically()) { shutdown_early(); } @@ -422,6 +415,29 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, } return TRUE; // Successful DLL_PROCESS_ATTACH. } +BOOL isLinkedStatically() { + // if the exePath is the same as the dllPath, then we are linked statically + // or, if a module handle is not retrievable. + // but otherwise, we are dynamically linked or loaded. + HMODULE hModule = nullptr; + auto LpModuleAddr = reinterpret_cast(&DllMain); + if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, LpModuleAddr, + &hModule)) { + char dllPath[MAX_PATH]; + if (GetModuleFileNameA(hModule, dllPath, MAX_PATH)) { + char exePath[MAX_PATH]; + if (GetModuleFileNameA(NULL, exePath, MAX_PATH)) { + + if (std::string(dllPath) == std::string(exePath)) { + return true; + } + } + } + } else { + return true; + } + return false; +} #else // Setting low priority on destructor ensures it runs after all other global // destructors. Priorities 0-100 are reserved by the compiler. The priority From f85aa0f7c9f6ac9e874507bbf65ace0cee502191 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 12 Feb 2025 13:24:30 -0800 Subject: [PATCH 48/56] updated comments, temporarily turning off some logging --- sycl/doc/design/GlobalObjectsInRuntime.md | 4 ++-- sycl/source/detail/global_handler.cpp | 25 ++++++++++++----------- sycl/source/detail/host_task.hpp | 4 ++-- sycl/source/detail/thread_pool.hpp | 4 ++-- sycl/unittests/helpers/UrMock.hpp | 2 -- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index dac988368ff67..5606adf754d7d 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -100,7 +100,7 @@ reclaim the memory for us. The issue of which we must be wary is placing UR handles (and similar) in host threads. The RAII mechanism of unique and shared pointers will not work in any thread that is abandoned on Windows. -One last note about threads. It is entirely the OS's discretion on when to +One last note about threads. It is entirely the OS's discretion when to start or schedule a thread. If the main process is very busy then it is possible that threads the SYCL library creates (host_tasks/thread_pool) won't even be started until AFTER the host application main() function is done. @@ -129,7 +129,7 @@ times, the memory leak may impact code performance. ### Windows Differing from Linux, on Windows the "early_shutdown()" is begun by -DllMain(PROCESS_DETACH). +DllMain(PROCESS_DETACH), unless statically linked. The "late_shutdown()" is begun by the destruction of a static StaticVarShutdownHandler object, which is initialized by diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 8e409aba7645f..a2151cdf5e5b7 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -238,8 +238,9 @@ void GlobalHandler::releaseDefaultContexts() { // Shutdown is split into two parts. shutdown_early() stops any more // objects from being deferred and takes an initial pass at freeing them. // shutdown_late() finishes and releases the adapters and the GlobalHandler. -// For Windows, early shutdown is called from std::atexit(), and late shutdown -// is here. For Linux, early shutdown is here, and late shutdown is called from +// For Windows, early shutdown is typically called from DllMain, +// and late shutdown is here. +// For Linux, early shutdown is here, and late shutdown is called from // a low priority destructor. struct StaticVarShutdownHandler { @@ -310,7 +311,7 @@ void GlobalHandler::drainThreadPool() { void shutdown_early() { // CP - std::cout << "shutdown_early()" << std::endl; + // std::cout << "shutdown_early()" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -333,7 +334,7 @@ void shutdown_early() { Handler->MHostTaskThreadPool.Inst->finishAndWait(); // CP - std::cout << "finishAndWait() done" << std::endl; + // std::cout << "finishAndWait() done" << std::endl; // This releases OUR reference to the default context, but // other may yet have refs Handler->releaseDefaultContexts(); @@ -341,7 +342,7 @@ void shutdown_early() { void shutdown_late() { // CP - std::cout << "shutdown_late()" << std::endl; + // std::cout << "shutdown_late()" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -370,7 +371,7 @@ void shutdown_late() { Handler = nullptr; // CP - std::cout << "shutdown_late() done" << std::endl; + // std::cout << "shutdown_late() done" << std::endl; } #ifdef _WIN32 @@ -394,8 +395,8 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, try { // CP - std::cout << "DllMain(PROCESS_DETACH) calling shutdown_early()" - << std::endl; + // std::cout << "DllMain(PROCESS_DETACH) calling shutdown_early()" + // << std::endl; shutdown_early(); } catch (std::exception &e) { std::cout << "exception in DLL_PROCESS_DETACH" << e.what() << std::endl; @@ -416,9 +417,10 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, return TRUE; // Successful DLL_PROCESS_ATTACH. } BOOL isLinkedStatically() { - // if the exePath is the same as the dllPath, then we are linked statically - // or, if a module handle is not retrievable. - // but otherwise, we are dynamically linked or loaded. + // If the exePath is the same as the dllPath, + // or if the module handle for DllMain is not retrievable, + // then we are linked statically + // Otherwise we are dynamically linked or loaded. HMODULE hModule = nullptr; auto LpModuleAddr = reinterpret_cast(&DllMain); if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, LpModuleAddr, @@ -427,7 +429,6 @@ BOOL isLinkedStatically() { if (GetModuleFileNameA(hModule, dllPath, MAX_PATH)) { char exePath[MAX_PATH]; if (GetModuleFileNameA(NULL, exePath, MAX_PATH)) { - if (std::string(dllPath) == std::string(exePath)) { return true; } diff --git a/sycl/source/detail/host_task.hpp b/sycl/source/detail/host_task.hpp index 085f7d794f344..d9c0a455fb9cc 100644 --- a/sycl/source/detail/host_task.hpp +++ b/sycl/source/detail/host_task.hpp @@ -35,7 +35,7 @@ class HostTask { void call(HostProfilingInfo *HPI) { // CP - std::cout << "host_task call()" << std::endl; + // std::cout << "host_task call()" << std::endl; if (!GlobalHandler::instance().isOkToDefer()) { return; } @@ -49,7 +49,7 @@ class HostTask { void call(HostProfilingInfo *HPI, interop_handle handle) { // CP - std::cout << "host_task call()" << std::endl; + // std::cout << "host_task call()" << std::endl; if (!GlobalHandler::instance().isOkToDefer()) { return; } diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 660014ccfc6e7..b20fada29f1dd 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -56,7 +56,7 @@ class ThreadPool { void start() { // CP - std::cout << "thread_pool start()" << std::endl; + // std::cout << "thread_pool start()" << std::endl; MLaunchedThreads.reserve(MThreadCount); MJobsInPool.store(0); @@ -87,7 +87,7 @@ class ThreadPool { void finishAndWait() { // CP - std::cout << "finishAndWait()" << std::endl; + // std::cout << "finishAndWait()" << std::endl; { std::lock_guard Lock(MJobQueueMutex); MStop = true; diff --git a/sycl/unittests/helpers/UrMock.hpp b/sycl/unittests/helpers/UrMock.hpp index 66f660defc6a3..53c4db96dd84d 100644 --- a/sycl/unittests/helpers/UrMock.hpp +++ b/sycl/unittests/helpers/UrMock.hpp @@ -568,14 +568,12 @@ template class UrMock { ~UrMock() { // mock::getCallbacks() is an application lifetime object, we need to reset // these between tests - detail::GlobalHandler::instance().prepareSchedulerToRelease(true); detail::GlobalHandler::instance().releaseDefaultContexts(); // clear platform cache in case subsequent tests want a different backend, // this forces platforms to be reconstructed (and thus queries about UR // backend info to be called again) detail::GlobalHandler::instance().getPlatformCache().clear(); - mock::getCallbacks().resetCallbacks(); } From a95284c7bc888961c84a52a8f5fcfe50a61a90f8 Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Wed, 12 Feb 2025 16:46:01 -0800 Subject: [PATCH 49/56] restoring --- sycl/unittests/SYCL2020/KernelBundle.cpp | 9 --------- .../program_manager/DynamicLinking/DynamicLinking.cpp | 8 -------- 2 files changed, 17 deletions(-) diff --git a/sycl/unittests/SYCL2020/KernelBundle.cpp b/sycl/unittests/SYCL2020/KernelBundle.cpp index 05617a3a2decf..5ffe4ecbc341a 100644 --- a/sycl/unittests/SYCL2020/KernelBundle.cpp +++ b/sycl/unittests/SYCL2020/KernelBundle.cpp @@ -487,14 +487,6 @@ ur_result_t redefinedDevicePartitionAfter(void *pParams) { return UR_RESULT_SUCCESS; } -#ifndef _WIN32 -// While the Mock object is created anew for each test, the SYCL -// GlobalHandler is not. It is the same across all tests in a -// single executable. Some of the mock callbacks modify the -// global platforms/devices. On Linux, we can clear these -// changes by faking shutdown. But on Windows we can't do that. -// To avoid conflicts, this test is being skipped on Windows. -// It would pass if placed in its own suite. TEST(KernelBundle, DescendentDevice) { // Mock a non-OpenCL adapter since use of descendent devices of context // members is not supported there yet. @@ -529,7 +521,6 @@ TEST(KernelBundle, DescendentDevice) { EXPECT_EQ(KernelBundle, RetKernelBundle); } -#endif TEST(KernelBundle, CheckIfBundleHasIncompatibleKernel) { sycl::unittest::UrMock<> Mock; diff --git a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp index d523408a347f4..c48a5c1626c69 100644 --- a/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp +++ b/sycl/unittests/program_manager/DynamicLinking/DynamicLinking.cpp @@ -253,13 +253,6 @@ TEST(DynamicLinking, AheadOfTime) { AOT_CASE_PRG_NATIVE * AOT_CASE_PRG_DEP_NATIVE); } -#ifndef _WIN32 -// The 'setupRuntimeLinkingMock' used by other tests results in -// changes to the global platforms/devices that will result -// in a test failure if not cleared. On Linux, the Mock's destructor -// fakes shutdown to clear them. But on Windows we can't -// do that hack. So we skip this test. It would pass -// if placed in its own test suite. TEST(DynamicLinking, AheadOfTimeUnsupported) { try { sycl::unittest::UrMock Mock; @@ -273,7 +266,6 @@ TEST(DynamicLinking, AheadOfTimeUnsupported) { "unsupported for the backend"); } } -#endif static ur_result_t redefined_urProgramCompileExp(void *pParams) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; From 6314b633f08d8aa685ec290a48a3209e050e4138 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 12 Feb 2025 17:21:55 -0800 Subject: [PATCH 50/56] clang-format --- sycl/source/detail/scheduler/scheduler.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 35b51c035d428..5cb1702f47530 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -285,6 +285,9 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, // abandon them. But no worries, the memory WILL be reclaimed. bool allowWait = MemObj->hasUserDataPtr() || GlobalHandler::instance().isOkToDefer(); + if (!allowWait) { + StrictLock = false; + } #else bool allowWait = true; #endif From f12f495d42f7fe32b5403bc4242ec448f5c70545 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 12 Feb 2025 20:21:27 -0800 Subject: [PATCH 51/56] knew this was coming. Hopefully it won't upset any tests --- sycl/source/detail/scheduler/scheduler.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 5cb1702f47530..7b91399959a9b 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -302,10 +302,20 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, waitForRecordToFinish(Record, Lock); } { + // If allowWait is false, it means the application is shutting down. + // On Windows we can't safely wait on threads, because they have likely been + // abandoned. So we will try to get the lock. If we can, great, we'll remove + // the record. But if we can't, we just skip. The OS will reclaim the + // memory. WriteLockT Lock = StrictLock ? acquireWriteLock() : WriteLockT(MGraphLock, std::try_to_lock); - if (!Lock.owns_lock()) - return false; + if (!Lock.owns_lock()) { + + if (allowWait) + return false; // Record was not removed, the caller may try again. + else + return true; // skip. + } MGraphBuilder.decrementLeafCountersForRecord(Record); MGraphBuilder.cleanupCommandsForRecord(Record); MGraphBuilder.removeRecordForMemObj(MemObj); From cf8c1b4754933f881a60e81ba151ddda8e06d112 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Wed, 12 Feb 2025 21:50:35 -0800 Subject: [PATCH 52/56] cleanup --- sycl/source/detail/global_handler.cpp | 12 ------------ sycl/source/detail/host_task.hpp | 4 ---- sycl/source/detail/thread_pool.hpp | 14 -------------- 3 files changed, 30 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index a2151cdf5e5b7..a35fd1511ad16 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -310,8 +310,6 @@ void GlobalHandler::drainThreadPool() { } void shutdown_early() { - // CP - // std::cout << "shutdown_early()" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -333,16 +331,12 @@ void shutdown_early() { if (Handler->MHostTaskThreadPool.Inst) Handler->MHostTaskThreadPool.Inst->finishAndWait(); - // CP - // std::cout << "finishAndWait() done" << std::endl; // This releases OUR reference to the default context, but // other may yet have refs Handler->releaseDefaultContexts(); } void shutdown_late() { - // CP - // std::cout << "shutdown_late()" << std::endl; const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); if (!Handler) @@ -369,9 +363,6 @@ void shutdown_late() { // Release the rest of global resources. delete Handler; Handler = nullptr; - - // CP - // std::cout << "shutdown_late() done" << std::endl; } #ifdef _WIN32 @@ -394,9 +385,6 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; try { - // CP - // std::cout << "DllMain(PROCESS_DETACH) calling shutdown_early()" - // << std::endl; shutdown_early(); } catch (std::exception &e) { std::cout << "exception in DLL_PROCESS_DETACH" << e.what() << std::endl; diff --git a/sycl/source/detail/host_task.hpp b/sycl/source/detail/host_task.hpp index d9c0a455fb9cc..f7e3feff8d0ef 100644 --- a/sycl/source/detail/host_task.hpp +++ b/sycl/source/detail/host_task.hpp @@ -34,8 +34,6 @@ class HostTask { bool isInteropTask() const { return !!MInteropTask; } void call(HostProfilingInfo *HPI) { - // CP - // std::cout << "host_task call()" << std::endl; if (!GlobalHandler::instance().isOkToDefer()) { return; } @@ -48,8 +46,6 @@ class HostTask { } void call(HostProfilingInfo *HPI, interop_handle handle) { - // CP - // std::cout << "host_task call()" << std::endl; if (!GlobalHandler::instance().isOkToDefer()) { return; } diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index b20fada29f1dd..988ecdeb00f60 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -55,8 +55,6 @@ class ThreadPool { } void start() { - // CP - // std::cout << "thread_pool start()" << std::endl; MLaunchedThreads.reserve(MThreadCount); MJobsInPool.store(0); @@ -75,19 +73,7 @@ class ThreadPool { start(); } - ~ThreadPool() { - // CP - // try { - // std::cout << "~ThreadPool()" << std::endl; - // finishAndWait(); - // } catch (std::exception &e) { - // __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ThreadPool", e); - // } - } - void finishAndWait() { - // CP - // std::cout << "finishAndWait()" << std::endl; { std::lock_guard Lock(MJobQueueMutex); MStop = true; From 979d6b256ace78b3ebd068f54e49f56283bc9892 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 13 Feb 2025 16:10:38 -0800 Subject: [PATCH 53/56] no finishWait on win in threadpool destructor --- sycl/source/detail/thread_pool.hpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sycl/source/detail/thread_pool.hpp b/sycl/source/detail/thread_pool.hpp index 988ecdeb00f60..e9d441d6d27d1 100644 --- a/sycl/source/detail/thread_pool.hpp +++ b/sycl/source/detail/thread_pool.hpp @@ -73,6 +73,16 @@ class ThreadPool { start(); } + ~ThreadPool() { + try { +#ifndef _WIN32 + finishAndWait(); +#endif + } catch (std::exception &e) { + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ThreadPool", e); + } + } + void finishAndWait() { { std::lock_guard Lock(MJobQueueMutex); From 8d111f1a47b5ab89995f2225b558bcd1c86800b2 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 18 Feb 2025 17:42:04 -0800 Subject: [PATCH 54/56] remove stray line. ( Actually, I just want to try a new test run ) --- sycl/source/detail/scheduler/scheduler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 7b91399959a9b..ac7d7b2ecb62d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -310,7 +310,6 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, WriteLockT Lock = StrictLock ? acquireWriteLock() : WriteLockT(MGraphLock, std::try_to_lock); if (!Lock.owns_lock()) { - if (allowWait) return false; // Record was not removed, the caller may try again. else From caedef95c55181c5733fe4993947cd8bd03868d5 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 20 Feb 2025 16:23:43 -0800 Subject: [PATCH 55/56] reviewer feedback --- sycl/doc/design/GlobalObjectsInRuntime.md | 4 ++-- sycl/source/detail/queue_impl.hpp | 3 ++- sycl/source/detail/scheduler/graph_builder.cpp | 2 +- sycl/source/detail/scheduler/scheduler.cpp | 8 ++++---- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index 5606adf754d7d..eb5ed35ce25bd 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -84,7 +84,7 @@ The deferred memory marshalling is built on a thread pool, but there is a challenge here in that on Windows, once the end of the users main() is reached and their app is shutting down, the Windows OS will abandon all remaining in-flight threads. These threads can be .join() but they simply return instantly, -the threads are not completed. Further any thread specific variables +the threads are not completed. Furthermore, any thread specific variables (or thread_local static vars) will NOT have their destructors called. Note that the standard while-loop-over-condition-var pattern will cause a hang - we cannot "wait" on abandoned threads. @@ -133,7 +133,7 @@ DllMain(PROCESS_DETACH), unless statically linked. The "late_shutdown()" is begun by the destruction of a static StaticVarShutdownHandler object, which is initialized by -platform::get_platforms(). ( On linux, this is when we do "early_shutdown()". +platform::get_platforms(). (On Linux, this is when we do "early_shutdown()". Go figure.) This is as late as we can manage, but it is later than any user application global, static, or thread_local variable destruction. diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 7bb22bc8296f8..6b57cb5a6235c 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -266,9 +266,10 @@ class queue_impl { throw_asynchronous(); auto status = getAdapter()->call_nocheck(MQueues[0]); - // if loader is already closed, it'll return a not-initialized status + // If loader is already closed, it'll return a not-initialized status // which the UR should convert to SUCCESS code. But that isn't always // working on Windows. This is a temporary workaround until that is fixed. + // TODO: Remove this workaround when UR is fixed. if (status != UR_RESULT_SUCCESS && status != UR_RESULT_ERROR_UNINITIALIZED) { __SYCL_CHECK_UR_CODE_NO_EXC(status); diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index a944aaba6d0e9..85bc93f7d6a9a 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -487,7 +487,7 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, std::vector ToCleanUp; for (Command *Dep : Deps) { if (Dep->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) - continue; // nothing to do + continue; Command *ConnCmd = MemCpyCmd->addDep( DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}, ToCleanUp); diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index ac7d7b2ecb62d..4c84bd2d8fbb9 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -53,7 +53,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, std::vector ToCleanUp; for (Command *Cmd : Record->MReadLeaves) { if (Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) - continue; // nothing to do + continue; EnqueueResultT Res; bool Enqueued = @@ -69,7 +69,7 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, } for (Command *Cmd : Record->MWriteLeaves) { if (Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) - continue; // nothing to do + continue; EnqueueResultT Res; bool Enqueued = @@ -283,13 +283,13 @@ bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, // If we are shutting down on Windows it may not be // safe to wait on host threads, as the OS may // abandon them. But no worries, the memory WILL be reclaimed. - bool allowWait = + const bool allowWait = MemObj->hasUserDataPtr() || GlobalHandler::instance().isOkToDefer(); if (!allowWait) { StrictLock = false; } #else - bool allowWait = true; + const bool allowWait = true; #endif if (allowWait) { From 3b93f6456a74fd35c8b3b0bd2ea21abcd884917a Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 21 Feb 2025 10:53:30 -0800 Subject: [PATCH 56/56] more reviewer feedback before splitting into two PR --- sycl/doc/design/GlobalObjectsInRuntime.md | 2 +- sycl/source/detail/global_handler.cpp | 18 +++++++++--------- sycl/source/detail/queue_impl.hpp | 3 ++- sycl/test-e2e/Scheduler/DeleteCmdException.cpp | 5 ++++- 4 files changed, 16 insertions(+), 12 deletions(-) diff --git a/sycl/doc/design/GlobalObjectsInRuntime.md b/sycl/doc/design/GlobalObjectsInRuntime.md index eb5ed35ce25bd..911ce3055c615 100644 --- a/sycl/doc/design/GlobalObjectsInRuntime.md +++ b/sycl/doc/design/GlobalObjectsInRuntime.md @@ -109,7 +109,7 @@ This is not a normal occurrence, but it can happen if there is no call to queue. ### Linux -On Linux, the "eary_shutdown()" is begun by the destruction of a static +On Linux, the "early_shutdown()" is begun by the destruction of a static StaticVarShutdownHandler object, which is initialized by platform::get_platforms(). diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index a35fd1511ad16..8233198970e09 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -258,8 +258,8 @@ struct StaticVarShutdownHandler { shutdown_early(); #endif } catch (std::exception &e) { - std::cout << "exception in ~StaticVarShutdownHandler " << e.what() - << std::endl; + __SYCL_REPORT_EXCEPTION_TO_STREAM( + "exception in ~StaticVarShutdownHandler", e); } } }; @@ -387,7 +387,7 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, try { shutdown_early(); } catch (std::exception &e) { - std::cout << "exception in DLL_PROCESS_DETACH" << e.what() << std::endl; + __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in DLL_PROCESS_DETACH", e); return FALSE; } @@ -411,21 +411,21 @@ BOOL isLinkedStatically() { // Otherwise we are dynamically linked or loaded. HMODULE hModule = nullptr; auto LpModuleAddr = reinterpret_cast(&DllMain); - if (GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, LpModuleAddr, - &hModule)) { + if (!GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, LpModuleAddr, + &hModule)) { + return true; // not retrievable, therefore statically linked + } else { char dllPath[MAX_PATH]; if (GetModuleFileNameA(hModule, dllPath, MAX_PATH)) { char exePath[MAX_PATH]; if (GetModuleFileNameA(NULL, exePath, MAX_PATH)) { if (std::string(dllPath) == std::string(exePath)) { - return true; + return true; // paths identical, therefore statically linked } } } - } else { - return true; } - return false; + return false; // Otherwise dynamically linked or loaded } #else // Setting low priority on destructor ensures it runs after all other global diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 6b57cb5a6235c..290661e93b5b6 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -269,7 +269,8 @@ class queue_impl { // If loader is already closed, it'll return a not-initialized status // which the UR should convert to SUCCESS code. But that isn't always // working on Windows. This is a temporary workaround until that is fixed. - // TODO: Remove this workaround when UR is fixed. + // TODO: Remove this workaround when UR is fixed, and restore + // ->call<>() instead of ->call_nocheck<>() above. if (status != UR_RESULT_SUCCESS && status != UR_RESULT_ERROR_UNINITIALIZED) { __SYCL_CHECK_UR_CODE_NO_EXC(status); diff --git a/sycl/test-e2e/Scheduler/DeleteCmdException.cpp b/sycl/test-e2e/Scheduler/DeleteCmdException.cpp index c06820428b150..28788351c549b 100644 --- a/sycl/test-e2e/Scheduler/DeleteCmdException.cpp +++ b/sycl/test-e2e/Scheduler/DeleteCmdException.cpp @@ -8,6 +8,9 @@ // REQUIRES: level_zero +// UNSUPPORTED: windows +// UNSUPPORTED-TRACKER: CMPLRLLVM-44705 + // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out @@ -61,4 +64,4 @@ int main() { } return 0; -} \ No newline at end of file +}