From cc6d57c917a6632ccc30f8b59f01ae205ddfc21e Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 9 Jun 2025 15:47:22 -0700 Subject: [PATCH] [NFC][SYCL] Use raw `context_impl &` in `program_manager` Part of the ongoing refactoring to prefer raw ptr/ref for SYCL RT objects by default with explicit `shared_from_this` when lifetimes need to be extended. --- sycl/source/detail/graph_impl.cpp | 2 +- sycl/source/detail/helpers.cpp | 6 +- sycl/source/detail/kernel_impl.cpp | 4 +- sycl/source/detail/memory_manager.cpp | 2 +- .../program_manager/program_manager.cpp | 147 +++++++++--------- .../program_manager/program_manager.hpp | 21 ++- sycl/source/detail/scheduler/commands.cpp | 6 +- sycl/unittests/program_manager/Cleanup.cpp | 6 +- sycl/unittests/program_manager/SubDevices.cpp | 4 +- .../arg_mask/EliminatedArgMask.cpp | 4 +- 10 files changed, 101 insertions(+), 101 deletions(-) diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index caf50642c3a2c..cfb16ed59c79f 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -1545,7 +1545,7 @@ void exec_graph_impl::populateURKernelUpdateStructs( EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, ExecCG.MKernelName, + *ContextImpl, DeviceImpl, ExecCG.MKernelName, ExecCG.MKernelNameBasedCachePtr); UrKernel = BundleObjs->MKernelHandle; EliminatedArgMask = BundleObjs->MKernelArgMask; diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 5568d28b56eb6..df0a63086f1a7 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -62,7 +62,7 @@ retrieveKernelBinary(queue_impl &Queue, KernelNameStrRefT KernelName, auto ContextImpl = Queue.getContextImplPtr(); ur_program_handle_t Program = detail::ProgramManager::getInstance().createURProgram( - **DeviceImage, ContextImpl, {createSyclObjFromImpl(Dev)}); + **DeviceImage, *ContextImpl, {createSyclObjFromImpl(Dev)}); return {*DeviceImage, Program}; } @@ -82,9 +82,9 @@ retrieveKernelBinary(queue_impl &Queue, KernelNameStrRefT KernelName, } else { auto ContextImpl = Queue.getContextImplPtr(); DeviceImage = &detail::ProgramManager::getInstance().getDeviceImage( - KernelName, ContextImpl, &Dev); + KernelName, *ContextImpl, &Dev); Program = detail::ProgramManager::getInstance().createURProgram( - *DeviceImage, ContextImpl, {createSyclObjFromImpl(Dev)}); + *DeviceImage, *ContextImpl, {createSyclObjFromImpl(Dev)}); } return {DeviceImage, Program}; } diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 50fadfd1f4f0a..8b02811b287e4 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -20,8 +20,8 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, KernelBundleImplPtr KernelBundleImpl, const KernelArgMask *ArgMask) : MKernel(Kernel), MContext(Context), - MProgram(ProgramManager::getInstance().getUrProgramFromUrKernel(Kernel, - Context)), + MProgram(ProgramManager::getInstance().getUrProgramFromUrKernel( + Kernel, *Context)), MCreatedFromSource(true), MKernelBundleImpl(std::move(KernelBundleImpl)), MIsInterop(true), MKernelArgMaskPtr{ArgMask} { ur_context_handle_t UrContext = nullptr; diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index cc20d700c672f..4f7d318e38cde 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1146,7 +1146,7 @@ getOrBuildProgramForDeviceGlobal(queue_impl &Queue, auto Context = createSyclObjFromImpl(ContextImpl); ProgramManager &PM = ProgramManager::getInstance(); RTDeviceBinaryImage &Img = PM.getDeviceImage( - DeviceGlobalEntry->MImages, ContextImpl, getSyclObjImpl(Device).get()); + DeviceGlobalEntry->MImages, *ContextImpl, getSyclObjImpl(Device).get()); device_image_plain DeviceImage = PM.getDeviceImageFromBinaryImage(&Img, Context, Device); device_image_plain BuiltImage = diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index d2abdbe29fb30..885c019cf7050 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -73,11 +73,10 @@ ProgramManager &ProgramManager::getInstance() { } static ur_program_handle_t -createBinaryProgram(const ContextImplPtr &Context, - const std::vector &Devices, +createBinaryProgram(context_impl &Context, const std::vector &Devices, const uint8_t **Binaries, size_t *Lengths, const std::vector &Metadata) { - const AdapterPtr &Adapter = Context->getAdapter(); + const AdapterPtr &Adapter = Context.getAdapter(); ur_program_handle_t Program; std::vector DeviceHandles; std::transform( @@ -92,7 +91,7 @@ createBinaryProgram(const ContextImplPtr &Context, assert(Devices.size() > 0 && "No devices provided for program creation"); Adapter->call( - Context->getHandleRef(), DeviceHandles.size(), DeviceHandles.data(), + Context.getHandleRef(), DeviceHandles.size(), DeviceHandles.data(), Lengths, Binaries, &Properties, &Program); if (BinaryStatus != UR_RESULT_SUCCESS) { throw detail::set_ur_error( @@ -104,30 +103,30 @@ createBinaryProgram(const ContextImplPtr &Context, return Program; } -static ur_program_handle_t createSpirvProgram(const ContextImplPtr &Context, +static ur_program_handle_t createSpirvProgram(context_impl &Context, const unsigned char *Data, size_t DataLen) { ur_program_handle_t Program = nullptr; - const AdapterPtr &Adapter = Context->getAdapter(); - Adapter->call(Context->getHandleRef(), Data, + const AdapterPtr &Adapter = Context.getAdapter(); + Adapter->call(Context.getHandleRef(), Data, DataLen, nullptr, &Program); return Program; } // TODO replace this with a new UR API function -static bool isDeviceBinaryTypeSupported(const ContextImplPtr &ContextImpl, +static bool isDeviceBinaryTypeSupported(context_impl &ContextImpl, ur::DeviceBinaryType Format) { // All formats except SYCL_DEVICE_BINARY_TYPE_SPIRV are supported. if (Format != SYCL_DEVICE_BINARY_TYPE_SPIRV) return true; - const backend ContextBackend = ContextImpl->getBackend(); + const backend ContextBackend = ContextImpl.getBackend(); // The CUDA backend cannot use SPIR-V if (ContextBackend == backend::ext_oneapi_cuda) return false; - const std::vector &Devices = ContextImpl->getDevices(); + const std::vector &Devices = ContextImpl.getDevices(); // Program type is SPIR-V, so we need a device compiler to do JIT. for (const device &D : Devices) { @@ -137,7 +136,7 @@ static bool isDeviceBinaryTypeSupported(const ContextImplPtr &ContextImpl, // OpenCL 2.1 and greater require clCreateProgramWithIL if (ContextBackend == backend::opencl) { - std::string ver = ContextImpl->get_info() + std::string ver = ContextImpl.get_info() .get_info(); if (ver.find("OpenCL 1.0") == std::string::npos && ver.find("OpenCL 1.1") == std::string::npos && @@ -190,7 +189,7 @@ static bool isDeviceBinaryTypeSupported(const ContextImplPtr &ContextImpl, ur_program_handle_t ProgramManager::createURProgram(const RTDeviceBinaryImage &Img, - const ContextImplPtr &ContextImpl, + context_impl &ContextImpl, const std::vector &Devices) { if constexpr (DbgProgMgr > 0) { std::vector URDevices; @@ -247,10 +246,10 @@ ProgramManager::createURProgram(const RTDeviceBinaryImage &Img, { std::lock_guard Lock(MNativeProgramsMutex); // associate the UR program with the image it was created for - NativePrograms.insert({Res, {ContextImpl, &Img}}); + NativePrograms.insert({Res, {ContextImpl.shared_from_this(), &Img}}); } - ContextImpl->addDeviceGlobalInitializer(Res, Devices, &Img); + ContextImpl.addDeviceGlobalInitializer(Res, Devices, &Img); if constexpr (DbgProgMgr > 1) std::cerr << "created program: " << Res @@ -518,7 +517,7 @@ static void applyOptionsFromEnvironment(std::string &CompileOpts, std::pair ProgramManager::getOrCreateURProgram( const RTDeviceBinaryImage &MainImg, const std::vector &AllImages, - const ContextImplPtr &ContextImpl, const std::vector &Devices, + context_impl &ContextImpl, const std::vector &Devices, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts) { ur_program_handle_t NativePrg; @@ -551,7 +550,7 @@ std::pair ProgramManager::getOrCreateURProgram( /// Emits information about built programs if the appropriate contitions are /// met, namely when SYCL_RT_WARNING_LEVEL is greater than or equal to 2. static void emitBuiltProgramInfo(const ur_program_handle_t &Prog, - const ContextImplPtr &Context) { + context_impl &Context) { if (SYCLConfig::get() >= 2) { std::string ProgramBuildLog = ProgramManager::getProgramBuildLog(Prog, Context); @@ -849,7 +848,7 @@ CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { // When caching is enabled, the returned UrProgram will already have // its ref count incremented. ur_program_handle_t ProgramManager::getBuiltURProgram( - const ContextImplPtr &ContextImpl, device_impl &DeviceImpl, + context_impl &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, const NDRDescT &NDRDesc) { device_impl *RootDevImpl; ur_bool_t MustBuildOnSubdevice = true; @@ -864,12 +863,12 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( RootDevImpl->get_info()) .get(); // Sharing is allowed within a single context only - if (!ContextImpl->hasDevice(*ParentDev)) + if (!ContextImpl.hasDevice(*ParentDev)) break; RootDevImpl = ParentDev; } - ContextImpl->getAdapter()->call( + ContextImpl.getAdapter()->call( RootDevImpl->getHandleRef(), UR_DEVICE_INFO_BUILD_ON_SUBDEVICE, sizeof(ur_bool_t), &MustBuildOnSubdevice, nullptr); } @@ -901,7 +900,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( } ur_program_handle_t ProgramManager::getBuiltURProgram( - const BinImgWithDeps &ImgWithDeps, const ContextImplPtr &ContextImpl, + const BinImgWithDeps &ImgWithDeps, context_impl &ContextImpl, const std::vector &Devs, const DevImgPlainWithDeps *DevImgWithDeps, const SerializedObj &SpecConsts) { std::string CompileOpts; @@ -909,7 +908,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( applyOptionsFromEnvironment(CompileOpts, LinkOpts); auto BuildF = [this, &ImgWithDeps, &DevImgWithDeps, &ContextImpl, &Devs, &CompileOpts, &LinkOpts, &SpecConsts] { - const AdapterPtr &Adapter = ContextImpl->getAdapter(); + const AdapterPtr &Adapter = ContextImpl.getAdapter(); const RTDeviceBinaryImage &MainImg = *ImgWithDeps.getMain(); applyOptionsFromImage(CompileOpts, LinkOpts, MainImg, Devs, Adapter); // Should always come last! @@ -994,11 +993,12 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( // removal of map entries with same handle (obviously invalid entries). std::ignore = NativePrograms.erase(BuiltProgram.get()); for (const RTDeviceBinaryImage *Img : ImgWithDeps) { - NativePrograms.insert({BuiltProgram.get(), {ContextImpl, Img}}); + NativePrograms.insert( + {BuiltProgram.get(), {ContextImpl.shared_from_this(), Img}}); } } - ContextImpl->addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &MainImg); + ContextImpl.addDeviceGlobalInitializer(BuiltProgram.get(), Devs, &MainImg); // Save program to persistent cache if it is not there if (!DeviceCodeWasInCache) { @@ -1023,7 +1023,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( auto CacheKey = std::make_pair(std::make_pair(SpecConsts, ImgId), URDevicesSet); - KernelProgramCache &Cache = ContextImpl->getKernelProgramCache(); + KernelProgramCache &Cache = ContextImpl.getKernelProgramCache(); auto GetCachedBuildF = [&Cache, &CacheKey]() { return Cache.getOrInsertProgram(CacheKey); }; @@ -1043,7 +1043,7 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( // Here we have multiple devices a program is built for, so add the program to // the cache for all subsets of provided list of devices. - const AdapterPtr &Adapter = ContextImpl->getAdapter(); + const AdapterPtr &Adapter = ContextImpl.getAdapter(); // If we linked any extra device images, then we need to // cache them as well. auto CacheLinkedImages = [&Adapter, &Cache, &CacheKey, &ResProgram, @@ -1109,17 +1109,17 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( } FastKernelCacheValPtr ProgramManager::getOrCreateKernel( - const ContextImplPtr &ContextImpl, device_impl &DeviceImpl, + context_impl &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, KernelNameBasedCacheT *KernelNameBasedCachePtr, const NDRDescT &NDRDesc) { if constexpr (DbgProgMgr > 0) { - std::cerr << ">>> ProgramManager::getOrCreateKernel(" << ContextImpl.get() + std::cerr << ">>> ProgramManager::getOrCreateKernel(" << &ContextImpl << ", " << &DeviceImpl << ", " << KernelName << ")\n"; } using KernelArgMaskPairT = KernelProgramCache::KernelArgMaskPairT; - KernelProgramCache &Cache = ContextImpl->getKernelProgramCache(); + KernelProgramCache &Cache = ContextImpl.getKernelProgramCache(); ur_device_handle_t UrDevice = DeviceImpl.getHandleRef(); FastKernelSubcacheT *CacheHintPtr = KernelNameBasedCachePtr ? &KernelNameBasedCachePtr->FastKernelSubcache @@ -1137,12 +1137,12 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( auto BuildF = [this, &Program, &KernelName, &ContextImpl] { ur_kernel_handle_t Kernel = nullptr; - const AdapterPtr &Adapter = ContextImpl->getAdapter(); + const AdapterPtr &Adapter = ContextImpl.getAdapter(); Adapter->call( Program, KernelName.data(), &Kernel); // Only set UR_USM_INDIRECT_ACCESS if the platform can handle it. - if (ContextImpl->getPlatformImpl().supports_usm()) { + if (ContextImpl.getPlatformImpl().supports_usm()) { // Some UR Adapters (like OpenCL) require this call to enable USM // For others, UR will turn this into a NOP. const ur_bool_t UrTrue = true; @@ -1167,7 +1167,7 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( // nullptr for the mutex. auto [Kernel, ArgMask] = BuildF(); return std::make_shared( - Kernel, nullptr, ArgMask, Program, *ContextImpl->getAdapter().get()); + Kernel, nullptr, ArgMask, Program, *ContextImpl.getAdapter().get()); } auto BuildResult = Cache.getOrBuild(GetCachedBuildF, BuildF); @@ -1176,12 +1176,12 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( const KernelArgMaskPairT &KernelArgMaskPair = BuildResult->Val; auto ret_val = std::make_shared( KernelArgMaskPair.first, &(BuildResult->MBuildResultMutex), - KernelArgMaskPair.second, Program, *ContextImpl->getAdapter().get()); + KernelArgMaskPair.second, Program, *ContextImpl.getAdapter().get()); // If caching is enabled, one copy of the kernel handle will be // stored in FastKernelCacheVal, and one is in // KernelProgramCache::MKernelsPerProgramCache. To cover // MKernelsPerProgramCache, we need to increase the ref count of the kernel. - ContextImpl->getAdapter()->call( + ContextImpl.getAdapter()->call( KernelArgMaskPair.first); Cache.saveKernel(KernelName, UrDevice, ret_val, CacheHintPtr); return ret_val; @@ -1189,9 +1189,9 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( ur_program_handle_t ProgramManager::getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, - const ContextImplPtr &Context) { + context_impl &Context) { ur_program_handle_t Program; - const AdapterPtr &Adapter = Context->getAdapter(); + const AdapterPtr &Adapter = Context.getAdapter(); Adapter->call(Kernel, UR_KERNEL_INFO_PROGRAM, sizeof(ur_program_handle_t), &Program, nullptr); @@ -1200,9 +1200,9 @@ ProgramManager::getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, std::string ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, - const ContextImplPtr &Context) { + context_impl &Context) { size_t URDevicesSize = 0; - const AdapterPtr &Adapter = Context->getAdapter(); + const AdapterPtr &Adapter = Context.getAdapter(); Adapter->call(Program, UR_PROGRAM_INFO_DEVICES, 0, nullptr, &URDevicesSize); std::vector URDevices(URDevicesSize / @@ -1246,7 +1246,7 @@ ProgramManager::getProgramBuildLog(const ur_program_handle_t &Program, // TODO device libraries may use scpecialization constants, manifest files, etc. // To support that they need to be delivered in a different container - so that // sycl_device_binary_struct can be created for each of them. -static bool loadDeviceLib(const ContextImplPtr Context, const char *Name, +static bool loadDeviceLib(context_impl &Context, const char *Name, ur_program_handle_t &Prog) { std::string LibSyclDir = OSUtil::getCurrentDSODir(); std::ifstream File(LibSyclDir + OSUtil::DirSep + Name, @@ -1345,12 +1345,12 @@ static ur_result_t doCompile(const AdapterPtr &Adapter, } static ur_program_handle_t -loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, +loadDeviceLibFallback(context_impl &Context, DeviceLibExt Extension, std::vector &Devices, bool UseNativeLib) { auto LibFileName = getDeviceLibFilename(Extension, UseNativeLib); - auto LockedCache = Context->acquireCachedLibPrograms(); + auto LockedCache = Context.acquireCachedLibPrograms(); auto &CachedLibPrograms = LockedCache.get(); // Collect list of devices to compile the library for. Library was already // compiled for a device if there is a corresponding record in the per-context @@ -1394,7 +1394,7 @@ loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, // Insert URProgram into the cache for all devices that we compiled it for. // Retain UR program for each record in the cache. - const AdapterPtr &Adapter = Context->getAdapter(); + const AdapterPtr &Adapter = Context.getAdapter(); // UR program handle is stored in the cache for each device that we compiled // it for. We have to retain UR program for each record in the cache. We need @@ -1414,7 +1414,7 @@ loadDeviceLibFallback(const ContextImplPtr Context, DeviceLibExt Extension, // well, and what actually happens to a SPIR-V program if we apply them. ur_result_t Error = doCompile(Adapter, URProgram, DevicesToCompile.size(), - DevicesToCompile.data(), Context->getHandleRef(), ""); + DevicesToCompile.data(), Context.getHandleRef(), ""); if (Error != UR_RESULT_SUCCESS) { EraseProgramForDevices(); throw detail::set_ur_error( @@ -1489,7 +1489,7 @@ sycl_device_binary getRawImg(RTDeviceBinaryImage *Img) { template RTDeviceBinaryImage *getBinImageFromMultiMap( const std::unordered_multimap &ImagesSet, - const StorageKey &Key, const ContextImplPtr &ContextImpl, + const StorageKey &Key, context_impl &ContextImpl, const device_impl *DeviceImpl) { auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key); if (ItBegin == ItEnd) @@ -1520,7 +1520,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( uint32_t ImgInd = 0; // Ask the native runtime under the given context to choose the device image // it prefers. - ContextImpl->getAdapter()->call( + ContextImpl.getAdapter()->call( DeviceImpl->getHandleRef(), UrBinaries.data(), UrBinaries.size(), &ImgInd); return DeviceFilteredImgs[ImgInd]; @@ -1528,7 +1528,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( RTDeviceBinaryImage & ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, - const ContextImplPtr &ContextImpl, + context_impl &ContextImpl, const device_impl *DeviceImpl) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getDeviceImage(\"" << KernelName << "\", " @@ -1575,7 +1575,7 @@ ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, RTDeviceBinaryImage &ProgramManager::getDeviceImage( const std::unordered_set &ImageSet, - const ContextImplPtr &ContextImpl, const device_impl *DeviceImpl) { + context_impl &ContextImpl, const device_impl *DeviceImpl) { assert(ImageSet.size() > 0); if constexpr (DbgProgMgr > 0) { @@ -1602,7 +1602,7 @@ RTDeviceBinaryImage &ProgramManager::getDeviceImage( getUrDeviceTarget(RawImgs[BinaryCount]->DeviceTargetSpec); } - ContextImpl->getAdapter()->call( + ContextImpl.getAdapter()->call( DeviceImpl->getHandleRef(), UrBinaries.data(), UrBinaries.size(), &ImgInd); @@ -1625,7 +1625,7 @@ static bool isDeviceLibRequired(DeviceLibExt Ext, uint32_t DeviceLibReqMask) { } static std::vector -getDeviceLibPrograms(const ContextImplPtr Context, +getDeviceLibPrograms(context_impl &Context, std::vector &Devices, uint32_t DeviceLibReqMask) { std::vector Programs; @@ -1647,7 +1647,7 @@ getDeviceLibPrograms(const ContextImplPtr Context, // one underlying device doesn't support cl_khr_fp64. const bool fp64Support = std::all_of( Devices.begin(), Devices.end(), [&Context](ur_device_handle_t Device) { - return Context->getPlatformImpl().getDeviceImpl(Device)->has_extension( + return Context.getPlatformImpl().getDeviceImpl(Device)->has_extension( "cl_khr_fp64"); }); @@ -1657,7 +1657,7 @@ getDeviceLibPrograms(const ContextImplPtr Context, // TODO: device_impl::has_extension should cache extension string, then we'd // be able to use that in the loop below directly. std::string DevExtList = urGetInfoString( - *Context->getPlatformImpl().getDeviceImpl(Device), + *Context.getPlatformImpl().getDeviceImpl(Device), UR_DEVICE_INFO_EXTENSIONS); for (auto &Pair : RequiredDeviceLibExt) { @@ -1714,7 +1714,7 @@ static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { } ProgramManager::ProgramPtr ProgramManager::build( - ProgramPtr Program, const ContextImplPtr &Context, + ProgramPtr Program, context_impl &Context, const std::string &CompileOptions, const std::string &LinkOptions, std::vector &Devices, uint32_t DeviceLibReqMask, const std::vector &ExtraProgramsToLink, @@ -1745,7 +1745,7 @@ ProgramManager::ProgramPtr ProgramManager::build( static const char *ForceLinkEnv = std::getenv("SYCL_FORCE_LINK"); static bool ForceLink = ForceLinkEnv && (*ForceLinkEnv == '1'); - const AdapterPtr &Adapter = Context->getAdapter(); + const AdapterPtr &Adapter = Context.getAdapter(); if (LinkPrograms.empty() && ExtraProgramsToLink.empty() && !ForceLink) { const std::string &Options = LinkOptions.empty() ? CompileOptions @@ -1754,7 +1754,7 @@ ProgramManager::ProgramPtr ProgramManager::build( Program.get(), Devices.size(), Devices.data(), Options.c_str()); if (Error == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Error = Adapter->call_nocheck( - Context->getHandleRef(), Program.get(), Options.c_str()); + Context.getHandleRef(), Program.get(), Options.c_str()); } if (Error != UR_RESULT_SUCCESS) @@ -1769,7 +1769,7 @@ ProgramManager::ProgramPtr ProgramManager::build( // Include the main program and compile/link everything together if (!CreatedFromBinary) { auto Res = doCompile(Adapter, Program.get(), Devices.size(), Devices.data(), - Context->getHandleRef(), CompileOptions.c_str()); + Context.getHandleRef(), CompileOptions.c_str()); Adapter->checkUrResult(Res); } LinkPrograms.push_back(Program.get()); @@ -1777,7 +1777,7 @@ ProgramManager::ProgramPtr ProgramManager::build( for (ur_program_handle_t Prg : ExtraProgramsToLink) { if (!CreatedFromBinary) { auto Res = doCompile(Adapter, Prg, Devices.size(), Devices.data(), - Context->getHandleRef(), CompileOptions.c_str()); + Context.getHandleRef(), CompileOptions.c_str()); Adapter->checkUrResult(Res); } LinkPrograms.push_back(Prg); @@ -1786,12 +1786,12 @@ ProgramManager::ProgramPtr ProgramManager::build( ur_program_handle_t LinkedProg = nullptr; auto doLink = [&] { auto Res = Adapter->call_nocheck( - Context->getHandleRef(), Devices.size(), Devices.data(), + Context.getHandleRef(), Devices.size(), Devices.data(), LinkPrograms.size(), LinkPrograms.data(), LinkOptions.c_str(), &LinkedProg); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter->call_nocheck( - Context->getHandleRef(), LinkPrograms.size(), LinkPrograms.data(), + Context.getHandleRef(), LinkPrograms.size(), LinkPrograms.data(), LinkOptions.c_str(), &LinkedProg); } return Res; @@ -1800,7 +1800,7 @@ ProgramManager::ProgramPtr ProgramManager::build( if (Error == UR_RESULT_ERROR_OUT_OF_RESOURCES || Error == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY || Error == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) { - Context->getKernelProgramCache().reset(); + Context.getKernelProgramCache().reset(); Error = doLink(); } @@ -2919,7 +2919,7 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, ur_program_handle_t Prog = createURProgram(*InputImpl->get_bin_image_ref(), - getSyclObjImpl(InputImpl->get_context()), Devs); + *getSyclObjImpl(InputImpl->get_context()), Devs); if (InputImpl->get_bin_image_ref()->supportsSpecConstants()) setSpecializationConstants(InputImpl, Prog, Adapter); @@ -2949,7 +2949,7 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, throw sycl::exception( make_error_code(errc::build), getProgramBuildLog(ObjectImpl->get_ur_program_ref(), - getSyclObjImpl(ObjectImpl->get_context()))); + *getSyclObjImpl(ObjectImpl->get_context()))); CompiledImages.push_back( createSyclObjFromImpl(std::move(ObjectImpl))); @@ -3053,18 +3053,18 @@ ProgramManager::link(const std::vector &Imgs, // Should always come last! appendLinkEnvironmentVariablesThatAppend(LinkOptionsStr); const context &Context = FirstImgImpl->get_context(); - const ContextImplPtr &ContextImpl = getSyclObjImpl(Context); - const AdapterPtr &Adapter = ContextImpl->getAdapter(); + context_impl &ContextImpl = *getSyclObjImpl(Context); + const AdapterPtr &Adapter = ContextImpl.getAdapter(); ur_program_handle_t LinkedProg = nullptr; auto doLink = [&] { auto Res = Adapter->call_nocheck( - ContextImpl->getHandleRef(), URDevices.size(), URDevices.data(), + ContextImpl.getHandleRef(), URDevices.size(), URDevices.data(), URPrograms.size(), URPrograms.data(), LinkOptionsStr.c_str(), &LinkedProg); if (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { Res = Adapter->call_nocheck( - ContextImpl->getHandleRef(), URPrograms.size(), URPrograms.data(), + ContextImpl.getHandleRef(), URPrograms.size(), URPrograms.data(), LinkOptionsStr.c_str(), &LinkedProg); } return Res; @@ -3073,7 +3073,7 @@ ProgramManager::link(const std::vector &Imgs, if (Error == UR_RESULT_ERROR_OUT_OF_RESOURCES || Error == UR_RESULT_ERROR_OUT_OF_HOST_MEMORY || Error == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) { - ContextImpl->getKernelProgramCache().reset(); + ContextImpl.getKernelProgramCache().reset(); Error = doLink(); } @@ -3104,7 +3104,8 @@ ProgramManager::link(const std::vector &Imgs, const std::shared_ptr &ImgImpl = getSyclObjImpl(Img); if (ImgImpl->get_bin_image_ref()) NativePrograms.insert( - {LinkedProg, {ContextImpl, ImgImpl->get_bin_image_ref()}}); + {LinkedProg, + {ContextImpl.shared_from_this(), ImgImpl->get_bin_image_ref()}}); } } @@ -3153,7 +3154,7 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, getSyclObjImpl(DevImgWithDeps.getMain()); const context &Context = MainInputImpl->get_context(); - const ContextImplPtr &ContextImpl = detail::getSyclObjImpl(Context); + context_impl &ContextImpl = *detail::getSyclObjImpl(Context); std::vector BinImgs; BinImgs.reserve(DevImgWithDeps.size()); @@ -3221,19 +3222,19 @@ ProgramManager::getOrCreateKernel(const context &Context, PropList, NoAllowedPropertiesCheck, NoAllowedPropertiesCheck); } - const ContextImplPtr &Ctx = getSyclObjImpl(Context); + context_impl &Ctx = *getSyclObjImpl(Context); - KernelProgramCache &Cache = Ctx->getKernelProgramCache(); + KernelProgramCache &Cache = Ctx.getKernelProgramCache(); auto BuildF = [this, &Program, &KernelName, &Ctx] { ur_kernel_handle_t Kernel = nullptr; - const AdapterPtr &Adapter = Ctx->getAdapter(); + const AdapterPtr &Adapter = Ctx.getAdapter(); Adapter->call(Program, KernelName.data(), &Kernel); // Only set UR_USM_INDIRECT_ACCESS if the platform can handle it. - if (Ctx->getPlatformImpl().supports_usm()) { + if (Ctx.getPlatformImpl().supports_usm()) { bool EnableAccess = true; Adapter->call( Kernel, UR_KERNEL_EXEC_INFO_USM_INDIRECT_ACCESS, sizeof(ur_bool_t), @@ -3267,7 +3268,7 @@ ProgramManager::getOrCreateKernel(const context &Context, // stored in the cache, and one handle is returned to the // caller. In that case, we need to increase the ref count of the // kernel. - Ctx->getAdapter()->call(BuildResult->Val.first); + Ctx.getAdapter()->call(BuildResult->Val.first); return std::make_tuple(BuildResult->Val.first, &(BuildResult->MBuildResultMutex), BuildResult->Val.second); @@ -3318,7 +3319,7 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( if constexpr (DbgProgMgr > 0) std::cerr << ">>> Adding the kernel to the cache.\n"; - const ContextImplPtr &ContextImpl = detail::getSyclObjImpl(Context); + context_impl &ContextImpl = *detail::getSyclObjImpl(Context); auto Program = createURProgram(Img, ContextImpl, {Device}); detail::device_impl &DeviceImpl = *detail::getSyclObjImpl(Device); auto &Adapter = DeviceImpl.getAdapter(); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index ceb48a2e57d66..7d0d7d01b86c8 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -135,15 +135,15 @@ class ProgramManager { static ProgramManager &getInstance(); RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, - const ContextImplPtr &ContextImpl, + context_impl &ContextImpl, const device_impl *DeviceImpl); RTDeviceBinaryImage &getDeviceImage( const std::unordered_set &ImagesToVerify, - const ContextImplPtr &ContextImpl, const device_impl *DeviceImpl); + context_impl &ContextImpl, const device_impl *DeviceImpl); ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img, - const ContextImplPtr &ContextImpl, + context_impl &ContextImpl, const std::vector &Devices); /// Creates a UR program using either a cached device code binary if present /// in the persistent cache or from the supplied device image otherwise. @@ -167,7 +167,7 @@ class ProgramManager { std::pair getOrCreateURProgram( const RTDeviceBinaryImage &Img, const std::vector &AllImages, - const ContextImplPtr &ContextImpl, const std::vector &Devices, + context_impl &ContextImpl, const std::vector &Devices, const std::string &CompileAndLinkOptions, SerializedObj SpecConsts); /// Builds or retrieves from cache a program defining the kernel with given /// name. @@ -176,7 +176,7 @@ class ProgramManager { /// \param Context the context to build the program with /// \param Device the device for which the program is built /// \param KernelName the kernel's name - ur_program_handle_t getBuiltURProgram(const ContextImplPtr &ContextImpl, + ur_program_handle_t getBuiltURProgram(context_impl &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, const NDRDescT &NDRDesc = {}); @@ -193,13 +193,12 @@ class ProgramManager { /// the program should be built with. ur_program_handle_t getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, - const ContextImplPtr &ContextImpl, - const std::vector &Devs, + context_impl &ContextImpl, const std::vector &Devs, const DevImgPlainWithDeps *DevImgWithDeps = nullptr, const SerializedObj &SpecConsts = {}); FastKernelCacheValPtr - getOrCreateKernel(const ContextImplPtr &ContextImpl, device_impl &DeviceImpl, + getOrCreateKernel(context_impl &ContextImpl, device_impl &DeviceImpl, KernelNameStrRefT KernelName, KernelNameBasedCacheT *KernelNameBasedCachePtr, const NDRDescT &NDRDesc = {}); @@ -214,7 +213,7 @@ class ProgramManager { const std::vector &SpecializationConsts); ur_program_handle_t getUrProgramFromUrKernel(ur_kernel_handle_t Kernel, - const ContextImplPtr &Context); + context_impl &Context); void addImage(sycl_device_binary RawImg, bool RegisterImgExports = true, RTDeviceBinaryImage **OutImage = nullptr, @@ -223,7 +222,7 @@ class ProgramManager { void removeImages(sycl_device_binaries DeviceImages); void debugPrintBinaryImages() const; static std::string getProgramBuildLog(const ur_program_handle_t &Program, - const ContextImplPtr &Context); + context_impl &Context); uint32_t getDeviceLibReqMask(const RTDeviceBinaryImage &Img); @@ -394,7 +393,7 @@ class ProgramManager { using ProgramPtr = std::unique_ptr, decltype(&::urProgramRelease)>; - ProgramPtr build(ProgramPtr Program, const ContextImplPtr &Context, + ProgramPtr build(ProgramPtr Program, context_impl &Context, const std::string &CompileOptions, const std::string &LinkOptions, std::vector &Devices, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 03c09a6476f40..d376db2d398c8 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2016,7 +2016,7 @@ void instrumentationAddExtraKernelMetadata( // by graph API, when a modifiable graph is finalized. FastKernelCacheValPtr FastKernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - Queue->getContextImplPtr(), Queue->getDeviceImpl(), KernelName, + *Queue->getContextImplPtr(), Queue->getDeviceImpl(), KernelName, KernelNameBasedCachePtr); EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; } @@ -2552,7 +2552,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, ContextImplPtr ContextImpl, } else { FastKernelCacheValPtr FastKernelCacheVal = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, CommandGroup.MKernelName, + *ContextImpl, DeviceImpl, CommandGroup.MKernelName, CommandGroup.MKernelNameBasedCachePtr); UrKernel = FastKernelCacheVal->MKernelHandle; EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; @@ -2715,7 +2715,7 @@ void enqueueImpKernel( KernelMutex = SyclKernelImpl->getCacheMutex(); } else { KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr, NDRDesc); + *ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr, NDRDesc); Kernel = KernelCacheVal->MKernelHandle; KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 91cc890cc99fd..5729902069ac8 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -383,13 +383,13 @@ TEST(ImageRemoval, NativePrograms) { const sycl::device Dev = Plt.get_devices()[0]; sycl::queue Queue{Dev}; auto Ctx = Queue.get_context(); - auto ProgramA = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), + auto ProgramA = PM.getBuiltURProgram(*sycl::detail::getSyclObjImpl(Ctx), *sycl::detail::getSyclObjImpl(Dev), generateRefName("A", "Kernel")); - auto ProgramB = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), + auto ProgramB = PM.getBuiltURProgram(*sycl::detail::getSyclObjImpl(Ctx), *sycl::detail::getSyclObjImpl(Dev), generateRefName("B", "Kernel")); - std::ignore = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), + std::ignore = PM.getBuiltURProgram(*sycl::detail::getSyclObjImpl(Ctx), *sycl::detail::getSyclObjImpl(Dev), generateRefName("C", "Kernel")); diff --git a/sycl/unittests/program_manager/SubDevices.cpp b/sycl/unittests/program_manager/SubDevices.cpp index a577a94d9a04c..6153453eefc16 100644 --- a/sycl/unittests/program_manager/SubDevices.cpp +++ b/sycl/unittests/program_manager/SubDevices.cpp @@ -120,11 +120,11 @@ TEST(SubDevices, DISABLED_BuildProgramForSubdevices) { // Build program via getBuiltPIProgram API sycl::detail::ProgramManager::getInstance().getBuiltURProgram( - sycl::detail::getSyclObjImpl(Ctx), subDev1, + *sycl::detail::getSyclObjImpl(Ctx), subDev1, sycl::detail::KernelInfo>::getName()); // This call should re-use built binary from the cache. If urProgramBuild is // called again, the test will fail as second call of redefinedProgramBuild sycl::detail::ProgramManager::getInstance().getBuiltURProgram( - sycl::detail::getSyclObjImpl(Ctx), subDev2, + *sycl::detail::getSyclObjImpl(Ctx), subDev2, sycl::detail::KernelInfo>::getName()); } diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 4d5a468122b28..ff464c6d2a1ee 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -303,7 +303,7 @@ TEST(EliminatedArgMask, ReuseOfHandleValues) { const sycl::device Dev = Plt.get_devices()[0]; sycl::queue Queue{Dev}; auto Ctx = Queue.get_context(); - ProgBefore = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), + ProgBefore = PM.getBuiltURProgram(*sycl::detail::getSyclObjImpl(Ctx), *sycl::detail::getSyclObjImpl(Dev), Name); auto Mask = PM.getEliminatedKernelArgMask(ProgBefore, Name); EXPECT_NE(Mask, nullptr); @@ -328,7 +328,7 @@ TEST(EliminatedArgMask, ReuseOfHandleValues) { const sycl::device Dev = Plt.get_devices()[0]; sycl::queue Queue{Dev}; auto Ctx = Queue.get_context(); - ProgAfter = PM.getBuiltURProgram(sycl::detail::getSyclObjImpl(Ctx), + ProgAfter = PM.getBuiltURProgram(*sycl::detail::getSyclObjImpl(Ctx), *sycl::detail::getSyclObjImpl(Dev), Name); auto Mask = PM.getEliminatedKernelArgMask(ProgAfter, Name); EXPECT_NE(Mask, nullptr);