From 585de022d6891a86a1ecca283860053de6f14769 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 23 Jun 2025 08:56:26 -0700 Subject: [PATCH 1/2] [SYCL][NFCI] Make device binary image pointers const in more places This commit changes the use of RTDeviceBinaryImage pointers to be const in more places. Primary cases where we break this is for compressed images, where decompression happens semi-lazily, so const-casts are occassionally necessary. Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_binary_image.hpp | 2 +- sycl/source/detail/device_global_map.hpp | 2 +- .../source/detail/device_global_map_entry.hpp | 7 +- sycl/source/detail/device_image_impl.hpp | 13 ++- sycl/source/detail/helpers.cpp | 2 +- sycl/source/detail/memory_manager.cpp | 2 +- .../program_manager/program_manager.cpp | 106 +++++++++--------- .../program_manager/program_manager.hpp | 36 +++--- sycl/source/kernel_bundle.cpp | 2 +- sycl/unittests/program_manager/Cleanup.cpp | 10 +- 10 files changed, 91 insertions(+), 91 deletions(-) diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 872cad84ced27..d7943cb2bd0c0 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -296,7 +296,7 @@ class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { } static DynRTDeviceBinaryImage - merge(const std::vector &Imgs); + merge(const std::vector &Imgs); protected: DynRTDeviceBinaryImage(); diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 77bfed5c34a85..42b63fe3abb56 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -23,7 +23,7 @@ namespace detail { class DeviceGlobalMap { public: - void initializeEntries(RTDeviceBinaryImage *Img) { + void initializeEntries(const RTDeviceBinaryImage *Img) { const auto &DeviceGlobals = Img->getDeviceGlobals(); std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index cfa86a6639e43..5f020f1358e8b 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -55,7 +55,7 @@ struct DeviceGlobalMapEntry { // Pointer to the device_global on host. const void *MDeviceGlobalPtr = nullptr; // Images device_global are used by. - std::unordered_set MImages; + std::unordered_set MImages; // The image identifiers for the images using the device_global used by in the // cache. std::set MImageIdentifiers; @@ -71,7 +71,7 @@ struct DeviceGlobalMapEntry { // Constructor for only initializing ID, type size, and device image scope // flag. The pointer to the device global will be initialized later. - DeviceGlobalMapEntry(std::string UniqueId, RTDeviceBinaryImage *Img, + DeviceGlobalMapEntry(std::string UniqueId, const RTDeviceBinaryImage *Img, std::uint32_t DeviceGlobalTSize, bool IsDeviceImageScopeDecorated) : MUniqueId(UniqueId), MImages{Img}, @@ -89,7 +89,8 @@ struct DeviceGlobalMapEntry { // Initialize the device_global's element type size and the flag signalling // if the device_global has the device_image_scope property. - void initialize(RTDeviceBinaryImage *Img, std::uint32_t DeviceGlobalTSize, + void initialize(const RTDeviceBinaryImage *Img, + std::uint32_t DeviceGlobalTSize, bool IsDeviceImageScopeDecorated) { if (MDeviceGlobalTSize != 0) { // The device global entry has already been initialized. This can happen diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 1e307851e5c6a..15fcdd9a31442 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -1111,11 +1111,12 @@ class device_image_impl { // imports. // TODO: Consider making a collectDeviceImageDeps variant that takes a // set reference and inserts into that instead. - std::set ImgDeps; + std::set ImgDeps; for (const device &Device : DevImgImpl->get_devices()) { - std::set DevImgDeps = PM.collectDeviceImageDeps( - *NewImage, *getSyclObjImpl(Device), - /*ErrorOnUnresolvableImport=*/State == bundle_state::executable); + std::set DevImgDeps = + PM.collectDeviceImageDeps(*NewImage, *getSyclObjImpl(Device), + /*ErrorOnUnresolvableImport=*/State == + bundle_state::executable); ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); } @@ -1130,13 +1131,13 @@ class device_image_impl { if (State == bundle_state::executable) { // If target is executable we bundle the image and dependencies together // and bring it into state. - for (RTDeviceBinaryImage *ImgDep : ImgDeps) + for (const RTDeviceBinaryImage *ImgDep : ImgDeps) NewImageAndDeps.push_back(PM.createDependencyImage( MContext, SupportingDevsRef, ImgDep, bundle_state::input)); } else if (State == bundle_state::object) { // If the target is object, we bring the dependencies into object state // individually and put them in the bundle. - for (RTDeviceBinaryImage *ImgDep : ImgDeps) { + for (const RTDeviceBinaryImage *ImgDep : ImgDeps) { DevImgPlainWithDeps ImgDepWithDeps{PM.createDependencyImage( MContext, SupportingDevsRef, ImgDep, bundle_state::input)}; PM.bringSYCLDeviceImageToState(ImgDepWithDeps, State); diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 14e7aca275221..45d1ee112263e 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -50,7 +50,7 @@ retrieveKernelBinary(queue_impl &Queue, KernelNameStrRefT KernelName, ProgramManager::getInstance().getRawDeviceImages(KernelIds); auto DeviceImage = std::find_if( DeviceImages.begin(), DeviceImages.end(), - [isNvidia](RTDeviceBinaryImage *DI) { + [isNvidia](const RTDeviceBinaryImage *DI) { const std::string &TargetSpec = isNvidia ? std::string("llvm_nvptx64") : std::string("llvm_amdgcn"); return DI->getFormat() == SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE && diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index eef788f883fad..7736e656cc574 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1145,7 +1145,7 @@ getOrBuildProgramForDeviceGlobal(queue_impl &Queue, // If there was no cached program, build one. auto Context = createSyclObjFromImpl(ContextImpl); ProgramManager &PM = ProgramManager::getInstance(); - RTDeviceBinaryImage &Img = PM.getDeviceImage( + const RTDeviceBinaryImage &Img = PM.getDeviceImage( DeviceGlobalEntry->MImages, ContextImpl, *getSyclObjImpl(Device)); device_image_plain DeviceImage = diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index aef03bc462282..c1bea1ccc8957 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -583,7 +583,7 @@ static const char *getUrDeviceTarget(const char *URDeviceTarget) { return UR_DEVICE_BINARY_TARGET_UNKNOWN; } -static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, +static bool compatibleWithDevice(const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { auto &Adapter = DeviceImpl.getAdapter(); @@ -593,11 +593,10 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, // compatible with implementation. The function returns invalid index if no // device images are compatible. uint32_t SuitableImageID = std::numeric_limits::max(); - sycl_device_binary DevBin = - const_cast(&BinImage->getRawData()); + const sycl_device_binary_struct &DevBin = BinImage->getRawData(); ur_device_binary_t UrBinary{}; - UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin->DeviceTargetSpec); + UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin.DeviceTargetSpec); ur_result_t Error = Adapter->call_nocheck( URDeviceHandle, &UrBinary, @@ -611,7 +610,8 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, } // Check if the device image is a BF16 devicelib image. -bool ProgramManager::isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage) { +bool ProgramManager::isBfloat16DeviceImage( + const RTDeviceBinaryImage *BinImage) { // SYCL devicelib image. if ((m_Bfloat16DeviceLibImages[0].get() == BinImage) || m_Bfloat16DeviceLibImages[1].get() == BinImage) @@ -623,7 +623,7 @@ bool ProgramManager::isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage) { // Check if device natively support BF16 conversion and accordingly // decide whether to use fallback or native BF16 devicelib image. bool ProgramManager::shouldBF16DeviceImageBeUsed( - RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { + const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { // Decide whether a devicelib image should be used. int Bfloat16DeviceLibVersion = -1; if (m_Bfloat16DeviceLibImages[0].get() == BinImage) @@ -672,17 +672,17 @@ static bool checkLinkingSupport(const device_impl &DeviceImpl, return false; } -std::set +std::set ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport) { // TODO collecting dependencies for virtual functions and imported symbols // should be combined since one can lead to new unresolved dependencies for // the other. - std::set DeviceImagesToLink = + std::set DeviceImagesToLink = collectDependentDeviceImagesForVirtualFunctions(Img, Dev); - std::set ImageDeps = + std::set ImageDeps = collectDeviceImageDepsForImportedSymbols(Img, Dev, ErrorOnUnresolvableImport); DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); @@ -690,19 +690,19 @@ ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, } static inline void -CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +CheckAndDecompressImage([[maybe_unused]] const RTDeviceBinaryImage *Img) { #ifdef SYCL_RT_ZSTD_AVAILABLE - if (auto CompImg = dynamic_cast(Img)) + if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) - CompImg->Decompress(); + const_cast(CompImg)->Decompress(); #endif } -std::set +std::set ProgramManager::collectDeviceImageDepsForImportedSymbols( const RTDeviceBinaryImage &MainImg, const device_impl &Dev, bool ErrorOnUnresolvableImport) { - std::set DeviceImagesToLink; + std::set DeviceImagesToLink; std::set HandledSymbols; std::queue WorkList; for (const sycl_device_binary_property &ISProp : @@ -722,7 +722,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( auto Range = m_ExportedSymbolImages.equal_range(Symbol); bool Found = false; for (auto It = Range.first; It != Range.second; ++It) { - RTDeviceBinaryImage *Img = It->second; + const RTDeviceBinaryImage *Img = It->second; if (!doesDevSupportDeviceRequirements(Dev, *Img) || !compatibleWithDevice(Img, Dev)) @@ -737,8 +737,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( // and then check if the format matches. if (Format == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE || Img->getFormat() == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE) { - auto MainImgPtr = const_cast(&MainImg); - CheckAndDecompressImage(MainImgPtr); + CheckAndDecompressImage(&MainImg); CheckAndDecompressImage(Img); Format = MainImg.getFormat(); } @@ -760,18 +759,18 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( "No device image found for external symbol " + Symbol); } - DeviceImagesToLink.erase(const_cast(&MainImg)); + DeviceImagesToLink.erase(&MainImg); return DeviceImagesToLink; } -std::set +std::set ProgramManager::collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev) { // If virtual functions are used in a program, then we need to link several // device images together to make sure that vtable pointers stored in // objects are valid between different kernels (which could be in different // device images). - std::set DeviceImagesToLink; + std::set DeviceImagesToLink; // KernelA may use some set-a, which is also used by KernelB that in turn // uses set-b, meaning that this search should be recursive. The set below // is used to stop that recursion, i.e. to avoid looking at sets we have @@ -804,7 +803,7 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // There could be more than one device image that uses the same set // of virtual functions, or provides virtual funtions from the same // set. - for (RTDeviceBinaryImage *BinImage : m_VFSet2BinImage.at(SetName)) { + for (const RTDeviceBinaryImage *BinImage : m_VFSet2BinImage.at(SetName)) { // Here we can encounter both uses-virtual-functions-set and // virtual-functions-set properties, but their handling is the same: we // just grab all sets they reference and add them for consideration if @@ -833,7 +832,7 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // We may have inserted the original image into the list as well, because it // is also a part of m_VFSet2BinImage map. No need to to return it to avoid // passing it twice to link call later. - DeviceImagesToLink.erase(const_cast(&Img)); + DeviceImagesToLink.erase(&Img); return DeviceImagesToLink; } @@ -901,11 +900,11 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( checkDevSupportDeviceRequirements(RootOrSubDevImpl, Img, NDRDesc)) throw *exception; - std::set DeviceImagesToLink = + std::set DeviceImagesToLink = collectDeviceImageDeps(Img, {RootOrSubDevImpl}); // Decompress all DeviceImagesToLink - for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) + for (const RTDeviceBinaryImage *BinImg : DeviceImagesToLink) CheckAndDecompressImage(BinImg); std::vector AllImages; @@ -1500,14 +1499,10 @@ const char *getArchName(const device_impl &DeviceImpl) { return "unknown"; } -sycl_device_binary getRawImg(RTDeviceBinaryImage *Img) { - return reinterpret_cast( - const_cast(&Img->getRawData())); -} - template -RTDeviceBinaryImage *getBinImageFromMultiMap( - const std::unordered_multimap &ImagesSet, +const RTDeviceBinaryImage *getBinImageFromMultiMap( + const std::unordered_multimap + &ImagesSet, const StorageKey &Key, context_impl &ContextImpl, const device_impl &DeviceImpl) { auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key); @@ -1519,7 +1514,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( // (checked using info::device::architecture) or JIT compiled. // This selection will then be passed to urDeviceSelectBinary // for final selection. - std::vector DeviceFilteredImgs; + std::vector DeviceFilteredImgs; DeviceFilteredImgs.reserve(std::distance(ItBegin, ItEnd)); for (auto It = ItBegin; It != ItEnd; ++It) { if (doesImageTargetMatchDevice(*It->second, DeviceImpl)) @@ -1533,7 +1528,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( for (uint32_t BinaryCount = 0; BinaryCount < DeviceFilteredImgs.size(); BinaryCount++) { UrBinaries[BinaryCount].pDeviceTargetSpec = getUrDeviceTarget( - getRawImg(DeviceFilteredImgs[BinaryCount])->DeviceTargetSpec); + DeviceFilteredImgs[BinaryCount]->getRawData().DeviceTargetSpec); } uint32_t ImgInd = 0; @@ -1544,7 +1539,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( return DeviceFilteredImgs[ImgInd]; } -RTDeviceBinaryImage & +const RTDeviceBinaryImage & ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, context_impl &ContextImpl, const device_impl &DeviceImpl) { @@ -1559,11 +1554,11 @@ ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, if (m_UseSpvFile) { assert(m_SpvFileImage); return getDeviceImage( - std::unordered_set({m_SpvFileImage.get()}), + std::unordered_set({m_SpvFileImage.get()}), ContextImpl, DeviceImpl); } - RTDeviceBinaryImage *Img = nullptr; + const RTDeviceBinaryImage *Img = nullptr; { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); if (auto KernelId = m_KernelName2KernelIDs.find(KernelName); @@ -1591,8 +1586,8 @@ ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, "No kernel named " + std::string(KernelName) + " was found"); } -RTDeviceBinaryImage &ProgramManager::getDeviceImage( - const std::unordered_set &ImageSet, +const RTDeviceBinaryImage &ProgramManager::getDeviceImage( + const std::unordered_set &ImageSet, context_impl &ContextImpl, const device_impl &DeviceImpl) { assert(ImageSet.size() > 0); @@ -1838,7 +1833,7 @@ ProgramManager::ProgramPtr ProgramManager::build( return Program; } -void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) { +void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) { const RTDeviceBinaryImage::PropertyRange &AssertUsedRange = Img.getAssertUsed(); if (AssertUsedRange.isAvailable()) @@ -1846,7 +1841,8 @@ void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) { m_KernelUsesAssert.insert(Prop->Name); } -void ProgramManager::cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img) { +void ProgramManager::cacheKernelImplicitLocalArg( + const RTDeviceBinaryImage &Img) { const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange = Img.getImplicitLocalArg(); if (ImplicitLocalArgRange.isAvailable()) @@ -2385,7 +2381,7 @@ bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) { return std::any_of( m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(), - [&](std::pair>> Elem) { return compatibleWithDevice(Elem.first, DeviceImpl); }); } @@ -2419,9 +2415,9 @@ void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, m_DeviceGlobals.addOrInitialize(DeviceGlobalPtr, UniqueId); } -std::set +std::set ProgramManager::getRawDeviceImages(const std::vector &KernelIDs) { - std::set BinImages; + std::set BinImages; std::lock_guard KernelIDsGuard(m_KernelIDsMutex); for (const kernel_id &KID : KernelIDs) { auto Range = m_KernelIDs2BinImage.equal_range(KID); @@ -2482,7 +2478,8 @@ HostPipeMapEntry *ProgramManager::getHostPipeEntry(const void *HostPipePtr) { } device_image_plain ProgramManager::getDeviceImageFromBinaryImage( - RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev) { + const RTDeviceBinaryImage *BinImage, const context &Ctx, + const device &Dev) { const bundle_state ImgState = getBinImageState(BinImage); assert(compatibleWithDevice(BinImage, *getSyclObjImpl(Dev).get())); @@ -2508,7 +2505,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( // Collect unique raw device images taking into account kernel ids passed // TODO: Can we avoid repacking? - std::set BinImages; + std::set BinImages; if (!KernelIDs.empty()) { for (const auto &KID : KernelIDs) { bool isCompatibleWithAtLeastOneDev = @@ -2552,18 +2549,19 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( // a separate branch for that case to avoid unnecessary tracking work. struct DeviceBinaryImageInfo { std::shared_ptr> KernelIDs; - std::set Deps; + std::set Deps; bundle_state State = bundle_state::input; int RequirementCounter = 0; }; - std::unordered_map ImageInfoMap; + std::unordered_map + ImageInfoMap; for (const sycl::device &Dev : Devs) { device_impl &DevImpl = *getSyclObjImpl(Dev); // Track the highest image state for each requested kernel. using StateImagesPairT = - std::pair>; + std::pair>; using KernelImageMapT = std::map; KernelImageMapT KernelImageMap; @@ -2571,7 +2569,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( for (const kernel_id &KernelID : KernelIDs) KernelImageMap.insert({KernelID, {}}); - for (RTDeviceBinaryImage *BinImage : BinImages) { + for (const RTDeviceBinaryImage *BinImage : BinImages) { if (!compatibleWithDevice(BinImage, DevImpl) || !doesDevSupportDeviceRequirements(DevImpl, *BinImage)) continue; @@ -2618,7 +2616,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( KernelImages.push_back(BinImage); ++ImgRequirementCounter; } else if (KernelImagesState < ImgState) { - for (RTDeviceBinaryImage *Img : KernelImages) { + for (const RTDeviceBinaryImage *Img : KernelImages) { auto It = ImageInfoMap.find(Img); assert(It != ImageInfoMap.end()); assert(It->second.RequirementCounter > 0); @@ -2642,7 +2640,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( for (const auto &ImgInfoPair : ImageInfoMap) { if (ImgInfoPair.second.RequirementCounter == 0) continue; - for (RTDeviceBinaryImage *Dep : ImgInfoPair.second.Deps) { + for (const RTDeviceBinaryImage *Dep : ImgInfoPair.second.Deps) { auto It = ImageInfoMap.find(Dep); if (It != ImageInfoMap.end()) It->second.RequirementCounter = 0; @@ -2659,11 +2657,11 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr); std::vector Images; - const std::set &Deps = ImgInfoPair.second.Deps; + const std::set &Deps = ImgInfoPair.second.Deps; Images.reserve(Deps.size() + 1); Images.push_back( createSyclObjFromImpl(std::move(MainImpl))); - for (RTDeviceBinaryImage *Dep : Deps) + for (const RTDeviceBinaryImage *Dep : Deps) Images.push_back( createDependencyImage(Ctx, Devs, Dep, ImgInfoPair.second.State)); SYCLDeviceImages.push_back(std::move(Images)); @@ -2674,7 +2672,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( device_image_plain ProgramManager::createDependencyImage( const context &Ctx, const std::vector &Devs, - RTDeviceBinaryImage *DepImage, bundle_state DepState) { + const RTDeviceBinaryImage *DepImage, bundle_state DepState) { std::shared_ptr> DepKernelIDs; { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index bd81b4d086f2a..dc2ff2dc3ca44 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -135,12 +135,12 @@ class ProgramManager { // process. Can only be called after staticInit is done. static ProgramManager &getInstance(); - RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, + const RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, context_impl &ContextImpl, const device_impl &DeviceImpl); - RTDeviceBinaryImage &getDeviceImage( - const std::unordered_set &ImagesToVerify, + const RTDeviceBinaryImage &getDeviceImage( + const std::unordered_set &ImagesToVerify, context_impl &ContextImpl, const device_impl &DeviceImpl); ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img, @@ -287,7 +287,7 @@ class ProgramManager { HostPipeMapEntry *getHostPipeEntry(const void *HostPipePtr); device_image_plain - getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, + getDeviceImageFromBinaryImage(const RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev); // The function returns a vector of SYCL device images that are compiled with @@ -299,7 +299,7 @@ class ProgramManager { // Creates a new dependency image for a given dependency binary image. device_image_plain createDependencyImage(const context &Ctx, const std::vector &Devs, - RTDeviceBinaryImage *DepImage, + const RTDeviceBinaryImage *DepImage, bundle_state DepState); // Bring image to the required state. Does it inplace @@ -377,13 +377,13 @@ class ProgramManager { KernelNameStrRefT KernelName, KernelNameBasedCacheT *KernelNameBasedCachePtr) const; - std::set + std::set getRawDeviceImages(const std::vector &KernelIDs); - std::set + std::set collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport = true); - std::set + std::set collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport); @@ -406,17 +406,17 @@ class ProgramManager { void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const; /// Add info on kernels using assert into cache - void cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img); + void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img); /// Add info on kernels using local arg into cache - void cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img); + void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - std::set + std::set collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev); - bool isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage); - bool shouldBF16DeviceImageBeUsed(RTDeviceBinaryImage *BinImage, + bool isBfloat16DeviceImage(const RTDeviceBinaryImage *BinImage); + bool shouldBF16DeviceImageBeUsed(const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl); protected: @@ -435,7 +435,7 @@ class ProgramManager { // in case of SPIRV + AOT. // Using shared_ptr to avoid expensive copy of the vector. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_KernelIDs2BinImage; // Maps device binary image to a vector of kernel ids in this image. @@ -443,7 +443,7 @@ class ProgramManager { // The vector is initialized in addImages function and is supposed to be // immutable afterwards. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_map>> m_BinImg2KernelIDs; @@ -459,13 +459,13 @@ class ProgramManager { /// in the sycl::detail::__sycl_service_kernel__ namespace which is /// exclusively used for this purpose. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_ServiceKernels; /// Caches all exported symbols to allow faster lookup when excluding these // from kernel bundles. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_ExportedSymbolImages; /// Keeps all device images we are refering to during program lifetime. Used @@ -481,7 +481,7 @@ class ProgramManager { /// Caches list of device images that use or provide virtual functions from /// the same set. Used to simplify access. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_map> + std::unordered_map> m_VFSet2BinImage; /// Protects built-in kernel ID cache. diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 4d3d430eecd80..a27c65c117116 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -368,7 +368,7 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { // the device and whose target matches the device. detail::device_impl &DevImpl = *getSyclObjImpl(Dev); for (const auto &KernelID : KernelIDs) { - std::set BinImages = + std::set BinImages = detail::ProgramManager::getInstance().getRawDeviceImages({KernelID}); if (std::none_of(BinImages.begin(), BinImages.end(), diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 6f7cca016264c..8aa1da1f2936d 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -13,7 +13,7 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { public: std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getKernelID2BinImage() { return m_KernelIDs2BinImage; } @@ -23,20 +23,20 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_KernelName2KernelIDs; } - std::unordered_map>> & getBinImage2KernelId() { return m_BinImg2KernelIDs; } std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getServiceKernels() { return m_ServiceKernels; } std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getExportedSymbolImages() { return m_ExportedSymbolImages; } @@ -48,7 +48,7 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { } std::unordered_map> & + std::set> & getVFSet2BinImage() { return m_VFSet2BinImage; } From 55431ccb6dcfca9bb81d529d523913fa700f3416 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 23 Jun 2025 09:14:40 -0700 Subject: [PATCH 2/2] Fix formatting Signed-off-by: Larsen, Steffen --- sycl/source/detail/program_manager/program_manager.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index dc2ff2dc3ca44..0b318c65e9b7f 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -136,8 +136,8 @@ class ProgramManager { static ProgramManager &getInstance(); const RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, - context_impl &ContextImpl, - const device_impl &DeviceImpl); + context_impl &ContextImpl, + const device_impl &DeviceImpl); const RTDeviceBinaryImage &getDeviceImage( const std::unordered_set &ImagesToVerify,