diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index db433ea01a407..ee96ccc998d27 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -296,7 +296,7 @@ class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { } static DynRTDeviceBinaryImage - merge(const std::vector &Imgs); + merge(const std::vector &Imgs); protected: DynRTDeviceBinaryImage(); diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 77bfed5c34a85..42b63fe3abb56 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -23,7 +23,7 @@ namespace detail { class DeviceGlobalMap { public: - void initializeEntries(RTDeviceBinaryImage *Img) { + void initializeEntries(const RTDeviceBinaryImage *Img) { const auto &DeviceGlobals = Img->getDeviceGlobals(); std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index cfa86a6639e43..5f020f1358e8b 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -55,7 +55,7 @@ struct DeviceGlobalMapEntry { // Pointer to the device_global on host. const void *MDeviceGlobalPtr = nullptr; // Images device_global are used by. - std::unordered_set MImages; + std::unordered_set MImages; // The image identifiers for the images using the device_global used by in the // cache. std::set MImageIdentifiers; @@ -71,7 +71,7 @@ struct DeviceGlobalMapEntry { // Constructor for only initializing ID, type size, and device image scope // flag. The pointer to the device global will be initialized later. - DeviceGlobalMapEntry(std::string UniqueId, RTDeviceBinaryImage *Img, + DeviceGlobalMapEntry(std::string UniqueId, const RTDeviceBinaryImage *Img, std::uint32_t DeviceGlobalTSize, bool IsDeviceImageScopeDecorated) : MUniqueId(UniqueId), MImages{Img}, @@ -89,7 +89,8 @@ struct DeviceGlobalMapEntry { // Initialize the device_global's element type size and the flag signalling // if the device_global has the device_image_scope property. - void initialize(RTDeviceBinaryImage *Img, std::uint32_t DeviceGlobalTSize, + void initialize(const RTDeviceBinaryImage *Img, + std::uint32_t DeviceGlobalTSize, bool IsDeviceImageScopeDecorated) { if (MDeviceGlobalTSize != 0) { // The device global entry has already been initialized. This can happen diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 6be86572de356..07385d0d79c76 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -1125,11 +1125,12 @@ class device_image_impl { // imports. // TODO: Consider making a collectDeviceImageDeps variant that takes a // set reference and inserts into that instead. - std::set ImgDeps; + std::set ImgDeps; for (const device &Device : DevImgImpl->get_devices()) { - std::set DevImgDeps = PM.collectDeviceImageDeps( - *NewImage, *getSyclObjImpl(Device), - /*ErrorOnUnresolvableImport=*/State == bundle_state::executable); + std::set DevImgDeps = + PM.collectDeviceImageDeps(*NewImage, *getSyclObjImpl(Device), + /*ErrorOnUnresolvableImport=*/State == + bundle_state::executable); ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); } @@ -1144,13 +1145,13 @@ class device_image_impl { if (State == bundle_state::executable) { // If target is executable we bundle the image and dependencies together // and bring it into state. - for (RTDeviceBinaryImage *ImgDep : ImgDeps) + for (const RTDeviceBinaryImage *ImgDep : ImgDeps) NewImageAndDeps.push_back(PM.createDependencyImage( MContext, SupportingDevsRef, ImgDep, bundle_state::input)); } else if (State == bundle_state::object) { // If the target is object, we bring the dependencies into object state // individually and put them in the bundle. - for (RTDeviceBinaryImage *ImgDep : ImgDeps) { + for (const RTDeviceBinaryImage *ImgDep : ImgDeps) { DevImgPlainWithDeps ImgDepWithDeps{PM.createDependencyImage( MContext, SupportingDevsRef, ImgDep, bundle_state::input)}; PM.bringSYCLDeviceImageToState(ImgDepWithDeps, State); diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 14e7aca275221..45d1ee112263e 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -50,7 +50,7 @@ retrieveKernelBinary(queue_impl &Queue, KernelNameStrRefT KernelName, ProgramManager::getInstance().getRawDeviceImages(KernelIds); auto DeviceImage = std::find_if( DeviceImages.begin(), DeviceImages.end(), - [isNvidia](RTDeviceBinaryImage *DI) { + [isNvidia](const RTDeviceBinaryImage *DI) { const std::string &TargetSpec = isNvidia ? std::string("llvm_nvptx64") : std::string("llvm_amdgcn"); return DI->getFormat() == SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE && diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index eef788f883fad..7736e656cc574 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1145,7 +1145,7 @@ getOrBuildProgramForDeviceGlobal(queue_impl &Queue, // If there was no cached program, build one. auto Context = createSyclObjFromImpl(ContextImpl); ProgramManager &PM = ProgramManager::getInstance(); - RTDeviceBinaryImage &Img = PM.getDeviceImage( + const RTDeviceBinaryImage &Img = PM.getDeviceImage( DeviceGlobalEntry->MImages, ContextImpl, *getSyclObjImpl(Device)); device_image_plain DeviceImage = diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 455810696260d..c00966ef7abc8 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -583,7 +583,7 @@ static const char *getUrDeviceTarget(const char *URDeviceTarget) { return UR_DEVICE_BINARY_TARGET_UNKNOWN; } -static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, +static bool compatibleWithDevice(const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { auto &Adapter = DeviceImpl.getAdapter(); @@ -593,11 +593,10 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, // compatible with implementation. The function returns invalid index if no // device images are compatible. uint32_t SuitableImageID = std::numeric_limits::max(); - sycl_device_binary DevBin = - const_cast(&BinImage->getRawData()); + const sycl_device_binary_struct &DevBin = BinImage->getRawData(); ur_device_binary_t UrBinary{}; - UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin->DeviceTargetSpec); + UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin.DeviceTargetSpec); ur_result_t Error = Adapter->call_nocheck( URDeviceHandle, &UrBinary, @@ -611,7 +610,8 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, } // Check if the device image is a BF16 devicelib image. -bool ProgramManager::isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage) { +bool ProgramManager::isBfloat16DeviceImage( + const RTDeviceBinaryImage *BinImage) { // SYCL devicelib image. if ((m_Bfloat16DeviceLibImages[0].get() == BinImage) || m_Bfloat16DeviceLibImages[1].get() == BinImage) @@ -623,7 +623,7 @@ bool ProgramManager::isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage) { // Check if device natively support BF16 conversion and accordingly // decide whether to use fallback or native BF16 devicelib image. bool ProgramManager::shouldBF16DeviceImageBeUsed( - RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { + const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { // Decide whether a devicelib image should be used. int Bfloat16DeviceLibVersion = -1; if (m_Bfloat16DeviceLibImages[0].get() == BinImage) @@ -672,17 +672,17 @@ static bool checkLinkingSupport(const device_impl &DeviceImpl, return false; } -std::set +std::set ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport) { // TODO collecting dependencies for virtual functions and imported symbols // should be combined since one can lead to new unresolved dependencies for // the other. - std::set DeviceImagesToLink = + std::set DeviceImagesToLink = collectDependentDeviceImagesForVirtualFunctions(Img, Dev); - std::set ImageDeps = + std::set ImageDeps = collectDeviceImageDepsForImportedSymbols(Img, Dev, ErrorOnUnresolvableImport); DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); @@ -690,19 +690,19 @@ ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, } static inline void -CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +CheckAndDecompressImage([[maybe_unused]] const RTDeviceBinaryImage *Img) { #ifdef SYCL_RT_ZSTD_AVAILABLE - if (auto CompImg = dynamic_cast(Img)) + if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) - CompImg->Decompress(); + const_cast(CompImg)->Decompress(); #endif } -std::set +std::set ProgramManager::collectDeviceImageDepsForImportedSymbols( const RTDeviceBinaryImage &MainImg, const device_impl &Dev, bool ErrorOnUnresolvableImport) { - std::set DeviceImagesToLink; + std::set DeviceImagesToLink; std::set HandledSymbols; std::queue WorkList; for (const sycl_device_binary_property &ISProp : @@ -722,7 +722,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( auto Range = m_ExportedSymbolImages.equal_range(Symbol); bool Found = false; for (auto It = Range.first; It != Range.second; ++It) { - RTDeviceBinaryImage *Img = It->second; + const RTDeviceBinaryImage *Img = It->second; if (!doesDevSupportDeviceRequirements(Dev, *Img) || !compatibleWithDevice(Img, Dev)) @@ -737,8 +737,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( // and then check if the format matches. if (Format == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE || Img->getFormat() == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE) { - auto MainImgPtr = const_cast(&MainImg); - CheckAndDecompressImage(MainImgPtr); + CheckAndDecompressImage(&MainImg); CheckAndDecompressImage(Img); Format = MainImg.getFormat(); } @@ -760,18 +759,18 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( "No device image found for external symbol " + Symbol); } - DeviceImagesToLink.erase(const_cast(&MainImg)); + DeviceImagesToLink.erase(&MainImg); return DeviceImagesToLink; } -std::set +std::set ProgramManager::collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev) { // If virtual functions are used in a program, then we need to link several // device images together to make sure that vtable pointers stored in // objects are valid between different kernels (which could be in different // device images). - std::set DeviceImagesToLink; + std::set DeviceImagesToLink; // KernelA may use some set-a, which is also used by KernelB that in turn // uses set-b, meaning that this search should be recursive. The set below // is used to stop that recursion, i.e. to avoid looking at sets we have @@ -804,7 +803,7 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // There could be more than one device image that uses the same set // of virtual functions, or provides virtual funtions from the same // set. - for (RTDeviceBinaryImage *BinImage : m_VFSet2BinImage.at(SetName)) { + for (const RTDeviceBinaryImage *BinImage : m_VFSet2BinImage.at(SetName)) { // Here we can encounter both uses-virtual-functions-set and // virtual-functions-set properties, but their handling is the same: we // just grab all sets they reference and add them for consideration if @@ -833,7 +832,7 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // We may have inserted the original image into the list as well, because it // is also a part of m_VFSet2BinImage map. No need to to return it to avoid // passing it twice to link call later. - DeviceImagesToLink.erase(const_cast(&Img)); + DeviceImagesToLink.erase(&Img); return DeviceImagesToLink; } @@ -901,11 +900,11 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( checkDevSupportDeviceRequirements(RootOrSubDevImpl, Img, NDRDesc)) throw *exception; - std::set DeviceImagesToLink = + std::set DeviceImagesToLink = collectDeviceImageDeps(Img, {RootOrSubDevImpl}); // Decompress all DeviceImagesToLink - for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) + for (const RTDeviceBinaryImage *BinImg : DeviceImagesToLink) CheckAndDecompressImage(BinImg); std::vector AllImages; @@ -1500,14 +1499,10 @@ const char *getArchName(const device_impl &DeviceImpl) { return "unknown"; } -sycl_device_binary getRawImg(RTDeviceBinaryImage *Img) { - return reinterpret_cast( - const_cast(&Img->getRawData())); -} - template -RTDeviceBinaryImage *getBinImageFromMultiMap( - const std::unordered_multimap &ImagesSet, +const RTDeviceBinaryImage *getBinImageFromMultiMap( + const std::unordered_multimap + &ImagesSet, const StorageKey &Key, context_impl &ContextImpl, const device_impl &DeviceImpl) { auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key); @@ -1519,7 +1514,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( // (checked using info::device::architecture) or JIT compiled. // This selection will then be passed to urDeviceSelectBinary // for final selection. - std::vector DeviceFilteredImgs; + std::vector DeviceFilteredImgs; DeviceFilteredImgs.reserve(std::distance(ItBegin, ItEnd)); for (auto It = ItBegin; It != ItEnd; ++It) { if (doesImageTargetMatchDevice(*It->second, DeviceImpl)) @@ -1538,13 +1533,14 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( std::vector UrBinaries(NumImgs); for (uint32_t BinaryCount = 0; BinaryCount < NumImgs; BinaryCount++) { - sycl_device_binary RawImg = getRawImg(DeviceFilteredImgs[BinaryCount]); + const sycl_device_binary_struct &RawImg = + DeviceFilteredImgs[BinaryCount]->getRawData(); UrBinaries[BinaryCount].pDeviceTargetSpec = - getUrDeviceTarget(RawImg->DeviceTargetSpec); + getUrDeviceTarget(RawImg.DeviceTargetSpec); if (DeviceImpl.getBackend() == backend::ext_oneapi_hip) { UrBinariesStorage.emplace_back( - RawImg->BinaryStart, - std::distance(RawImg->BinaryStart, RawImg->BinaryEnd)); + RawImg.BinaryStart, + std::distance(RawImg.BinaryStart, RawImg.BinaryEnd)); UrBinaries[BinaryCount].pNext = &UrBinariesStorage[BinaryCount]; } } @@ -1557,7 +1553,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( return DeviceFilteredImgs[ImgInd]; } -RTDeviceBinaryImage & +const RTDeviceBinaryImage & ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, context_impl &ContextImpl, const device_impl &DeviceImpl) { @@ -1572,11 +1568,11 @@ ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, if (m_UseSpvFile) { assert(m_SpvFileImage); return getDeviceImage( - std::unordered_set({m_SpvFileImage.get()}), + std::unordered_set({m_SpvFileImage.get()}), ContextImpl, DeviceImpl); } - RTDeviceBinaryImage *Img = nullptr; + const RTDeviceBinaryImage *Img = nullptr; { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); if (auto KernelId = m_KernelName2KernelIDs.find(KernelName); @@ -1604,8 +1600,8 @@ ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, "No kernel named " + std::string(KernelName) + " was found"); } -RTDeviceBinaryImage &ProgramManager::getDeviceImage( - const std::unordered_set &ImageSet, +const RTDeviceBinaryImage &ProgramManager::getDeviceImage( + const std::unordered_set &ImageSet, context_impl &ContextImpl, const device_impl &DeviceImpl) { assert(ImageSet.size() > 0); @@ -1851,7 +1847,7 @@ ProgramManager::ProgramPtr ProgramManager::build( return Program; } -void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) { +void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) { const RTDeviceBinaryImage::PropertyRange &AssertUsedRange = Img.getAssertUsed(); if (AssertUsedRange.isAvailable()) @@ -1859,7 +1855,8 @@ void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) { m_KernelUsesAssert.insert(Prop->Name); } -void ProgramManager::cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img) { +void ProgramManager::cacheKernelImplicitLocalArg( + const RTDeviceBinaryImage &Img) { const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange = Img.getImplicitLocalArg(); if (ImplicitLocalArgRange.isAvailable()) @@ -2399,7 +2396,7 @@ bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) { return std::any_of( m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(), - [&](std::pair>> Elem) { return compatibleWithDevice(Elem.first, DeviceImpl); }); } @@ -2433,9 +2430,9 @@ void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, m_DeviceGlobals.addOrInitialize(DeviceGlobalPtr, UniqueId); } -std::set +std::set ProgramManager::getRawDeviceImages(const std::vector &KernelIDs) { - std::set BinImages; + std::set BinImages; std::lock_guard KernelIDsGuard(m_KernelIDsMutex); for (const kernel_id &KID : KernelIDs) { auto Range = m_KernelIDs2BinImage.equal_range(KID); @@ -2496,7 +2493,8 @@ HostPipeMapEntry *ProgramManager::getHostPipeEntry(const void *HostPipePtr) { } device_image_plain ProgramManager::getDeviceImageFromBinaryImage( - RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev) { + const RTDeviceBinaryImage *BinImage, const context &Ctx, + const device &Dev) { const bundle_state ImgState = getBinImageState(BinImage); assert(compatibleWithDevice(BinImage, *getSyclObjImpl(Dev).get())); @@ -2522,7 +2520,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( // Collect unique raw device images taking into account kernel ids passed // TODO: Can we avoid repacking? - std::set BinImages; + std::set BinImages; if (!KernelIDs.empty()) { for (const auto &KID : KernelIDs) { bool isCompatibleWithAtLeastOneDev = @@ -2566,18 +2564,19 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( // a separate branch for that case to avoid unnecessary tracking work. struct DeviceBinaryImageInfo { std::shared_ptr> KernelIDs; - std::set Deps; + std::set Deps; bundle_state State = bundle_state::input; int RequirementCounter = 0; }; - std::unordered_map ImageInfoMap; + std::unordered_map + ImageInfoMap; for (const sycl::device &Dev : Devs) { device_impl &DevImpl = *getSyclObjImpl(Dev); // Track the highest image state for each requested kernel. using StateImagesPairT = - std::pair>; + std::pair>; using KernelImageMapT = std::map; KernelImageMapT KernelImageMap; @@ -2585,7 +2584,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( for (const kernel_id &KernelID : KernelIDs) KernelImageMap.insert({KernelID, {}}); - for (RTDeviceBinaryImage *BinImage : BinImages) { + for (const RTDeviceBinaryImage *BinImage : BinImages) { if (!compatibleWithDevice(BinImage, DevImpl) || !doesDevSupportDeviceRequirements(DevImpl, *BinImage)) continue; @@ -2632,7 +2631,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( KernelImages.push_back(BinImage); ++ImgRequirementCounter; } else if (KernelImagesState < ImgState) { - for (RTDeviceBinaryImage *Img : KernelImages) { + for (const RTDeviceBinaryImage *Img : KernelImages) { auto It = ImageInfoMap.find(Img); assert(It != ImageInfoMap.end()); assert(It->second.RequirementCounter > 0); @@ -2656,7 +2655,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( for (const auto &ImgInfoPair : ImageInfoMap) { if (ImgInfoPair.second.RequirementCounter == 0) continue; - for (RTDeviceBinaryImage *Dep : ImgInfoPair.second.Deps) { + for (const RTDeviceBinaryImage *Dep : ImgInfoPair.second.Deps) { auto It = ImageInfoMap.find(Dep); if (It != ImageInfoMap.end()) It->second.RequirementCounter = 0; @@ -2673,11 +2672,11 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr); std::vector Images; - const std::set &Deps = ImgInfoPair.second.Deps; + const std::set &Deps = ImgInfoPair.second.Deps; Images.reserve(Deps.size() + 1); Images.push_back( createSyclObjFromImpl(std::move(MainImpl))); - for (RTDeviceBinaryImage *Dep : Deps) + for (const RTDeviceBinaryImage *Dep : Deps) Images.push_back( createDependencyImage(Ctx, Devs, Dep, ImgInfoPair.second.State)); SYCLDeviceImages.push_back(std::move(Images)); @@ -2688,7 +2687,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( device_image_plain ProgramManager::createDependencyImage( const context &Ctx, const std::vector &Devs, - RTDeviceBinaryImage *DepImage, bundle_state DepState) { + const RTDeviceBinaryImage *DepImage, bundle_state DepState) { std::shared_ptr> DepKernelIDs; { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index cecd2b8702942..f9386e30a3635 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -135,12 +135,12 @@ class ProgramManager { // process. Can only be called after staticInit is done. static ProgramManager &getInstance(); - RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, - context_impl &ContextImpl, - const device_impl &DeviceImpl); + const RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, + context_impl &ContextImpl, + const device_impl &DeviceImpl); - RTDeviceBinaryImage &getDeviceImage( - const std::unordered_set &ImagesToVerify, + const RTDeviceBinaryImage &getDeviceImage( + const std::unordered_set &ImagesToVerify, context_impl &ContextImpl, const device_impl &DeviceImpl); ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img, @@ -287,7 +287,7 @@ class ProgramManager { HostPipeMapEntry *getHostPipeEntry(const void *HostPipePtr); device_image_plain - getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, + getDeviceImageFromBinaryImage(const RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev); // The function returns a vector of SYCL device images that are compiled with @@ -299,7 +299,7 @@ class ProgramManager { // Creates a new dependency image for a given dependency binary image. device_image_plain createDependencyImage(const context &Ctx, const std::vector &Devs, - RTDeviceBinaryImage *DepImage, + const RTDeviceBinaryImage *DepImage, bundle_state DepState); // Bring image to the required state. Does it inplace @@ -377,13 +377,13 @@ class ProgramManager { KernelNameStrRefT KernelName, KernelNameBasedCacheT *KernelNameBasedCachePtr) const; - std::set + std::set getRawDeviceImages(const std::vector &KernelIDs); - std::set + std::set collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport = true); - std::set + std::set collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport); @@ -408,17 +408,17 @@ class ProgramManager { void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const; /// Add info on kernels using assert into cache - void cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img); + void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img); /// Add info on kernels using local arg into cache - void cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img); + void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - std::set + std::set collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev); - bool isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage); - bool shouldBF16DeviceImageBeUsed(RTDeviceBinaryImage *BinImage, + bool isBfloat16DeviceImage(const RTDeviceBinaryImage *BinImage); + bool shouldBF16DeviceImageBeUsed(const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl); protected: @@ -437,7 +437,7 @@ class ProgramManager { // in case of SPIRV + AOT. // Using shared_ptr to avoid expensive copy of the vector. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_KernelIDs2BinImage; // Maps device binary image to a vector of kernel ids in this image. @@ -445,7 +445,7 @@ class ProgramManager { // The vector is initialized in addImages function and is supposed to be // immutable afterwards. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_map>> m_BinImg2KernelIDs; @@ -461,13 +461,13 @@ class ProgramManager { /// in the sycl::detail::__sycl_service_kernel__ namespace which is /// exclusively used for this purpose. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_ServiceKernels; /// Caches all exported symbols to allow faster lookup when excluding these // from kernel bundles. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_ExportedSymbolImages; /// Keeps all device images we are refering to during program lifetime. Used @@ -483,7 +483,7 @@ class ProgramManager { /// Caches list of device images that use or provide virtual functions from /// the same set. Used to simplify access. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_map> + std::unordered_map> m_VFSet2BinImage; /// Protects built-in kernel ID cache. diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 47d4878807f3b..d10078622c941 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -374,7 +374,7 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { // the device and whose target matches the device. detail::device_impl &DevImpl = *getSyclObjImpl(Dev); for (const auto &KernelID : KernelIDs) { - std::set BinImages = + std::set BinImages = detail::ProgramManager::getInstance().getRawDeviceImages({KernelID}); if (std::none_of(BinImages.begin(), BinImages.end(), diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 6f7cca016264c..8aa1da1f2936d 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -13,7 +13,7 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { public: std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getKernelID2BinImage() { return m_KernelIDs2BinImage; } @@ -23,20 +23,20 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_KernelName2KernelIDs; } - std::unordered_map>> & getBinImage2KernelId() { return m_BinImg2KernelIDs; } std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getServiceKernels() { return m_ServiceKernels; } std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getExportedSymbolImages() { return m_ExportedSymbolImages; } @@ -48,7 +48,7 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { } std::unordered_map> & + std::set> & getVFSet2BinImage() { return m_VFSet2BinImage; }