From b13de7958fb5f30a60afc87854d30c24b372a566 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 29 Jan 2025 08:53:03 +0000 Subject: [PATCH 01/16] [SYCL][RTC] Cache frontend invocation Signed-off-by: Julian Oppermann --- sycl-jit/common/include/Kernel.h | 3 + sycl-jit/jit-compiler/CMakeLists.txt | 1 + sycl-jit/jit-compiler/include/KernelFusion.h | 15 ++- sycl-jit/jit-compiler/lib/KernelFusion.cpp | 66 ++++++++--- .../lib/rtc/DeviceCompilation.cpp | 33 ++++-- .../jit-compiler/lib/rtc/DeviceCompilation.h | 4 +- sycl/source/detail/jit_compiler.cpp | 13 ++- sycl/source/detail/jit_compiler.hpp | 3 +- sycl/source/detail/kernel_bundle_impl.hpp | 22 +++- .../kernel_compiler/kernel_compiler_sycl.cpp | 6 +- .../kernel_compiler/kernel_compiler_sycl.hpp | 4 +- .../detail/persistent_device_code_cache.cpp | 105 +++++++++++++++++ .../detail/persistent_device_code_cache.hpp | 17 +++ .../kernel_compiler_sycl_jit_cache.cpp | 108 ++++++++++++++++++ .../no_sycl_hpp_in_e2e_tests.cpp | 2 +- 15 files changed, 362 insertions(+), 40 deletions(-) create mode 100644 sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp diff --git a/sycl-jit/common/include/Kernel.h b/sycl-jit/common/include/Kernel.h index eb5ba0f05c913..09f7392cf0a8b 100644 --- a/sycl-jit/common/include/Kernel.h +++ b/sycl-jit/common/include/Kernel.h @@ -411,6 +411,9 @@ struct RTCDevImgInfo { using RTCBundleInfo = DynArray; +// LLVM's APIs prefer `char *` for byte buffers. +using RTCDeviceCodeIR = DynArray; + } // namespace jit_compiler #endif // SYCL_FUSION_COMMON_KERNEL_H diff --git a/sycl-jit/jit-compiler/CMakeLists.txt b/sycl-jit/jit-compiler/CMakeLists.txt index 63bb2ecc34ad9..dc45039c684a0 100644 --- a/sycl-jit/jit-compiler/CMakeLists.txt +++ b/sycl-jit/jit-compiler/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_library(sycl-jit LINK_COMPONENTS BitReader + BitWriter Core Support Option diff --git a/sycl-jit/jit-compiler/include/KernelFusion.h b/sycl-jit/jit-compiler/include/KernelFusion.h index e79124f016c68..6177f78512962 100644 --- a/sycl-jit/jit-compiler/include/KernelFusion.h +++ b/sycl-jit/jit-compiler/include/KernelFusion.h @@ -61,8 +61,10 @@ class RTCResult { explicit RTCResult(const char *BuildLog) : Failed{true}, BundleInfo{}, BuildLog{BuildLog} {} - RTCResult(RTCBundleInfo &&BundleInfo, const char *BuildLog) - : Failed{false}, BundleInfo{std::move(BundleInfo)}, BuildLog{BuildLog} {} + RTCResult(RTCBundleInfo &&BundleInfo, RTCDeviceCodeIR &&DeviceCodeIR, + const char *BuildLog) + : Failed{false}, BundleInfo{std::move(BundleInfo)}, + DeviceCodeIR(std::move(DeviceCodeIR)), BuildLog{BuildLog} {} bool failed() const { return Failed; } @@ -73,9 +75,15 @@ class RTCResult { return BundleInfo; } + const RTCDeviceCodeIR &getDeviceCodeIR() const { + assert(!failed() && "No device code IR"); + return DeviceCodeIR; + } + private: bool Failed; RTCBundleInfo BundleInfo; + RTCDeviceCodeIR DeviceCodeIR; sycl::detail::string BuildLog; }; @@ -102,7 +110,8 @@ KF_EXPORT_SYMBOL JITResult materializeSpecConstants( KF_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, - View UserArgs); + View UserArgs, + View CachedIR, bool SaveIR); /// Clear all previously set options. KF_EXPORT_SYMBOL void resetJITConfiguration(); diff --git a/sycl-jit/jit-compiler/lib/KernelFusion.cpp b/sycl-jit/jit-compiler/lib/KernelFusion.cpp index 0ac2b12738f5e..d4ce298bb5171 100644 --- a/sycl-jit/jit-compiler/lib/KernelFusion.cpp +++ b/sycl-jit/jit-compiler/lib/KernelFusion.cpp @@ -19,7 +19,10 @@ #include "translation/SPIRVLLVMTranslation.h" #include +#include +#include #include +#include #include #include @@ -31,17 +34,21 @@ using namespace jit_compiler; using FusedFunction = helper::FusionHelper::FusedFunction; using FusedFunctionList = std::vector; -template -static ResultType errorTo(llvm::Error &&Err, const std::string &Msg) { +static std::string formatError(llvm::Error &&Err, const std::string &Msg) { std::stringstream ErrMsg; ErrMsg << Msg << "\nDetailed information:\n"; llvm::handleAllErrors(std::move(Err), [&ErrMsg](const llvm::StringError &StrErr) { - // Cannot throw an exception here if LLVM itself is - // compiled without exception support. ErrMsg << "\t" << StrErr.getMessage() << "\n"; }); - return ResultType{ErrMsg.str().c_str()}; + return ErrMsg.str(); +} + +template +static ResultType errorTo(llvm::Error &&Err, const std::string &Msg) { + // Cannot throw an exception here if LLVM itself is compiled without exception + // support. + return ResultType{formatError(std::move(Err), Msg).c_str()}; } static std::vector @@ -242,8 +249,10 @@ fuseKernels(View KernelInformation, const char *FusedKernelName, extern "C" KF_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, - View UserArgs) { + View UserArgs, View CachedIR, bool SaveIR) { + llvm::LLVMContext Context; std::string BuildLog; + configureDiagnostics(Context, BuildLog); auto UserArgListOrErr = parseUserArgs(UserArgs); if (!UserArgListOrErr) { @@ -272,16 +281,43 @@ compileSYCL(InMemoryFile SourceFile, View IncludeFiles, Verbose); } - auto ModuleOrErr = - compileDeviceCode(SourceFile, IncludeFiles, UserArgList, BuildLog); - if (!ModuleOrErr) { - return errorTo(ModuleOrErr.takeError(), - "Device compilation failed"); + std::unique_ptr Module; + + if (CachedIR.size() > 0) { + llvm::StringRef IRStr{CachedIR.begin(), CachedIR.size()}; + std::unique_ptr IRBuf = + llvm::MemoryBuffer::getMemBuffer(IRStr, /*BufferName=*/"", + /*RequiresNullTerminator=*/false); + auto ModuleOrError = llvm::parseBitcodeFile(*IRBuf, Context); + if (!ModuleOrError) { + // Not a fatal error, we'll just compile the source string normally. + BuildLog.append(formatError(ModuleOrError.takeError(), + "Loading of cached device code failed")); + } else { + Module = std::move(*ModuleOrError); + } + } + + bool FromSource = false; + if (!Module) { + auto ModuleOrErr = compileDeviceCode(SourceFile, IncludeFiles, UserArgList, + BuildLog, Context); + if (!ModuleOrErr) { + return errorTo(ModuleOrErr.takeError(), + "Device compilation failed"); + } + + Module = std::move(*ModuleOrErr); + FromSource = true; } - std::unique_ptr Context; - std::unique_ptr Module = std::move(*ModuleOrErr); - Context.reset(&Module->getContext()); + RTCDeviceCodeIR IR; + if (SaveIR && FromSource) { + std::string BCString; + llvm::raw_string_ostream BCStream{BCString}; + llvm::WriteBitcodeToFile(*Module, BCStream); + IR = RTCDeviceCodeIR{BCString.data(), BCString.data() + BCString.size()}; + } if (auto Error = linkDeviceLibraries(*Module, UserArgList, BuildLog)) { return errorTo(std::move(Error), "Device linking failed"); @@ -314,7 +350,7 @@ compileSYCL(InMemoryFile SourceFile, View IncludeFiles, } } - return RTCResult{std::move(BundleInfo), BuildLog.c_str()}; + return RTCResult{std::move(BundleInfo), std::move(IR), BuildLog.c_str()}; } extern "C" KF_EXPORT_SYMBOL void resetJITConfiguration() { diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 8e59966daff1c..9c555614ddbd7 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -132,7 +132,8 @@ static const std::string &getDPCPPRoot() { namespace { -struct GetLLVMModuleAction : public ToolAction { +class GetLLVMModuleAction : public ToolAction { +public: // Code adapted from `FrontendActionFactory::runInvocation`. bool runInvocation(std::shared_ptr Invocation, FileManager *Files, @@ -160,20 +161,24 @@ struct GetLLVMModuleAction : public ToolAction { // Ignore `Compiler.getFrontendOpts().ProgramAction` (would be `EmitBC`) and // create/execute an `EmitLLVMOnlyAction` (= codegen to LLVM module without // emitting anything) instead. - EmitLLVMOnlyAction ELOA; + EmitLLVMOnlyAction ELOA{&Context}; const bool Success = Compiler.ExecuteAction(ELOA); Files->clearStatCache(); if (!Success) { return false; } - // Take the module and its context to extend the objects' lifetime. + // Take the module to extend its lifetime. Module = ELOA.takeModule(); - ELOA.takeLLVMContext(); return true; } + GetLLVMModuleAction(LLVMContext &Context) : Context{Context}, Module{} {} + std::unique_ptr takeModule() { return std::move(Module); } + +private: + LLVMContext &Context; std::unique_ptr Module; }; @@ -223,9 +228,11 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler { } // anonymous namespace -Expected> jit_compiler::compileDeviceCode( - InMemoryFile SourceFile, View IncludeFiles, - const InputArgList &UserArgList, std::string &BuildLog) { +Expected> +jit_compiler::compileDeviceCode(InMemoryFile SourceFile, + View IncludeFiles, + const InputArgList &UserArgList, + std::string &BuildLog, LLVMContext &Context) { TimeTraceScope TTS{"compileDeviceCode"}; const std::string &DPCPPRoot = getDPCPPRoot(); @@ -285,9 +292,9 @@ Expected> jit_compiler::compileDeviceCode( return NewArgs; }); - GetLLVMModuleAction Action; + GetLLVMModuleAction Action{Context}; if (!Tool.run(&Action)) { - return std::move(Action.Module); + return Action.takeModule(); } return createStringError(BuildLog); @@ -409,8 +416,6 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, } LLVMContext &Context = Module.getContext(); - Context.setDiagnosticHandler( - std::make_unique(BuildLog)); for (const std::string &LibName : LibNames) { std::string LibPath = DPCPPRoot + "/lib/" + LibName; @@ -652,3 +657,9 @@ jit_compiler::parseUserArgs(View UserArgs) { return std::move(AL); } + +void jit_compiler::configureDiagnostics(LLVMContext &Context, + std::string &BuildLog) { + Context.setDiagnosticHandler( + std::make_unique(BuildLog)); +} diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h index 1c30e5a61fb4b..64d15d4bd0405 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h @@ -25,7 +25,7 @@ namespace jit_compiler { llvm::Expected> compileDeviceCode(InMemoryFile SourceFile, View IncludeFiles, const llvm::opt::InputArgList &UserArgList, - std::string &BuildLog); + std::string &BuildLog, llvm::LLVMContext &Context); llvm::Error linkDeviceLibraries(llvm::Module &Module, const llvm::opt::InputArgList &UserArgList, @@ -40,6 +40,8 @@ performPostLink(std::unique_ptr Module, llvm::Expected parseUserArgs(View UserArgs); +void configureDiagnostics(llvm::LLVMContext &Context, std::string &BuildLog); + } // namespace jit_compiler #endif // SYCL_JIT_COMPILER_RTC_DEVICE_COMPILATION_H diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index e95b3ab2e58b8..fd52e645ac75b 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1229,7 +1229,8 @@ sycl_device_binaries jit_compiler::compileSYCL( const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames) { + const std::vector &RegisteredKernelNames, + const std::vector &CachedIR, std::vector *SavedIRPtr) { // RegisteredKernelNames may contain template specializations, so we just put // them in main() which ensures they are instantiated. @@ -1260,7 +1261,9 @@ sycl_device_binaries jit_compiler::compileSYCL( std::back_inserter(UserArgsView), [](const auto &Arg) { return Arg.c_str(); }); - auto Result = CompileSYCLHandle(SourceFile, IncludeFilesView, UserArgsView); + auto Result = + CompileSYCLHandle(SourceFile, IncludeFilesView, UserArgsView, CachedIR, + /*SaveIR=*/SavedIRPtr != nullptr); if (LogPtr) { LogPtr->append(Result.getBuildLog()); @@ -1270,6 +1273,12 @@ sycl_device_binaries jit_compiler::compileSYCL( throw sycl::exception(sycl::errc::build, Result.getBuildLog()); } + const auto &IR = Result.getDeviceCodeIR(); + if (SavedIRPtr && !IR.empty()) { + SavedIRPtr->resize(IR.size()); + std::memcpy(SavedIRPtr->data(), IR.begin(), IR.size()); + } + return createDeviceBinaryImage(Result.getBundleInfo(), /*OffloadEntryPrefix=*/CompilationID + '$'); } diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index aa190a3133afa..72aa48fb5e374 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -51,7 +51,8 @@ class jit_compiler { const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames); + const std::vector &RegisteredKernelNames, + const std::vector &CachedIR, std::vector *SavedIRPtr); bool isAvailable() { return Available; } diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index a8d0bf13f287d..db0f1f7394ee4 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -495,11 +495,27 @@ class kernel_bundle_impl { if (Language == syclex::source_language::sycl_jit) { // Build device images via the program manager. - // TODO: Support persistent caching. - const std::string &SourceStr = std::get(this->Source); + + std::string BuildOptionsString; + std::vector CachedIR; + std::unique_ptr> SavedIRPtr; + if (PersistentDeviceCodeCache::isEnabled()) { + BuildOptionsString = syclex::detail::userArgsAsString(BuildOptions); + CachedIR = PersistentDeviceCodeCache::getDeviceCodeIRFromDisc( + MDevices, BuildOptionsString, SourceStr); + SavedIRPtr = std::make_unique>(); + } + auto [Binaries, CompilationID] = syclex::detail::SYCL_JIT_to_SPIRV( - SourceStr, IncludePairs, BuildOptions, LogPtr, RegisteredKernelNames); + SourceStr, IncludePairs, BuildOptions, LogPtr, RegisteredKernelNames, + CachedIR, SavedIRPtr.get()); + + if (PersistentDeviceCodeCache::isEnabled() && !SavedIRPtr->empty()) { + PersistentDeviceCodeCache::putDeviceCodeIRToDisc( + MDevices, BuildOptionsString, SourceStr, *SavedIRPtr); + SavedIRPtr.reset(); + } auto &PM = detail::ProgramManager::getInstance(); PM.addImages(Binaries); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 9108572bb5b1d..89a34d0fa41cc 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -308,14 +308,16 @@ std::pair SYCL_JIT_to_SPIRV( [[maybe_unused]] include_pairs_t IncludePairs, [[maybe_unused]] const std::vector &UserArgs, [[maybe_unused]] std::string *LogPtr, - [[maybe_unused]] const std::vector &RegisteredKernelNames) { + [[maybe_unused]] const std::vector &RegisteredKernelNames, + [[maybe_unused]] const std::vector &CachedIR, + [[maybe_unused]] std::vector *SavedIRPtr) { #if SYCL_EXT_JIT_ENABLE static std::atomic_uintptr_t CompilationCounter; std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); sycl_device_binaries Binaries = sycl::detail::jit_compiler::get_instance().compileSYCL( CompilationID, SYCLSource, IncludePairs, UserArgs, LogPtr, - RegisteredKernelNames); + RegisteredKernelNames, CachedIR, SavedIRPtr); return std::make_pair(Binaries, std::move(CompilationID)); #else throw sycl::exception(sycl::errc::build, diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index 8187c5373150a..48c72fc7cdc79 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -38,7 +38,9 @@ std::string userArgsAsString(const std::vector &UserArguments); std::pair SYCL_JIT_to_SPIRV(const std::string &Source, include_pairs_t IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames); + const std::vector &RegisteredKernelNames, + const std::vector &CachedIR, + std::vector *SavedIRPtr); bool SYCL_JIT_Compilation_Available(); diff --git a/sycl/source/detail/persistent_device_code_cache.cpp b/sycl/source/detail/persistent_device_code_cache.cpp index 213948d526f59..e6966a900000b 100644 --- a/sycl/source/detail/persistent_device_code_cache.cpp +++ b/sycl/source/detail/persistent_device_code_cache.cpp @@ -557,6 +557,55 @@ void PersistentDeviceCodeCache::putCompiledKernelToDisc( updateCacheFileSizeAndTriggerEviction(getRootDir(), TotalSize); } +void PersistentDeviceCodeCache::putDeviceCodeIRToDisc( + const std::vector &Devices, const std::string &BuildOptionsString, + const std::string &SourceStr, const std::vector &IR) { + + repopulateCacheSizeFile(getRootDir()); + + // Do not insert any new item if eviction is in progress. + // Since evictions are rare, we can afford to spin lock here. + const std::string EvictionInProgressFile = + getRootDir() + EvictionInProgressFileSuffix; + // Stall until the other process finishes eviction. + while (OSUtil::isPathPresent(EvictionInProgressFile)) + continue; + + // Total size of the item that we are writing to the cache. + size_t TotalSize = 0; + + std::string DirName = + getDeviceCodeIRPath(Devices, BuildOptionsString, SourceStr); + + try { + OSUtil::makeDir(DirName.c_str()); + std::string FileName = DirName + "/ir"; + std::string FullFileName = FileName + ".bin"; + LockCacheItem Lock{FileName}; + if (Lock.isOwned()) { + writeBinaryDataToFile(FullFileName, IR); + PersistentDeviceCodeCache::trace_KernelCompiler( + "device code IR has been cached: ", FullFileName); + + TotalSize = getFileSize(FullFileName); + saveCurrentTimeInAFile(FileName + CacheEntryAccessTimeSuffix); + } else { + PersistentDeviceCodeCache::trace_KernelCompiler("cache lock not owned ", + FileName); + } + } catch (std::exception &e) { + PersistentDeviceCodeCache::trace_KernelCompiler( + std::string("exception encountered making cache: ") + e.what()); + } catch (...) { + PersistentDeviceCodeCache::trace_KernelCompiler( + std::string("error outputting cache: ") + std::strerror(errno)); + } + + // Update the cache size file and trigger cache eviction if needed. + if (TotalSize) + updateCacheFileSizeAndTriggerEviction(getRootDir(), TotalSize); +} + /* Program binaries built for one or more devices are read from persistent * cache and returned in form of vector of programs. Each binary program is * stored in vector of chars. There is a one-to-one correspondence between @@ -664,6 +713,39 @@ PersistentDeviceCodeCache::getCompiledKernelFromDisc( return Binaries; } +std::vector PersistentDeviceCodeCache::getDeviceCodeIRFromDisc( + const std::vector &Devices, const std::string &BuildOptionsString, + const std::string &SourceStr) { + std::vector IR; + + std::string DirName = + getDeviceCodeIRPath(Devices, BuildOptionsString, SourceStr); + + if (DirName.empty() || !OSUtil::isPathPresent(DirName)) + return {}; + + std::string FileName = DirName + "/ir"; + std::string FullFileName = FileName + ".bin"; + if (OSUtil::isPathPresent(FullFileName)) { + if (!LockCacheItem::isLocked(FileName)) { + try { + IR = readBinaryDataFromFile(FullFileName); + + // Explicitly update the access time of the file. This is required for + // eviction. + if (isEvictionEnabled()) + saveCurrentTimeInAFile(FileName + CacheEntryAccessTimeSuffix); + } catch (...) { + // If read was unsuccessfull try the next item + return {}; + } + } + } + PersistentDeviceCodeCache::trace_KernelCompiler( + "using cached device code IR: ", FullFileName); + return IR; +} + /* Returns string value which can be used to identify different device */ std::string PersistentDeviceCodeCache::getDeviceIDString(const device &Device) { @@ -864,6 +946,29 @@ std::string PersistentDeviceCodeCache::getCompiledKernelItemPath( std::to_string(StringHasher(SourceString)); } +std::string PersistentDeviceCodeCache::getDeviceCodeIRPath( + const std::vector &Devices, const std::string &BuildOptionsString, + const std::string &SourceString) { + assert(!Devices.empty()); + + std::string cache_root{getRootDir()}; + if (cache_root.empty()) { + trace("Disable persistent cache due to unconfigured cache root."); + return {}; + } + + std::string DevicesString; + for (const auto &Dev : Devices) + DevicesString += getDeviceIDString(Dev) + ","; + + std::hash StringHasher{}; + + return cache_root + "/ext_kernel_compiler" + "/" + + std::to_string(StringHasher(DevicesString)) + "/" + + std::to_string(StringHasher(BuildOptionsString)) + "/" + + std::to_string(StringHasher(SourceString)); +} + /* Returns true if persistent cache is enabled. */ bool PersistentDeviceCodeCache::isEnabled() { diff --git a/sycl/source/detail/persistent_device_code_cache.hpp b/sycl/source/detail/persistent_device_code_cache.hpp index 9346461c9229f..8e5cbadbac2bc 100644 --- a/sycl/source/detail/persistent_device_code_cache.hpp +++ b/sycl/source/detail/persistent_device_code_cache.hpp @@ -178,6 +178,13 @@ class PersistentDeviceCodeCache { const std::string &BuildOptionsString, const std::string &SourceString); + /* Get directory name when storing runtime compiled device code IR ( via + * kernel_compiler, sycl_jit language). + */ + static std::string getDeviceCodeIRPath(const std::vector &Devices, + const std::string &BuildOptionsString, + const std::string &SourceString); + /* Program binaries built for one or more devices are read from persistent * cache and returned in form of vector of programs. Each binary program is * stored in vector of chars. @@ -193,6 +200,11 @@ class PersistentDeviceCodeCache { const std::string &BuildOptionsString, const std::string &SourceStr); + static std::vector + getDeviceCodeIRFromDisc(const std::vector &Devices, + const std::string &BuildOptionsString, + const std::string &SourceStr); + /* Stores build program in persistent cache */ static void @@ -207,6 +219,11 @@ class PersistentDeviceCodeCache { const std::string &SourceStr, const ur_program_handle_t &NativePrg); + static void putDeviceCodeIRToDisc(const std::vector &Devices, + const std::string &BuildOptionsString, + const std::string &SourceStr, + const std::vector &IR); + /* Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is set*/ static void trace(const std::string &msg, const std::string &path = "") { static const bool traceEnabled = diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp new file mode 100644 index 0000000000000..98ed9a100df31 --- /dev/null +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp @@ -0,0 +1,108 @@ +//==- kernel_compiler_sycl_jit_cache.cpp --- persistent cache for SYCL-RTC -==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: (opencl || level_zero) +// REQUIRES: aspect-usm_device_allocations + +// UNSUPPORTED: accelerator +// UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. + +// DEFINE: %{cache_vars} = env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=7 SYCL_CACHE_DIR=%t/cache_dir +// DEFINE: %{max_cache_size} = SYCL_CACHE_MAX_SIZE=10000 +// RUN: %{build} -o %t.out +// RUN: %if run-mode %{rm -rf %t/cache_dir%} +// RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: %if run-mode %{rm -rf %t/cache_dir%} +// RUN: %{cache_vars} %{max_cache_size} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-EVICT + +#include +#include + +auto constexpr SYCLSource = R"""( +#include + +extern "C" SYCL_EXTERNAL +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>)) +void vec_add(float* in1, float* in2, float* out){ + size_t id = sycl::ext::oneapi::this_work_item::get_nd_item<1>().get_global_linear_id(); + out[id] = in1[id] + in2[id]; +} +)"""; + +static void dumpKernelIDs() { + for (auto &kernelID : sycl::get_kernel_ids()) + std::cout << kernelID.get_name() << std::endl; +} + +int test_persistent_cache() { + namespace syclex = sycl::ext::oneapi::experimental; + using source_kb = sycl::kernel_bundle; + using exe_kb = sycl::kernel_bundle; + + sycl::queue q; + sycl::context ctx = q.get_context(); + + bool ok = + q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl_jit); + if (!ok) { + std::cout << "Apparently this device does not support `sycl_jit` source " + "kernel bundle extensin: " + << q.get_device().get_info() + << std::endl; + return -1; + } + + source_kb kbSrc1 = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl_jit, SYCLSource); + + // Bundle is entered into cache on first build. + // CHECK: [kernel_compiler Persistent Cache]: device code IR has been cached + exe_kb kbExe1a = syclex::build(kbSrc1); + dumpKernelIDs(); + // CHECK: rtc_0$__sycl_kernel_vec_add + + // Cache hit! We get independent bundles with their own version of the kernel. + // CHECK: [kernel_compiler Persistent Cache]: using cached device code IR + exe_kb kbExe1b = syclex::build(kbSrc1); + dumpKernelIDs(); + // CHECK-DAG: rtc_0$__sycl_kernel_vec_add + // CHECK-DAG: rtc_1$__sycl_kernel_vec_add + + source_kb kbSrc2 = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl_jit, SYCLSource); + + // Different source bundle, but identical source is a cache hit. + // CHECK: [kernel_compiler Persistent Cache]: using cached device code IR + exe_kb kbExe2a = syclex::build(kbSrc2); + + // Different build_options means no cache hit. + // CHECK: [kernel_compiler Persistent Cache]: device code IR has been cached + std::vector flags{"-g", "-fno-fast-math"}; + exe_kb kbExe1c = + syclex::build(kbSrc1, syclex::properties{syclex::build_options{flags}}); + + // The kbExe1c build should trigger eviction if cache size is limited. + // CHECK: [kernel_compiler Persistent Cache]: using cached device code IR + // CHECK-EVICT: [kernel_compiler Persistent Cache]: device code IR has been cached + exe_kb kbExe2b = syclex::build(kbSrc2); + + // TODO: Add tests that `#include` files, either from the filesystem or + // defined with the `include_files` property, after the persistent + // cache becomes sensitive to includes. + + return 0; +} + +int main(int argc, char **) { +#ifdef SYCL_EXT_ONEAPI_KERNEL_COMPILER + return test_persistent_cache(); +#else + static_assert(false, "Kernel Compiler feature test macro undefined"); +#endif + return 0; +} diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index 692ca4b8a16d2..dd068fb40752a 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 6 +// CHECK-NUM-MATCHES: 7 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see From 73127bf9c7f2fc416302576b74b481d516d3575e Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Thu, 30 Jan 2025 09:13:41 +0000 Subject: [PATCH 02/16] Add comment Signed-off-by: Julian Oppermann --- .../kernel_compiler/kernel_compiler_sycl.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index 48c72fc7cdc79..bf5b8848b377a 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -35,6 +35,20 @@ bool SYCL_Compilation_Available(); std::string userArgsAsString(const std::vector &UserArguments); +// Compile the given SYCL source string and virtual include files into the image +// format understood by the program manager. +// +// Returns a pointer to the image (owned by the `jit_compiler` class), and the +// bundle-specific prefix used for loading the kernels. +// +// If `CachedIR` is not empty, the JIT compiler tries to load the bytes as an +// LLVM bitcode module instead of invoking the frontend on the source string. It +// falls back to running the frontend if the bitcode is invalid (e.g., version +// mismatch). +// +// If `SavedIRPtr` is not nullptr, and the source string was compiled (meaning +// `CachedIR` was empty or invalid), the JIT compiler stores the LLVM module +// returned from the device compilation as bitcode into the given vector. std::pair SYCL_JIT_to_SPIRV(const std::string &Source, include_pairs_t IncludePairs, const std::vector &UserArgs, std::string *LogPtr, From f27d3e5e8004e7e49bf3559213e1682f2e611d56 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Tue, 4 Feb 2025 08:11:34 +0000 Subject: [PATCH 03/16] Calculate hash over preprocessed source Signed-off-by: Lukas Sommer Signed-off-by: Julian Oppermann --- sycl-jit/jit-compiler/lib/KernelFusion.cpp | 12 ++ .../lib/rtc/DeviceCompilation.cpp | 155 +++++++++++++++--- .../jit-compiler/lib/rtc/DeviceCompilation.h | 5 + 3 files changed, 148 insertions(+), 24 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/KernelFusion.cpp b/sycl-jit/jit-compiler/lib/KernelFusion.cpp index d4ce298bb5171..742c39e2d4ae7 100644 --- a/sycl-jit/jit-compiler/lib/KernelFusion.cpp +++ b/sycl-jit/jit-compiler/lib/KernelFusion.cpp @@ -281,6 +281,18 @@ compileSYCL(InMemoryFile SourceFile, View IncludeFiles, Verbose); } + // TODO(julian): Expose as a separate API. + auto Start = std::chrono::high_resolution_clock::now(); + auto HashOrError = calculateSourceHash(SourceFile, IncludeFiles, UserArgList); + if (!HashOrError) { + return errorTo(HashOrError.takeError(), "Source hashing failed"); + } + auto SourceHash = *HashOrError; + auto Stop = std::chrono::high_resolution_clock::now(); + + std::chrono::duration HashTime = Stop - Start; + llvm::dbgs() << "Hashing took " << int(HashTime.count()) << "ms\n"; + std::unique_ptr Module; if (CachedIR.size() > 0) { diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 9c555614ddbd7..660ecd3084199 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -16,8 +16,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -32,6 +34,7 @@ #include #include #include +#include #include #include @@ -132,14 +135,39 @@ static const std::string &getDPCPPRoot() { namespace { -class GetLLVMModuleAction : public ToolAction { +class HashPreprocessedAction : public PreprocessorFrontendAction { + +public: + BLAKE3Result HashValue; + +protected: + void ExecuteAction() override { + CompilerInstance &CI = getCompilerInstance(); + + std::string PreprocessedSource; + raw_string_ostream PreprocessStream(PreprocessedSource); + + PreprocessorOutputOptions Opts; + Opts.ShowCPP = 1; + Opts.MinimizeWhitespace = 1; + + DoPrintPreprocessedInput(CI.getPreprocessor(), &PreprocessStream, Opts); + + llvm::ArrayRef PreprocessedData( + (const uint8_t *)PreprocessedSource.data(), PreprocessedSource.size()); + + HashValue = BLAKE3::hash(PreprocessedData); + } +}; + +class RTCToolActionBase : public ToolAction { public: // Code adapted from `FrontendActionFactory::runInvocation`. bool runInvocation(std::shared_ptr Invocation, FileManager *Files, std::shared_ptr PCHContainerOps, DiagnosticConsumer *DiagConsumer) override { - assert(!Module && "Action should only be invoked on a single file"); + assert(!hasExecuted() && "Action should only be invoked on a single file"); // Create a compiler instance to handle the actual work. CompilerInstance Compiler(std::move(PCHContainerOps)); @@ -158,11 +186,48 @@ class GetLLVMModuleAction : public ToolAction { Compiler.createSourceManager(*Files); + return executeAction(Compiler, Files); + } + + virtual ~RTCToolActionBase() = default; + +protected: + virtual bool hasExecuted() = 0; + virtual bool executeAction(CompilerInstance &, FileManager *Files) = 0; +}; + +struct GetSourceHashAction : public RTCToolActionBase { +protected: + bool executeAction(CompilerInstance &CI, FileManager *Files) override { + HashPreprocessedAction HPA; + const bool Success = CI.ExecuteAction(HPA); + Files->clearStatCache(); + if (!Success) { + return false; + } + // TODO(Lukas): Avoid copy + HashValue = HPA.HashValue; + Executed = true; + return true; + } + + bool hasExecuted() override { return Executed; } + +private: + bool Executed = false; + +public: + BLAKE3Result HashValue; +}; + +struct GetLLVMModuleAction : public RTCToolActionBase { +protected: + bool executeAction(CompilerInstance &CI, FileManager *Files) override { // Ignore `Compiler.getFrontendOpts().ProgramAction` (would be `EmitBC`) and // create/execute an `EmitLLVMOnlyAction` (= codegen to LLVM module without // emitting anything) instead. EmitLLVMOnlyAction ELOA{&Context}; - const bool Success = Compiler.ExecuteAction(ELOA); + const bool Success = CI.ExecuteAction(ELOA); Files->clearStatCache(); if (!Success) { return false; @@ -174,6 +239,9 @@ class GetLLVMModuleAction : public ToolAction { return true; } + bool hasExecuted() override { return static_cast(Module); } + +public: GetLLVMModuleAction(LLVMContext &Context) : Context{Context}, Module{} {} std::unique_ptr takeModule() { return std::move(Module); } @@ -226,20 +294,8 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler { } }; -} // anonymous namespace - -Expected> -jit_compiler::compileDeviceCode(InMemoryFile SourceFile, - View IncludeFiles, - const InputArgList &UserArgList, - std::string &BuildLog, LLVMContext &Context) { - TimeTraceScope TTS{"compileDeviceCode"}; - - const std::string &DPCPPRoot = getDPCPPRoot(); - if (DPCPPRoot == InvalidDPCPPRoot) { - return createStringError("Could not locate DPCPP root directory"); - } - +void adjustArgs(const InputArgList &UserArgList, const std::string &DPCPPRoot, + SmallVectorImpl &CommandLine) { DerivedArgList DAL{UserArgList}; const auto &OptTable = getDriverOptTable(); DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_fsycl_device_only)); @@ -258,17 +314,15 @@ jit_compiler::compileDeviceCode(InMemoryFile SourceFile, DAL.eraseArg(OPT_ftime_trace_granularity_EQ); DAL.eraseArg(OPT_ftime_trace_verbose); - SmallVector CommandLine; for (auto *Arg : DAL) { CommandLine.emplace_back(Arg->getAsString(DAL)); } +} - FixedCompilationDatabase DB{".", CommandLine}; - ClangTool Tool{DB, {SourceFile.Path}}; - - IntrusiveRefCntPtr DiagOpts{new DiagnosticOptions}; - ClangDiagnosticWrapper Wrapper(BuildLog, DiagOpts.get()); - Tool.setDiagnosticConsumer(Wrapper.consumer()); +void setupTool(ClangTool &Tool, const std::string &DPCPPRoot, + InMemoryFile SourceFile, View IncludeFiles, + DiagnosticConsumer *Consumer) { + Tool.setDiagnosticConsumer(Consumer); // Suppress message "Error while processing" being printed to stdout. Tool.setPrintErrorMessage(false); @@ -291,6 +345,32 @@ jit_compiler::compileDeviceCode(InMemoryFile SourceFile, NewArgs[0] = (Twine(DPCPPRoot) + "/bin/clang++").str(); return NewArgs; }); +} + +} // anonymous namespace + +Expected> +jit_compiler::compileDeviceCode(InMemoryFile SourceFile, + View IncludeFiles, + const InputArgList &UserArgList, + std::string &BuildLog, LLVMContext &Context) { + TimeTraceScope TTS{"compileDeviceCode"}; + + const std::string &DPCPPRoot = getDPCPPRoot(); + if (DPCPPRoot == InvalidDPCPPRoot) { + return createStringError("Could not locate DPCPP root directory"); + } + + SmallVector CommandLine; + adjustArgs(UserArgList, DPCPPRoot, CommandLine); + + FixedCompilationDatabase DB{".", CommandLine}; + ClangTool Tool{DB, {SourceFile.Path}}; + + IntrusiveRefCntPtr DiagOpts{new DiagnosticOptions}; + ClangDiagnosticWrapper Wrapper(BuildLog, DiagOpts.get()); + + setupTool(Tool, DPCPPRoot, SourceFile, IncludeFiles, Wrapper.consumer()); GetLLVMModuleAction Action{Context}; if (!Tool.run(&Action)) { @@ -300,6 +380,33 @@ jit_compiler::compileDeviceCode(InMemoryFile SourceFile, return createStringError(BuildLog); } +Expected +jit_compiler::calculateSourceHash(InMemoryFile SourceFile, + View IncludeFiles, + const InputArgList &UserArgList) { + TimeTraceScope TTS{"calculateSourceHash"}; + + const std::string &DPCPPRoot = getDPCPPRoot(); + if (DPCPPRoot == InvalidDPCPPRoot) { + return createStringError("Could not locate DPCPP root directory"); + } + + SmallVector CommandLine; + adjustArgs(UserArgList, DPCPPRoot, CommandLine); + + FixedCompilationDatabase DB{".", CommandLine}; + ClangTool Tool{DB, {SourceFile.Path}}; + + clang::IgnoringDiagConsumer DiagConsumer; + setupTool(Tool, DPCPPRoot, SourceFile, IncludeFiles, &DiagConsumer); + + GetSourceHashAction Action; + if (!Tool.run(&Action)) { + return DynArray(Action.HashValue.begin(), Action.HashValue.end()); + } + + return createStringError("Calculating source hash failed"); +} // This function is a simplified copy of the device library selection process in // `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V target // (no AoT, no third-party GPUs, no native CPU). Keep in sync! diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h index 64d15d4bd0405..9093cde9f3e6f 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h @@ -22,6 +22,11 @@ namespace jit_compiler { +using SourceHash = DynArray; +llvm::Expected +calculateSourceHash(InMemoryFile SourceFile, View IncludeFiles, + const llvm::opt::InputArgList &UserArgList); + llvm::Expected> compileDeviceCode(InMemoryFile SourceFile, View IncludeFiles, const llvm::opt::InputArgList &UserArgList, From cb0aa23e040febc225e841527abe2a0409750a75 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Thu, 13 Feb 2025 07:33:33 +0000 Subject: [PATCH 04/16] Cleanup after cherry-pick. Signed-off-by: Julian Oppermann --- .../lib/rtc/DeviceCompilation.cpp | 90 ++++++++++--------- 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 660ecd3084199..8c31f7c79607d 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -136,10 +136,6 @@ static const std::string &getDPCPPRoot() { namespace { class HashPreprocessedAction : public PreprocessorFrontendAction { - -public: - BLAKE3Result HashValue; - protected: void ExecuteAction() override { CompilerInstance &CI = getCompilerInstance(); @@ -153,11 +149,17 @@ class HashPreprocessedAction : public PreprocessorFrontendAction { DoPrintPreprocessedInput(CI.getPreprocessor(), &PreprocessStream, Opts); - llvm::ArrayRef PreprocessedData( + ArrayRef PreprocessedData( (const uint8_t *)PreprocessedSource.data(), PreprocessedSource.size()); HashValue = BLAKE3::hash(PreprocessedData); } + +public: + BLAKE3Result<> takeHashValue() { return std::move(HashValue); } + +private: + BLAKE3Result<> HashValue; }; class RTCToolActionBase : public ToolAction { @@ -193,10 +195,10 @@ class RTCToolActionBase : public ToolAction { protected: virtual bool hasExecuted() = 0; - virtual bool executeAction(CompilerInstance &, FileManager *Files) = 0; + virtual bool executeAction(CompilerInstance &, FileManager *) = 0; }; -struct GetSourceHashAction : public RTCToolActionBase { +class GetSourceHashAction : public RTCToolActionBase { protected: bool executeAction(CompilerInstance &CI, FileManager *Files) override { HashPreprocessedAction HPA; @@ -205,19 +207,20 @@ struct GetSourceHashAction : public RTCToolActionBase { if (!Success) { return false; } - // TODO(Lukas): Avoid copy - HashValue = HPA.HashValue; + + HashValue = HPA.takeHashValue(); Executed = true; return true; } bool hasExecuted() override { return Executed; } +public: + BLAKE3Result<> takeHashValue() { return std::move(HashValue); } + private: + BLAKE3Result<> HashValue; bool Executed = false; - -public: - BLAKE3Result HashValue; }; struct GetLLVMModuleAction : public RTCToolActionBase { @@ -294,8 +297,11 @@ class LLVMDiagnosticWrapper : public llvm::DiagnosticHandler { } }; -void adjustArgs(const InputArgList &UserArgList, const std::string &DPCPPRoot, - SmallVectorImpl &CommandLine) { +} // anonymous namespace + +static void adjustArgs(const InputArgList &UserArgList, + const std::string &DPCPPRoot, + SmallVectorImpl &CommandLine) { DerivedArgList DAL{UserArgList}; const auto &OptTable = getDriverOptTable(); DAL.AddFlagArg(nullptr, OptTable.getOption(OPT_fsycl_device_only)); @@ -319,9 +325,9 @@ void adjustArgs(const InputArgList &UserArgList, const std::string &DPCPPRoot, } } -void setupTool(ClangTool &Tool, const std::string &DPCPPRoot, - InMemoryFile SourceFile, View IncludeFiles, - DiagnosticConsumer *Consumer) { +static void setupTool(ClangTool &Tool, const std::string &DPCPPRoot, + InMemoryFile SourceFile, View IncludeFiles, + DiagnosticConsumer *Consumer) { Tool.setDiagnosticConsumer(Consumer); // Suppress message "Error while processing" being printed to stdout. Tool.setPrintErrorMessage(false); @@ -347,14 +353,11 @@ void setupTool(ClangTool &Tool, const std::string &DPCPPRoot, }); } -} // anonymous namespace - -Expected> -jit_compiler::compileDeviceCode(InMemoryFile SourceFile, - View IncludeFiles, - const InputArgList &UserArgList, - std::string &BuildLog, LLVMContext &Context) { - TimeTraceScope TTS{"compileDeviceCode"}; +Expected +jit_compiler::calculateSourceHash(InMemoryFile SourceFile, + View IncludeFiles, + const InputArgList &UserArgList) { + TimeTraceScope TTS{"calculateSourceHash"}; const std::string &DPCPPRoot = getDPCPPRoot(); if (DPCPPRoot == InvalidDPCPPRoot) { @@ -367,24 +370,24 @@ jit_compiler::compileDeviceCode(InMemoryFile SourceFile, FixedCompilationDatabase DB{".", CommandLine}; ClangTool Tool{DB, {SourceFile.Path}}; - IntrusiveRefCntPtr DiagOpts{new DiagnosticOptions}; - ClangDiagnosticWrapper Wrapper(BuildLog, DiagOpts.get()); - - setupTool(Tool, DPCPPRoot, SourceFile, IncludeFiles, Wrapper.consumer()); + clang::IgnoringDiagConsumer DiagConsumer; + setupTool(Tool, DPCPPRoot, SourceFile, IncludeFiles, &DiagConsumer); - GetLLVMModuleAction Action{Context}; + GetSourceHashAction Action; if (!Tool.run(&Action)) { - return Action.takeModule(); + BLAKE3Result<> HashValue = Action.takeHashValue(); + return DynArray(HashValue.begin(), HashValue.end()); } - return createStringError(BuildLog); + return createStringError("Calculating source hash failed"); } -Expected -jit_compiler::calculateSourceHash(InMemoryFile SourceFile, - View IncludeFiles, - const InputArgList &UserArgList) { - TimeTraceScope TTS{"calculateSourceHash"}; +Expected> +jit_compiler::compileDeviceCode(InMemoryFile SourceFile, + View IncludeFiles, + const InputArgList &UserArgList, + std::string &BuildLog, LLVMContext &Context) { + TimeTraceScope TTS{"compileDeviceCode"}; const std::string &DPCPPRoot = getDPCPPRoot(); if (DPCPPRoot == InvalidDPCPPRoot) { @@ -397,16 +400,19 @@ jit_compiler::calculateSourceHash(InMemoryFile SourceFile, FixedCompilationDatabase DB{".", CommandLine}; ClangTool Tool{DB, {SourceFile.Path}}; - clang::IgnoringDiagConsumer DiagConsumer; - setupTool(Tool, DPCPPRoot, SourceFile, IncludeFiles, &DiagConsumer); + IntrusiveRefCntPtr DiagOpts{new DiagnosticOptions}; + ClangDiagnosticWrapper Wrapper(BuildLog, DiagOpts.get()); - GetSourceHashAction Action; + setupTool(Tool, DPCPPRoot, SourceFile, IncludeFiles, Wrapper.consumer()); + + GetLLVMModuleAction Action{Context}; if (!Tool.run(&Action)) { - return DynArray(Action.HashValue.begin(), Action.HashValue.end()); + return Action.takeModule(); } - return createStringError("Calculating source hash failed"); + return createStringError(BuildLog); } + // This function is a simplified copy of the device library selection process in // `clang::driver::tools::SYCL::getDeviceLibraries`, assuming a SPIR-V target // (no AoT, no third-party GPUs, no native CPU). Keep in sync! From 4f41bd8ede49f04038b25476a564e0b94c3e2399 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Thu, 13 Feb 2025 23:18:01 +0000 Subject: [PATCH 05/16] Expose hashing as API on sycl-jit, and adapt persistent cache to it. Signed-off-by: Julian Oppermann --- sycl-jit/jit-compiler/include/KernelFusion.h | 26 ++++++++++ sycl-jit/jit-compiler/ld-version-script.txt | 1 + sycl-jit/jit-compiler/lib/KernelFusion.cpp | 39 ++++++++++----- .../lib/rtc/DeviceCompilation.cpp | 36 +++++++++----- .../jit-compiler/lib/rtc/DeviceCompilation.h | 7 ++- sycl/source/detail/jit_compiler.cpp | 48 +++++++++++++++---- sycl/source/detail/jit_compiler.hpp | 6 ++- sycl/source/detail/kernel_bundle_impl.hpp | 21 +------- .../kernel_compiler/kernel_compiler_sycl.cpp | 6 +-- .../kernel_compiler/kernel_compiler_sycl.hpp | 13 +---- .../detail/persistent_device_code_cache.cpp | 32 ++++--------- .../detail/persistent_device_code_cache.hpp | 20 ++++---- 12 files changed, 143 insertions(+), 112 deletions(-) diff --git a/sycl-jit/jit-compiler/include/KernelFusion.h b/sycl-jit/jit-compiler/include/KernelFusion.h index 6177f78512962..188e509e60c57 100644 --- a/sycl-jit/jit-compiler/include/KernelFusion.h +++ b/sycl-jit/jit-compiler/include/KernelFusion.h @@ -56,6 +56,28 @@ class JITResult { sycl::detail::string ErrorMessage; }; +class RTCHashResult { +public: + explicit RTCHashResult(const char *PreprocLog) + : Failed{true}, Hash{}, PreprocLog{PreprocLog} {} + RTCHashResult(const char *Hash, const char *PreprocLog) + : Failed{false}, Hash{Hash}, PreprocLog{PreprocLog} {} + + bool failed() { return Failed; } + + const char *getPreprocLog() { return PreprocLog.c_str(); } + + const char *getHash() { + assert(!failed() && "No hash"); + return Hash.c_str(); + } + +private: + bool Failed; + sycl::detail::string Hash; + sycl::detail::string PreprocLog; +}; + class RTCResult { public: explicit RTCResult(const char *BuildLog) @@ -108,6 +130,10 @@ KF_EXPORT_SYMBOL JITResult materializeSpecConstants( const char *KernelName, jit_compiler::SYCLKernelBinaryInfo &BinInfo, View SpecConstBlob); +KF_EXPORT_SYMBOL RTCHashResult calculateHash(InMemoryFile SourceFile, + View IncludeFiles, + View UserArgs); + KF_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, View UserArgs, diff --git a/sycl-jit/jit-compiler/ld-version-script.txt b/sycl-jit/jit-compiler/ld-version-script.txt index c12256659ce30..e24109e489b57 100644 --- a/sycl-jit/jit-compiler/ld-version-script.txt +++ b/sycl-jit/jit-compiler/ld-version-script.txt @@ -3,6 +3,7 @@ /* Export the library entry points */ fuseKernels; materializeSpecConstants; + calculateHash; compileSYCL; resetJITConfiguration; addToJITConfiguration; diff --git a/sycl-jit/jit-compiler/lib/KernelFusion.cpp b/sycl-jit/jit-compiler/lib/KernelFusion.cpp index 742c39e2d4ae7..d6bbe2c9dce72 100644 --- a/sycl-jit/jit-compiler/lib/KernelFusion.cpp +++ b/sycl-jit/jit-compiler/lib/KernelFusion.cpp @@ -247,6 +247,33 @@ fuseKernels(View KernelInformation, const char *FusedKernelName, return JITResult{FusedKernelInfo}; } +extern "C" KF_EXPORT_SYMBOL RTCHashResult +calculateHash(InMemoryFile SourceFile, View IncludeFiles, + View UserArgs) { + auto UserArgListOrErr = parseUserArgs(UserArgs); + if (!UserArgListOrErr) { + return errorTo(UserArgListOrErr.takeError(), + "Parsing of user arguments failed"); + } + llvm::opt::InputArgList UserArgList = std::move(*UserArgListOrErr); + + auto Start = std::chrono::high_resolution_clock::now(); + auto HashOrError = calculateHash(SourceFile, IncludeFiles, UserArgList); + if (!HashOrError) { + return errorTo(HashOrError.takeError(), "Hashing failed"); + } + auto Hash = *HashOrError; + auto Stop = std::chrono::high_resolution_clock::now(); + + std::chrono::duration HashTime = Stop - Start; + if (UserArgList.hasArg(clang::driver::options::OPT_ftime_trace_EQ)) { + llvm::dbgs() << "Hashing of " << SourceFile.Path << " took " + << int(HashTime.count()) << " ms\n"; + } + + return RTCHashResult{Hash.c_str(), /*PreprocLog=*/""}; +} + extern "C" KF_EXPORT_SYMBOL RTCResult compileSYCL(InMemoryFile SourceFile, View IncludeFiles, View UserArgs, View CachedIR, bool SaveIR) { @@ -281,18 +308,6 @@ compileSYCL(InMemoryFile SourceFile, View IncludeFiles, Verbose); } - // TODO(julian): Expose as a separate API. - auto Start = std::chrono::high_resolution_clock::now(); - auto HashOrError = calculateSourceHash(SourceFile, IncludeFiles, UserArgList); - if (!HashOrError) { - return errorTo(HashOrError.takeError(), "Source hashing failed"); - } - auto SourceHash = *HashOrError; - auto Stop = std::chrono::high_resolution_clock::now(); - - std::chrono::duration HashTime = Stop - Start; - llvm::dbgs() << "Hashing took " << int(HashTime.count()) << "ms\n"; - std::unique_ptr Module; if (CachedIR.size() > 0) { diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 8c31f7c79607d..9de61d17d5193 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -152,14 +153,14 @@ class HashPreprocessedAction : public PreprocessorFrontendAction { ArrayRef PreprocessedData( (const uint8_t *)PreprocessedSource.data(), PreprocessedSource.size()); - HashValue = BLAKE3::hash(PreprocessedData); + Hash = BLAKE3::hash(PreprocessedData); } public: - BLAKE3Result<> takeHashValue() { return std::move(HashValue); } + BLAKE3Result<> takeHash() { return std::move(Hash); } private: - BLAKE3Result<> HashValue; + BLAKE3Result<> Hash; }; class RTCToolActionBase : public ToolAction { @@ -208,7 +209,7 @@ class GetSourceHashAction : public RTCToolActionBase { return false; } - HashValue = HPA.takeHashValue(); + Hash = HPA.takeHash(); Executed = true; return true; } @@ -216,10 +217,10 @@ class GetSourceHashAction : public RTCToolActionBase { bool hasExecuted() override { return Executed; } public: - BLAKE3Result<> takeHashValue() { return std::move(HashValue); } + BLAKE3Result<> takeHash() { return std::move(Hash); } private: - BLAKE3Result<> HashValue; + BLAKE3Result<> Hash; bool Executed = false; }; @@ -353,11 +354,11 @@ static void setupTool(ClangTool &Tool, const std::string &DPCPPRoot, }); } -Expected -jit_compiler::calculateSourceHash(InMemoryFile SourceFile, - View IncludeFiles, - const InputArgList &UserArgList) { - TimeTraceScope TTS{"calculateSourceHash"}; +Expected +jit_compiler::calculateHash(InMemoryFile SourceFile, + View IncludeFiles, + const InputArgList &UserArgList) { + TimeTraceScope TTS{"calculateHash"}; const std::string &DPCPPRoot = getDPCPPRoot(); if (DPCPPRoot == InvalidDPCPPRoot) { @@ -375,8 +376,17 @@ jit_compiler::calculateSourceHash(InMemoryFile SourceFile, GetSourceHashAction Action; if (!Tool.run(&Action)) { - BLAKE3Result<> HashValue = Action.takeHashValue(); - return DynArray(HashValue.begin(), HashValue.end()); + BLAKE3Result<> SourceHash = Action.takeHash(); + // The adjusted command line contains the DPCPP root and clang major + // version. + BLAKE3Result<> CommandLineHash = + BLAKE3::hash(arrayRefFromStringRef(join(CommandLine, ","))); + + std::string EncodedHash = + encodeBase64(SourceHash) + encodeBase64(CommandLineHash); + // Make the encoding filesystem-friendly. + std::replace(EncodedHash.begin(), EncodedHash.end(), '/', '-'); + return std::move(EncodedHash); } return createStringError("Calculating source hash failed"); diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h index 9093cde9f3e6f..7708c1ca857fd 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.h @@ -22,10 +22,9 @@ namespace jit_compiler { -using SourceHash = DynArray; -llvm::Expected -calculateSourceHash(InMemoryFile SourceFile, View IncludeFiles, - const llvm::opt::InputArgList &UserArgList); +llvm::Expected +calculateHash(InMemoryFile SourceFile, View IncludeFiles, + const llvm::opt::InputArgList &UserArgList); llvm::Expected> compileDeviceCode(InMemoryFile SourceFile, View IncludeFiles, diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index fd52e645ac75b..fa21b6b65e455 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -90,6 +91,15 @@ jit_compiler::jit_compiler() return false; } + this->CalculateHashHandle = reinterpret_cast( + sycl::detail::ur::getOsLibraryFuncAddress(LibraryPtr.get(), + "calculateHash")); + if (!this->CalculateHashHandle) { + printPerformanceWarning( + "Cannot resolve JIT library function entry point"); + return false; + } + this->CompileSYCLHandle = reinterpret_cast( sycl::detail::ur::getOsLibraryFuncAddress(LibraryPtr.get(), "compileSYCL")); @@ -1229,8 +1239,12 @@ sycl_device_binaries jit_compiler::compileSYCL( const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames, - const std::vector &CachedIR, std::vector *SavedIRPtr) { + const std::vector &RegisteredKernelNames) { + auto appendToLog = [LogPtr](const char *Msg) { + if (LogPtr) { + LogPtr->append(Msg); + } + }; // RegisteredKernelNames may contain template specializations, so we just put // them in main() which ensures they are instantiated. @@ -1261,22 +1275,36 @@ sycl_device_binaries jit_compiler::compileSYCL( std::back_inserter(UserArgsView), [](const auto &Arg) { return Arg.c_str(); }); - auto Result = - CompileSYCLHandle(SourceFile, IncludeFilesView, UserArgsView, CachedIR, - /*SaveIR=*/SavedIRPtr != nullptr); + std::string CacheKey; + std::vector CachedIR; + if (PersistentDeviceCodeCache::isEnabled()) { + auto Result = + CalculateHashHandle(SourceFile, IncludeFilesView, UserArgsView); - if (LogPtr) { - LogPtr->append(Result.getBuildLog()); + appendToLog(Result.getPreprocLog()); + if (!Result.failed()) { + CacheKey = Result.getHash(); + CachedIR = PersistentDeviceCodeCache::getDeviceCodeIRFromDisc(CacheKey); + } } + auto Result = CompileSYCLHandle(SourceFile, IncludeFilesView, UserArgsView, + CachedIR, /*SaveIR=*/!CacheKey.empty()); + + appendToLog(Result.getBuildLog()); if (Result.failed()) { throw sycl::exception(sycl::errc::build, Result.getBuildLog()); } const auto &IR = Result.getDeviceCodeIR(); - if (SavedIRPtr && !IR.empty()) { - SavedIRPtr->resize(IR.size()); - std::memcpy(SavedIRPtr->data(), IR.begin(), IR.size()); + if (!CacheKey.empty() && !IR.empty()) { + // The RTC result contains the bitcode blob iff the frontend was invoked on + // the source string, meaning we encountered either a cache miss, or a cache + // hit that returned unusable IR (e.g. due to a bitcode version mismatch). + // There's no explicit mechanism to invalidate the cache entry - we just + // overwrite the entry with the newly compiled IR. + std::vector SavedIR{IR.begin(), IR.end()}; + PersistentDeviceCodeCache::putDeviceCodeIRToDisc(CacheKey, SavedIR); } return createDeviceBinaryImage(Result.getBundleInfo(), diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index 72aa48fb5e374..6cb24968677bf 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -51,8 +51,7 @@ class jit_compiler { const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames, - const std::vector &CachedIR, std::vector *SavedIRPtr); + const std::vector &RegisteredKernelNames); bool isAvailable() { return Available; } @@ -94,11 +93,14 @@ class jit_compiler { using FuseKernelsFuncT = decltype(::jit_compiler::fuseKernels) *; using MaterializeSpecConstFuncT = decltype(::jit_compiler::materializeSpecConstants) *; + using CalculateHashFuncT = + decltype(::jit_compiler::calculateHash) *; using CompileSYCLFuncT = decltype(::jit_compiler::compileSYCL) *; using ResetConfigFuncT = decltype(::jit_compiler::resetJITConfiguration) *; using AddToConfigFuncT = decltype(::jit_compiler::addToJITConfiguration) *; FuseKernelsFuncT FuseKernelsHandle = nullptr; MaterializeSpecConstFuncT MaterializeSpecConstHandle = nullptr; + CalculateHashFuncT CalculateHashHandle = nullptr; CompileSYCLFuncT CompileSYCLHandle = nullptr; ResetConfigFuncT ResetConfigHandle = nullptr; AddToConfigFuncT AddToConfigHandle = nullptr; diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index c3a3ee6b7c99b..bebbbf3301f85 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -497,26 +497,9 @@ class kernel_bundle_impl { if (MLanguage == syclex::source_language::sycl_jit) { // Build device images via the program manager. const std::string &SourceStr = std::get(MSource); - - std::string BuildOptionsString; - std::vector CachedIR; - std::unique_ptr> SavedIRPtr; - if (PersistentDeviceCodeCache::isEnabled()) { - BuildOptionsString = syclex::detail::userArgsAsString(BuildOptions); - CachedIR = PersistentDeviceCodeCache::getDeviceCodeIRFromDisc( - MDevices, BuildOptionsString, SourceStr); - SavedIRPtr = std::make_unique>(); - } - auto [Binaries, CompilationID] = syclex::detail::SYCL_JIT_to_SPIRV( - SourceStr, MIncludePairs, BuildOptions, LogPtr, RegisteredKernelNames, - CachedIR, SavedIRPtr.get()); - - if (PersistentDeviceCodeCache::isEnabled() && !SavedIRPtr->empty()) { - PersistentDeviceCodeCache::putDeviceCodeIRToDisc( - MDevices, BuildOptionsString, SourceStr, *SavedIRPtr); - SavedIRPtr.reset(); - } + SourceStr, MIncludePairs, BuildOptions, LogPtr, + RegisteredKernelNames); auto &PM = detail::ProgramManager::getInstance(); PM.addImages(Binaries); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 89a34d0fa41cc..9108572bb5b1d 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -308,16 +308,14 @@ std::pair SYCL_JIT_to_SPIRV( [[maybe_unused]] include_pairs_t IncludePairs, [[maybe_unused]] const std::vector &UserArgs, [[maybe_unused]] std::string *LogPtr, - [[maybe_unused]] const std::vector &RegisteredKernelNames, - [[maybe_unused]] const std::vector &CachedIR, - [[maybe_unused]] std::vector *SavedIRPtr) { + [[maybe_unused]] const std::vector &RegisteredKernelNames) { #if SYCL_EXT_JIT_ENABLE static std::atomic_uintptr_t CompilationCounter; std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); sycl_device_binaries Binaries = sycl::detail::jit_compiler::get_instance().compileSYCL( CompilationID, SYCLSource, IncludePairs, UserArgs, LogPtr, - RegisteredKernelNames, CachedIR, SavedIRPtr); + RegisteredKernelNames); return std::make_pair(Binaries, std::move(CompilationID)); #else throw sycl::exception(sycl::errc::build, diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index bf5b8848b377a..5df35e6ef6967 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -40,21 +40,10 @@ std::string userArgsAsString(const std::vector &UserArguments); // // Returns a pointer to the image (owned by the `jit_compiler` class), and the // bundle-specific prefix used for loading the kernels. -// -// If `CachedIR` is not empty, the JIT compiler tries to load the bytes as an -// LLVM bitcode module instead of invoking the frontend on the source string. It -// falls back to running the frontend if the bitcode is invalid (e.g., version -// mismatch). -// -// If `SavedIRPtr` is not nullptr, and the source string was compiled (meaning -// `CachedIR` was empty or invalid), the JIT compiler stores the LLVM module -// returned from the device compilation as bitcode into the given vector. std::pair SYCL_JIT_to_SPIRV(const std::string &Source, include_pairs_t IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames, - const std::vector &CachedIR, - std::vector *SavedIRPtr); + const std::vector &RegisteredKernelNames); bool SYCL_JIT_Compilation_Available(); diff --git a/sycl/source/detail/persistent_device_code_cache.cpp b/sycl/source/detail/persistent_device_code_cache.cpp index e6966a900000b..12dea9d3b976f 100644 --- a/sycl/source/detail/persistent_device_code_cache.cpp +++ b/sycl/source/detail/persistent_device_code_cache.cpp @@ -558,8 +558,7 @@ void PersistentDeviceCodeCache::putCompiledKernelToDisc( } void PersistentDeviceCodeCache::putDeviceCodeIRToDisc( - const std::vector &Devices, const std::string &BuildOptionsString, - const std::string &SourceStr, const std::vector &IR) { + const std::string &Key, const std::vector &IR) { repopulateCacheSizeFile(getRootDir()); @@ -574,8 +573,7 @@ void PersistentDeviceCodeCache::putDeviceCodeIRToDisc( // Total size of the item that we are writing to the cache. size_t TotalSize = 0; - std::string DirName = - getDeviceCodeIRPath(Devices, BuildOptionsString, SourceStr); + std::string DirName = getDeviceCodeIRPath(Key); try { OSUtil::makeDir(DirName.c_str()); @@ -713,13 +711,11 @@ PersistentDeviceCodeCache::getCompiledKernelFromDisc( return Binaries; } -std::vector PersistentDeviceCodeCache::getDeviceCodeIRFromDisc( - const std::vector &Devices, const std::string &BuildOptionsString, - const std::string &SourceStr) { +std::vector +PersistentDeviceCodeCache::getDeviceCodeIRFromDisc(const std::string &Key) { std::vector IR; - std::string DirName = - getDeviceCodeIRPath(Devices, BuildOptionsString, SourceStr); + std::string DirName = getDeviceCodeIRPath(Key); if (DirName.empty() || !OSUtil::isPathPresent(DirName)) return {}; @@ -946,27 +942,15 @@ std::string PersistentDeviceCodeCache::getCompiledKernelItemPath( std::to_string(StringHasher(SourceString)); } -std::string PersistentDeviceCodeCache::getDeviceCodeIRPath( - const std::vector &Devices, const std::string &BuildOptionsString, - const std::string &SourceString) { - assert(!Devices.empty()); - +std::string +PersistentDeviceCodeCache::getDeviceCodeIRPath(const std::string &Key) { std::string cache_root{getRootDir()}; if (cache_root.empty()) { trace("Disable persistent cache due to unconfigured cache root."); return {}; } - std::string DevicesString; - for (const auto &Dev : Devices) - DevicesString += getDeviceIDString(Dev) + ","; - - std::hash StringHasher{}; - - return cache_root + "/ext_kernel_compiler" + "/" + - std::to_string(StringHasher(DevicesString)) + "/" + - std::to_string(StringHasher(BuildOptionsString)) + "/" + - std::to_string(StringHasher(SourceString)); + return cache_root + "/ext_kernel_compiler" + "/" + Key; } /* Returns true if persistent cache is enabled. diff --git a/sycl/source/detail/persistent_device_code_cache.hpp b/sycl/source/detail/persistent_device_code_cache.hpp index 8e5cbadbac2bc..464794ddbde30 100644 --- a/sycl/source/detail/persistent_device_code_cache.hpp +++ b/sycl/source/detail/persistent_device_code_cache.hpp @@ -178,12 +178,13 @@ class PersistentDeviceCodeCache { const std::string &BuildOptionsString, const std::string &SourceString); - /* Get directory name when storing runtime compiled device code IR ( via - * kernel_compiler, sycl_jit language). + /* Get directory name when storing runtime compiled device code IR (via + * kernel_compiler, sycl_jit language). The key is computed in the sycl-jit + * library, and encompasses the preprocesses source code, build options and + * compiler location. The frontend invocation (whose output we cache here) is + * device-agnostic, hence the device (list) is not part of the lookup. */ - static std::string getDeviceCodeIRPath(const std::vector &Devices, - const std::string &BuildOptionsString, - const std::string &SourceString); + static std::string getDeviceCodeIRPath(const std::string &Key); /* Program binaries built for one or more devices are read from persistent * cache and returned in form of vector of programs. Each binary program is @@ -200,10 +201,7 @@ class PersistentDeviceCodeCache { const std::string &BuildOptionsString, const std::string &SourceStr); - static std::vector - getDeviceCodeIRFromDisc(const std::vector &Devices, - const std::string &BuildOptionsString, - const std::string &SourceStr); + static std::vector getDeviceCodeIRFromDisc(const std::string &Key); /* Stores build program in persistent cache */ @@ -219,9 +217,7 @@ class PersistentDeviceCodeCache { const std::string &SourceStr, const ur_program_handle_t &NativePrg); - static void putDeviceCodeIRToDisc(const std::vector &Devices, - const std::string &BuildOptionsString, - const std::string &SourceStr, + static void putDeviceCodeIRToDisc(const std::string &Key, const std::vector &IR); /* Sends message to std:cerr stream when SYCL_CACHE_TRACE environemnt is set*/ From a365b7f6066071d14380636610d2a9b001b4043a Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 14 Feb 2025 01:26:28 +0000 Subject: [PATCH 06/16] Add test, and fix cache miss due to virtual filename. Signed-off-by: Julian Oppermann --- sycl/source/detail/jit_compiler.cpp | 4 ++- .../kernel_compiler_sycl_jit_cache.cpp | 36 +++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index fa21b6b65e455..e60ee3ab33383 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1258,7 +1258,9 @@ sycl_device_binaries jit_compiler::compileSYCL( std::string FinalSource = ss.str(); - std::string SYCLFileName = CompilationID + ".cpp"; + // The filename must be stable, because it is part of the preprocessed output + // and in consequence, the cache key. + std::string SYCLFileName = "rtc.cpp"; ::jit_compiler::InMemoryFile SourceFile{SYCLFileName.c_str(), FinalSource.c_str()}; diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp index 98ed9a100df31..3b98220e851a9 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp @@ -34,6 +34,18 @@ void vec_add(float* in1, float* in2, float* out){ } )"""; +auto constexpr SYCLSourceWithInclude = R"""( + #include "myheader.h" + #include + + extern "C" SYCL_EXTERNAL + SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>)) + void KERNEL_NAME(float* in1, float* out){ + size_t id = sycl::ext::oneapi::this_work_item::get_nd_item<1>().get_global_linear_id(); + out[id] = in1[id]; + } + )"""; + static void dumpKernelIDs() { for (auto &kernelID : sycl::get_kernel_ids()) std::cout << kernelID.get_name() << std::endl; @@ -91,9 +103,27 @@ int test_persistent_cache() { // CHECK-EVICT: [kernel_compiler Persistent Cache]: device code IR has been cached exe_kb kbExe2b = syclex::build(kbSrc2); - // TODO: Add tests that `#include` files, either from the filesystem or - // defined with the `include_files` property, after the persistent - // cache becomes sensitive to includes. + source_kb kbSrc3 = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl_jit, SYCLSourceWithInclude, + syclex::properties{ + syclex::include_files{"myheader.h", "#define KERNEL_NAME foo"}}); + + // New source string -> cache miss + // CHECK: [kernel_compiler Persistent Cache]: device code IR has been cached + exe_kb kbExe3a = syclex::build(kbSrc3); + dumpKernelIDs(); + // CHECK: rtc_5$__sycl_kernel_foo + + source_kb kbSrc4 = syclex::create_kernel_bundle_from_source( + ctx, syclex::source_language::sycl_jit, SYCLSourceWithInclude, + syclex::properties{ + syclex::include_files{"myheader.h", "#define KERNEL_NAME bar"}}); + + // Same source string, but different header contents -> cache miss + // CHECK: [kernel_compiler Persistent Cache]: device code IR has been cached + exe_kb kbExe4a = syclex::build(kbSrc4); + dumpKernelIDs(); + // CHECK: rtc_6$__sycl_kernel_bar return 0; } From d2178e0dd9b0852b755364e36240088c61a454a4 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 14 Feb 2025 02:27:16 +0000 Subject: [PATCH 07/16] Distinction between compilation ID and prefix is no longer necessary. Signed-off-by: Julian Oppermann --- sycl/source/detail/jit_compiler.cpp | 12 ++++++------ sycl/source/detail/jit_compiler.hpp | 7 ++++--- sycl/source/detail/kernel_bundle_impl.hpp | 5 +---- .../detail/kernel_compiler/kernel_compiler_sycl.cpp | 12 ++++++------ .../detail/kernel_compiler/kernel_compiler_sycl.hpp | 3 ++- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index e60ee3ab33383..91a562d091082 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1142,7 +1142,7 @@ sycl_device_binaries jit_compiler::createPIDeviceBinary( sycl_device_binaries jit_compiler::createDeviceBinaryImage( const ::jit_compiler::RTCBundleInfo &BundleInfo, - const std::string &OffloadEntryPrefix) { + const std::string &Prefix) { DeviceBinariesCollection Collection; for (const auto &DevImgInfo : BundleInfo) { @@ -1153,7 +1153,7 @@ sycl_device_binaries jit_compiler::createDeviceBinaryImage( // entrypoints remain unchanged. // It seems to be OK to set zero for most of the information here, at // least that is the case for compiled SPIR-V binaries. - std::string PrefixedName = OffloadEntryPrefix + Symbol.c_str(); + std::string PrefixedName = Prefix + Symbol.c_str(); OffloadEntryContainer Entry{PrefixedName, /*Addr=*/nullptr, /*Size=*/0, /*Flags=*/0, /*Reserved=*/0}; Binary.addOffloadEntry(std::move(Entry)); @@ -1236,10 +1236,11 @@ std::vector jit_compiler::encodeReqdWorkGroupSize( } sycl_device_binaries jit_compiler::compileSYCL( - const std::string &CompilationID, const std::string &SYCLSource, + const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames) { + const std::vector &RegisteredKernelNames, + const std::string &Prefix) { auto appendToLog = [LogPtr](const char *Msg) { if (LogPtr) { LogPtr->append(Msg); @@ -1309,8 +1310,7 @@ sycl_device_binaries jit_compiler::compileSYCL( PersistentDeviceCodeCache::putDeviceCodeIRToDisc(CacheKey, SavedIR); } - return createDeviceBinaryImage(Result.getBundleInfo(), - /*OffloadEntryPrefix=*/CompilationID + '$'); + return createDeviceBinaryImage(Result.getBundleInfo(), Prefix); } } // namespace detail diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index 6cb24968677bf..ce2a616a1f40c 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -48,10 +48,11 @@ class jit_compiler { const std::vector &SpecConstBlob); sycl_device_binaries compileSYCL( - const std::string &CompilationID, const std::string &SYCLSource, + const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames); + const std::vector &RegisteredKernelNames, + const std::string &Prefix); bool isAvailable() { return Available; } @@ -74,7 +75,7 @@ class jit_compiler { sycl_device_binaries createDeviceBinaryImage(const ::jit_compiler::RTCBundleInfo &BundleInfo, - const std::string &OffloadEntryPrefix); + const std::string &Prefix); std::vector encodeArgUsageMask(const ::jit_compiler::ArgUsageMask &Mask) const; diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index bebbbf3301f85..59b281b84ecd0 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -497,7 +497,7 @@ class kernel_bundle_impl { if (MLanguage == syclex::source_language::sycl_jit) { // Build device images via the program manager. const std::string &SourceStr = std::get(MSource); - auto [Binaries, CompilationID] = syclex::detail::SYCL_JIT_to_SPIRV( + auto [Binaries, Prefix] = syclex::detail::SYCL_JIT_to_SPIRV( SourceStr, MIncludePairs, BuildOptions, LogPtr, RegisteredKernelNames); @@ -506,9 +506,6 @@ class kernel_bundle_impl { std::vector KernelIDs; std::vector KernelNames; - // `jit_compiler::compileSYCL(..)` uses `CompilationID + '$'` as prefix - // for offload entry names. - std::string Prefix = CompilationID + '$'; for (const auto &KernelID : PM.getAllSYCLKernelIDs()) { std::string_view KernelName{KernelID.get_name()}; if (KernelName.find(Prefix) == 0) { diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 9108572bb5b1d..4b333269f8ef1 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -305,18 +305,18 @@ bool SYCL_JIT_Compilation_Available() { std::pair SYCL_JIT_to_SPIRV( [[maybe_unused]] const std::string &SYCLSource, - [[maybe_unused]] include_pairs_t IncludePairs, + [[maybe_unused]] const include_pairs_t& IncludePairs, [[maybe_unused]] const std::vector &UserArgs, [[maybe_unused]] std::string *LogPtr, [[maybe_unused]] const std::vector &RegisteredKernelNames) { #if SYCL_EXT_JIT_ENABLE - static std::atomic_uintptr_t CompilationCounter; - std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); + static std::atomic_uintptr_t CompilationID; + std::string Prefix = "rtc_" + std::to_string(CompilationID++) + "$"; sycl_device_binaries Binaries = sycl::detail::jit_compiler::get_instance().compileSYCL( - CompilationID, SYCLSource, IncludePairs, UserArgs, LogPtr, - RegisteredKernelNames); - return std::make_pair(Binaries, std::move(CompilationID)); + SYCLSource, IncludePairs, UserArgs, LogPtr, + RegisteredKernelNames, Prefix); + return std::make_pair(Binaries, std::move(Prefix)); #else throw sycl::exception(sycl::errc::build, "kernel_compiler via sycl-jit is not available"); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp index 5df35e6ef6967..41f9c986c6c4b 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.hpp @@ -41,7 +41,8 @@ std::string userArgsAsString(const std::vector &UserArguments); // Returns a pointer to the image (owned by the `jit_compiler` class), and the // bundle-specific prefix used for loading the kernels. std::pair -SYCL_JIT_to_SPIRV(const std::string &Source, include_pairs_t IncludePairs, +SYCL_JIT_to_SPIRV(const std::string &Source, + const include_pairs_t &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, const std::vector &RegisteredKernelNames); From 1cfec8b471c969fdd52faa0a1bf046e9d9f30ebe Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 14 Feb 2025 02:52:34 +0000 Subject: [PATCH 08/16] Rollback unwanted format change Signed-off-by: Julian Oppermann --- sycl/source/detail/kernel_bundle_impl.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 59b281b84ecd0..2e01f097954b6 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -869,11 +869,12 @@ class kernel_bundle_impl { } bool is_specialization_constant_set(const char *SpecName) const noexcept { - bool SetInDevImg = std::any_of( - begin(), end(), [SpecName](const device_image_plain &DeviceImage) { - return getSyclObjImpl(DeviceImage) - ->is_specialization_constant_set(SpecName); - }); + bool SetInDevImg = + std::any_of(begin(), end(), + [SpecName](const device_image_plain &DeviceImage) { + return getSyclObjImpl(DeviceImage) + ->is_specialization_constant_set(SpecName); + }); return SetInDevImg || MSpecConstValues.count(std::string{SpecName}) != 0; } From 71bf76e08c3bed97d4b620472ebfa2a59e6cf95d Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 14 Feb 2025 03:01:08 +0000 Subject: [PATCH 09/16] Nits. Signed-off-by: Julian Oppermann --- sycl-jit/jit-compiler/lib/KernelFusion.cpp | 2 +- sycl/source/detail/persistent_device_code_cache.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/KernelFusion.cpp b/sycl-jit/jit-compiler/lib/KernelFusion.cpp index d6bbe2c9dce72..e889e36552c23 100644 --- a/sycl-jit/jit-compiler/lib/KernelFusion.cpp +++ b/sycl-jit/jit-compiler/lib/KernelFusion.cpp @@ -265,8 +265,8 @@ calculateHash(InMemoryFile SourceFile, View IncludeFiles, auto Hash = *HashOrError; auto Stop = std::chrono::high_resolution_clock::now(); - std::chrono::duration HashTime = Stop - Start; if (UserArgList.hasArg(clang::driver::options::OPT_ftime_trace_EQ)) { + std::chrono::duration HashTime = Stop - Start; llvm::dbgs() << "Hashing of " << SourceFile.Path << " took " << int(HashTime.count()) << " ms\n"; } diff --git a/sycl/source/detail/persistent_device_code_cache.cpp b/sycl/source/detail/persistent_device_code_cache.cpp index 12dea9d3b976f..c0f6e53391216 100644 --- a/sycl/source/detail/persistent_device_code_cache.cpp +++ b/sycl/source/detail/persistent_device_code_cache.cpp @@ -950,7 +950,7 @@ PersistentDeviceCodeCache::getDeviceCodeIRPath(const std::string &Key) { return {}; } - return cache_root + "/ext_kernel_compiler" + "/" + Key; + return cache_root + "/ext_kernel_compiler/" + Key; } /* Returns true if persistent cache is enabled. From e50be3d4116b55319a35f57ea999dbf75afd6a20 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 14 Feb 2025 03:10:33 +0000 Subject: [PATCH 10/16] Format. Signed-off-by: Julian Oppermann --- sycl/source/detail/jit_compiler.hpp | 3 +-- sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index ce2a616a1f40c..1b80f9c4fb795 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -94,8 +94,7 @@ class jit_compiler { using FuseKernelsFuncT = decltype(::jit_compiler::fuseKernels) *; using MaterializeSpecConstFuncT = decltype(::jit_compiler::materializeSpecConstants) *; - using CalculateHashFuncT = - decltype(::jit_compiler::calculateHash) *; + using CalculateHashFuncT = decltype(::jit_compiler::calculateHash) *; using CompileSYCLFuncT = decltype(::jit_compiler::compileSYCL) *; using ResetConfigFuncT = decltype(::jit_compiler::resetJITConfiguration) *; using AddToConfigFuncT = decltype(::jit_compiler::addToJITConfiguration) *; diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 4b333269f8ef1..116033bf66d12 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -305,7 +305,7 @@ bool SYCL_JIT_Compilation_Available() { std::pair SYCL_JIT_to_SPIRV( [[maybe_unused]] const std::string &SYCLSource, - [[maybe_unused]] const include_pairs_t& IncludePairs, + [[maybe_unused]] const include_pairs_t &IncludePairs, [[maybe_unused]] const std::vector &UserArgs, [[maybe_unused]] std::string *LogPtr, [[maybe_unused]] const std::vector &RegisteredKernelNames) { @@ -314,8 +314,8 @@ std::pair SYCL_JIT_to_SPIRV( std::string Prefix = "rtc_" + std::to_string(CompilationID++) + "$"; sycl_device_binaries Binaries = sycl::detail::jit_compiler::get_instance().compileSYCL( - SYCLSource, IncludePairs, UserArgs, LogPtr, - RegisteredKernelNames, Prefix); + SYCLSource, IncludePairs, UserArgs, LogPtr, RegisteredKernelNames, + Prefix); return std::make_pair(Binaries, std::move(Prefix)); #else throw sycl::exception(sycl::errc::build, From 2d16c10e48621556d8f6028827d47ad6b8861995 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Fri, 14 Feb 2025 04:41:27 +0000 Subject: [PATCH 11/16] Fix test (?) Signed-off-by: Julian Oppermann --- .../KernelCompiler/kernel_compiler_sycl_jit_cache.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp index 3b98220e851a9..7a5966fbe5456 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp @@ -15,9 +15,9 @@ // DEFINE: %{cache_vars} = env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=7 SYCL_CACHE_DIR=%t/cache_dir // DEFINE: %{max_cache_size} = SYCL_CACHE_MAX_SIZE=10000 // RUN: %{build} -o %t.out -// RUN: %if run-mode %{rm -rf %t/cache_dir%} +// RUN: %{run-aux} rm -rf %t/cache_dir // RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefix=CHECK -// RUN: %if run-mode %{rm -rf %t/cache_dir%} +// RUN: %{run-aux} rm -rf %t/cache_dir // RUN: %{cache_vars} %{max_cache_size} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-EVICT #include From ed093ceb446e578dd12b88c7aec20c2440cb7887 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Sun, 16 Feb 2025 22:04:36 +0000 Subject: [PATCH 12/16] Alt design for RTCHashResult Signed-off-by: Julian Oppermann --- sycl-jit/jit-compiler/include/KernelFusion.h | 24 ++++++++++++++++---- sycl-jit/jit-compiler/lib/KernelFusion.cpp | 11 +++++---- sycl/source/detail/jit_compiler.cpp | 5 ++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/sycl-jit/jit-compiler/include/KernelFusion.h b/sycl-jit/jit-compiler/include/KernelFusion.h index 188e509e60c57..cd2cc76b26ac0 100644 --- a/sycl-jit/jit-compiler/include/KernelFusion.h +++ b/sycl-jit/jit-compiler/include/KernelFusion.h @@ -58,14 +58,20 @@ class JITResult { class RTCHashResult { public: - explicit RTCHashResult(const char *PreprocLog) - : Failed{true}, Hash{}, PreprocLog{PreprocLog} {} - RTCHashResult(const char *Hash, const char *PreprocLog) - : Failed{false}, Hash{Hash}, PreprocLog{PreprocLog} {} + static RTCHashResult success(const char *Hash) { + return RTCHashResult{/*Failed=*/false, Hash}; + } + + static RTCHashResult failure(const char *PreprocLog) { + return RTCHashResult{/*Failed=*/true, PreprocLog}; + } bool failed() { return Failed; } - const char *getPreprocLog() { return PreprocLog.c_str(); } + const char *getPreprocLog() { + assert(failed() && "No preprocessor log"); + return PreprocLog.c_str(); + } const char *getHash() { assert(!failed() && "No hash"); @@ -73,6 +79,14 @@ class RTCHashResult { } private: + RTCHashResult(bool Failed, const char *Str) : Failed(Failed) { + if (!Failed) { + this->Hash = Str; + } else { + this->PreprocLog = Str; + } + } + bool Failed; sycl::detail::string Hash; sycl::detail::string PreprocLog; diff --git a/sycl-jit/jit-compiler/lib/KernelFusion.cpp b/sycl-jit/jit-compiler/lib/KernelFusion.cpp index e889e36552c23..35c08bd19a896 100644 --- a/sycl-jit/jit-compiler/lib/KernelFusion.cpp +++ b/sycl-jit/jit-compiler/lib/KernelFusion.cpp @@ -252,15 +252,18 @@ calculateHash(InMemoryFile SourceFile, View IncludeFiles, View UserArgs) { auto UserArgListOrErr = parseUserArgs(UserArgs); if (!UserArgListOrErr) { - return errorTo(UserArgListOrErr.takeError(), - "Parsing of user arguments failed"); + return RTCHashResult::failure( + formatError(UserArgListOrErr.takeError(), + "Parsing of user arguments failed") + .c_str()); } llvm::opt::InputArgList UserArgList = std::move(*UserArgListOrErr); auto Start = std::chrono::high_resolution_clock::now(); auto HashOrError = calculateHash(SourceFile, IncludeFiles, UserArgList); if (!HashOrError) { - return errorTo(HashOrError.takeError(), "Hashing failed"); + return RTCHashResult::failure( + formatError(HashOrError.takeError(), "Hashing failed").c_str()); } auto Hash = *HashOrError; auto Stop = std::chrono::high_resolution_clock::now(); @@ -271,7 +274,7 @@ calculateHash(InMemoryFile SourceFile, View IncludeFiles, << int(HashTime.count()) << " ms\n"; } - return RTCHashResult{Hash.c_str(), /*PreprocLog=*/""}; + return RTCHashResult::success(Hash.c_str()); } extern "C" KF_EXPORT_SYMBOL RTCResult diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 91a562d091082..6e1d795027129 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1284,8 +1284,9 @@ sycl_device_binaries jit_compiler::compileSYCL( auto Result = CalculateHashHandle(SourceFile, IncludeFilesView, UserArgsView); - appendToLog(Result.getPreprocLog()); - if (!Result.failed()) { + if (Result.failed()) { + appendToLog(Result.getPreprocLog()); + } else { CacheKey = Result.getHash(); CachedIR = PersistentDeviceCodeCache::getDeviceCodeIRFromDisc(CacheKey); } From 6c6899ac08710b297626d9bf4fb26793313a15ab Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Sun, 16 Feb 2025 22:15:13 +0000 Subject: [PATCH 13/16] Add assertions to takeX() methods Signed-off-by: Julian Oppermann --- .../lib/rtc/DeviceCompilation.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 9de61d17d5193..6dbb2a38c2917 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -154,13 +154,19 @@ class HashPreprocessedAction : public PreprocessorFrontendAction { (const uint8_t *)PreprocessedSource.data(), PreprocessedSource.size()); Hash = BLAKE3::hash(PreprocessedData); + Executed = true; } public: - BLAKE3Result<> takeHash() { return std::move(Hash); } + BLAKE3Result<> takeHash() { + assert(Executed); + Executed = false; + return std::move(Hash); + } private: BLAKE3Result<> Hash; + bool Executed = false; }; class RTCToolActionBase : public ToolAction { @@ -217,7 +223,11 @@ class GetSourceHashAction : public RTCToolActionBase { bool hasExecuted() override { return Executed; } public: - BLAKE3Result<> takeHash() { return std::move(Hash); } + BLAKE3Result<> takeHash() { + assert(Executed); + Executed = false; + return std::move(Hash); + } private: BLAKE3Result<> Hash; @@ -247,7 +257,10 @@ struct GetLLVMModuleAction : public RTCToolActionBase { public: GetLLVMModuleAction(LLVMContext &Context) : Context{Context}, Module{} {} - std::unique_ptr takeModule() { return std::move(Module); } + std::unique_ptr takeModule() { + assert(Module); + return std::move(Module); + } private: LLVMContext &Context; From 6f5ce6bf6f9a437ef79d3900dd79b1bc2571582c Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 17 Feb 2025 01:58:14 +0000 Subject: [PATCH 14/16] Improve cache test Signed-off-by: Julian Oppermann --- .../detail/persistent_device_code_cache.cpp | 40 ++++++++++--------- .../kernel_compiler_sycl_jit_cache.cpp | 29 +++++++++----- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/sycl/source/detail/persistent_device_code_cache.cpp b/sycl/source/detail/persistent_device_code_cache.cpp index c0f6e53391216..0a6e708543e27 100644 --- a/sycl/source/detail/persistent_device_code_cache.cpp +++ b/sycl/source/detail/persistent_device_code_cache.cpp @@ -574,16 +574,16 @@ void PersistentDeviceCodeCache::putDeviceCodeIRToDisc( size_t TotalSize = 0; std::string DirName = getDeviceCodeIRPath(Key); + std::string FileName = DirName + "/ir"; + std::string FullFileName = FileName + ".bin"; try { OSUtil::makeDir(DirName.c_str()); - std::string FileName = DirName + "/ir"; - std::string FullFileName = FileName + ".bin"; LockCacheItem Lock{FileName}; if (Lock.isOwned()) { writeBinaryDataToFile(FullFileName, IR); PersistentDeviceCodeCache::trace_KernelCompiler( - "device code IR has been cached: ", FullFileName); + "storing device code IR: ", FullFileName); TotalSize = getFileSize(FullFileName); saveCurrentTimeInAFile(FileName + CacheEntryAccessTimeSuffix); @@ -716,27 +716,29 @@ PersistentDeviceCodeCache::getDeviceCodeIRFromDisc(const std::string &Key) { std::vector IR; std::string DirName = getDeviceCodeIRPath(Key); + std::string FileName = DirName + "/ir"; + std::string FullFileName = FileName + ".bin"; - if (DirName.empty() || !OSUtil::isPathPresent(DirName)) + if (DirName.empty() || !OSUtil::isPathPresent(FullFileName)) { + trace_KernelCompiler("cache miss: ", Key); return {}; + } - std::string FileName = DirName + "/ir"; - std::string FullFileName = FileName + ".bin"; - if (OSUtil::isPathPresent(FullFileName)) { - if (!LockCacheItem::isLocked(FileName)) { - try { - IR = readBinaryDataFromFile(FullFileName); - - // Explicitly update the access time of the file. This is required for - // eviction. - if (isEvictionEnabled()) - saveCurrentTimeInAFile(FileName + CacheEntryAccessTimeSuffix); - } catch (...) { - // If read was unsuccessfull try the next item - return {}; - } + if (!LockCacheItem::isLocked(FileName)) { + try { + IR = readBinaryDataFromFile(FullFileName); + + // Explicitly update the access time of the file. This is required for + // eviction. + if (isEvictionEnabled()) + saveCurrentTimeInAFile(FileName + CacheEntryAccessTimeSuffix); + } catch (...) { + // If read was unsuccessfull give up + trace_KernelCompiler("cache miss: ", Key); + return {}; } } + PersistentDeviceCodeCache::trace_KernelCompiler( "using cached device code IR: ", FullFileName); return IR; diff --git a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp index 7a5966fbe5456..fa527c49a6854 100644 --- a/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp +++ b/sycl/test-e2e/KernelCompiler/kernel_compiler_sycl_jit_cache.cpp @@ -13,10 +13,10 @@ // UNSUPPORTED-INTENDED: while accelerator is AoT only, this cannot run there. // DEFINE: %{cache_vars} = env SYCL_CACHE_PERSISTENT=1 SYCL_CACHE_TRACE=7 SYCL_CACHE_DIR=%t/cache_dir -// DEFINE: %{max_cache_size} = SYCL_CACHE_MAX_SIZE=10000 +// DEFINE: %{max_cache_size} = SYCL_CACHE_MAX_SIZE=30000 // RUN: %{build} -o %t.out // RUN: %{run-aux} rm -rf %t/cache_dir -// RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: %{cache_vars} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-UNLIM // RUN: %{run-aux} rm -rf %t/cache_dir // RUN: %{cache_vars} %{max_cache_size} %{run-unfiltered-devices} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-EVICT @@ -63,7 +63,7 @@ int test_persistent_cache() { q.get_device().ext_oneapi_can_compile(syclex::source_language::sycl_jit); if (!ok) { std::cout << "Apparently this device does not support `sycl_jit` source " - "kernel bundle extensin: " + "kernel bundle extension: " << q.get_device().get_info() << std::endl; return -1; @@ -73,13 +73,14 @@ int test_persistent_cache() { ctx, syclex::source_language::sycl_jit, SYCLSource); // Bundle is entered into cache on first build. - // CHECK: [kernel_compiler Persistent Cache]: device code IR has been cached + // CHECK: [kernel_compiler Persistent Cache]: cache miss: [[KEY1:.*]] + // CHECK: [kernel_compiler Persistent Cache]: storing device code IR: {{.*}}/[[KEY1]] exe_kb kbExe1a = syclex::build(kbSrc1); dumpKernelIDs(); // CHECK: rtc_0$__sycl_kernel_vec_add // Cache hit! We get independent bundles with their own version of the kernel. - // CHECK: [kernel_compiler Persistent Cache]: using cached device code IR + // CHECK: [kernel_compiler Persistent Cache]: using cached device code IR: {{.*}}/[[KEY1]] exe_kb kbExe1b = syclex::build(kbSrc1); dumpKernelIDs(); // CHECK-DAG: rtc_0$__sycl_kernel_vec_add @@ -89,18 +90,22 @@ int test_persistent_cache() { ctx, syclex::source_language::sycl_jit, SYCLSource); // Different source bundle, but identical source is a cache hit. - // CHECK: [kernel_compiler Persistent Cache]: using cached device code IR + // CHECK: [kernel_compiler Persistent Cache]: using cached device code IR: {{.*}}/[[KEY1]] exe_kb kbExe2a = syclex::build(kbSrc2); // Different build_options means no cache hit. - // CHECK: [kernel_compiler Persistent Cache]: device code IR has been cached + // CHECK: [kernel_compiler Persistent Cache]: cache miss: [[KEY2:.*]] + // CHECK: [kernel_compiler Persistent Cache]: storing device code IR: {{.*}}/[[KEY2]] std::vector flags{"-g", "-fno-fast-math"}; exe_kb kbExe1c = syclex::build(kbSrc1, syclex::properties{syclex::build_options{flags}}); // The kbExe1c build should trigger eviction if cache size is limited. - // CHECK: [kernel_compiler Persistent Cache]: using cached device code IR - // CHECK-EVICT: [kernel_compiler Persistent Cache]: device code IR has been cached + // CHECK-UNLIM: [kernel_compiler Persistent Cache]: using cached device code IR: {{.*}}/[[KEY1]] + // CHECK-EVICT: [Persistent Cache]: Cache eviction triggered. + // CHECK-EVICT: [Persistent Cache]: File removed: {{.*}}/[[KEY1]] + // CHECK-EVICT: [kernel_compiler Persistent Cache]: cache miss: [[KEY1]] + // CHECK-EVICT: [kernel_compiler Persistent Cache]: storing device code IR: {{.*}}/[[KEY1]] exe_kb kbExe2b = syclex::build(kbSrc2); source_kb kbSrc3 = syclex::create_kernel_bundle_from_source( @@ -109,7 +114,8 @@ int test_persistent_cache() { syclex::include_files{"myheader.h", "#define KERNEL_NAME foo"}}); // New source string -> cache miss - // CHECK: [kernel_compiler Persistent Cache]: device code IR has been cached + // CHECK: [kernel_compiler Persistent Cache]: cache miss: [[KEY3:.*]] + // CHECK: [kernel_compiler Persistent Cache]: storing device code IR: {{.*}}/[[KEY3]] exe_kb kbExe3a = syclex::build(kbSrc3); dumpKernelIDs(); // CHECK: rtc_5$__sycl_kernel_foo @@ -120,7 +126,8 @@ int test_persistent_cache() { syclex::include_files{"myheader.h", "#define KERNEL_NAME bar"}}); // Same source string, but different header contents -> cache miss - // CHECK: [kernel_compiler Persistent Cache]: device code IR has been cached + // CHECK: [kernel_compiler Persistent Cache]: cache miss: [[KEY4:.*]] + // CHECK: [kernel_compiler Persistent Cache]: storing device code IR: {{.*}}/[[KEY4]] exe_kb kbExe4a = syclex::build(kbSrc4); dumpKernelIDs(); // CHECK: rtc_6$__sycl_kernel_bar From a33e06ea850d9e890d5efaebefc2128fa48a1c01 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Mon, 17 Feb 2025 09:42:24 +0000 Subject: [PATCH 15/16] Don't store hash and log separately Signed-off-by: Julian Oppermann --- sycl-jit/jit-compiler/include/KernelFusion.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/sycl-jit/jit-compiler/include/KernelFusion.h b/sycl-jit/jit-compiler/include/KernelFusion.h index cd2cc76b26ac0..e0403b3a9c816 100644 --- a/sycl-jit/jit-compiler/include/KernelFusion.h +++ b/sycl-jit/jit-compiler/include/KernelFusion.h @@ -70,26 +70,20 @@ class RTCHashResult { const char *getPreprocLog() { assert(failed() && "No preprocessor log"); - return PreprocLog.c_str(); + return HashOrLog.c_str(); } const char *getHash() { assert(!failed() && "No hash"); - return Hash.c_str(); + return HashOrLog.c_str(); } private: - RTCHashResult(bool Failed, const char *Str) : Failed(Failed) { - if (!Failed) { - this->Hash = Str; - } else { - this->PreprocLog = Str; - } - } + RTCHashResult(bool Failed, const char *HashOrLog) + : Failed(Failed), HashOrLog(HashOrLog) {} bool Failed; - sycl::detail::string Hash; - sycl::detail::string PreprocLog; + sycl::detail::string HashOrLog; }; class RTCResult { From cb747d01973f2f3a33036e812ebff0df929533a2 Mon Sep 17 00:00:00 2001 From: Julian Oppermann Date: Wed, 19 Feb 2025 20:56:58 +0000 Subject: [PATCH 16/16] Revert to unique virtual source file names Signed-off-by: Julian Oppermann --- .../jit-compiler/lib/rtc/DeviceCompilation.cpp | 7 +++---- sycl/source/detail/jit_compiler.cpp | 15 +++++++-------- sycl/source/detail/jit_compiler.hpp | 7 +++---- .../kernel_compiler/kernel_compiler_sycl.cpp | 12 +++++------- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index 6dbb2a38c2917..adc212f44eba1 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -147,13 +147,12 @@ class HashPreprocessedAction : public PreprocessorFrontendAction { PreprocessorOutputOptions Opts; Opts.ShowCPP = 1; Opts.MinimizeWhitespace = 1; + // Make cache key insensitive to virtual source file and header locations. + Opts.ShowLineMarkers = 0; DoPrintPreprocessedInput(CI.getPreprocessor(), &PreprocessStream, Opts); - ArrayRef PreprocessedData( - (const uint8_t *)PreprocessedSource.data(), PreprocessedSource.size()); - - Hash = BLAKE3::hash(PreprocessedData); + Hash = BLAKE3::hash(arrayRefFromStringRef(PreprocessedSource)); Executed = true; } diff --git a/sycl/source/detail/jit_compiler.cpp b/sycl/source/detail/jit_compiler.cpp index 1778ec007b1ba..7dd1b5cf31816 100644 --- a/sycl/source/detail/jit_compiler.cpp +++ b/sycl/source/detail/jit_compiler.cpp @@ -1256,12 +1256,11 @@ std::vector jit_compiler::encodeReqdWorkGroupSize( return Encoded; } -sycl_device_binaries jit_compiler::compileSYCL( - const std::string &SYCLSource, +std::pair jit_compiler::compileSYCL( + const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames, - const std::string &Prefix) { + const std::vector &RegisteredKernelNames) { auto appendToLog = [LogPtr](const char *Msg) { if (LogPtr) { LogPtr->append(Msg); @@ -1280,9 +1279,7 @@ sycl_device_binaries jit_compiler::compileSYCL( std::string FinalSource = ss.str(); - // The filename must be stable, because it is part of the preprocessed output - // and in consequence, the cache key. - std::string SYCLFileName = "rtc.cpp"; + std::string SYCLFileName = CompilationID + ".cpp"; ::jit_compiler::InMemoryFile SourceFile{SYCLFileName.c_str(), FinalSource.c_str()}; @@ -1332,7 +1329,9 @@ sycl_device_binaries jit_compiler::compileSYCL( PersistentDeviceCodeCache::putDeviceCodeIRToDisc(CacheKey, SavedIR); } - return createDeviceBinaries(Result.getBundleInfo(), Prefix); + std::string Prefix = CompilationID + '$'; + return std::make_pair(createDeviceBinaries(Result.getBundleInfo(), Prefix), + std::move(Prefix)); } } // namespace detail diff --git a/sycl/source/detail/jit_compiler.hpp b/sycl/source/detail/jit_compiler.hpp index c783289be02a2..6a3bbe56e3d46 100644 --- a/sycl/source/detail/jit_compiler.hpp +++ b/sycl/source/detail/jit_compiler.hpp @@ -49,12 +49,11 @@ class jit_compiler { const std::string &KernelName, const std::vector &SpecConstBlob); - sycl_device_binaries compileSYCL( - const std::string &SYCLSource, + std::pair compileSYCL( + const std::string &CompilationID, const std::string &SYCLSource, const std::vector> &IncludePairs, const std::vector &UserArgs, std::string *LogPtr, - const std::vector &RegisteredKernelNames, - const std::string &Prefix); + const std::vector &RegisteredKernelNames); void destroyDeviceBinaries(sycl_device_binaries Binaries); diff --git a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp index 37fa5eef43be3..ce5793e356abf 100644 --- a/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp +++ b/sycl/source/detail/kernel_compiler/kernel_compiler_sycl.cpp @@ -310,13 +310,11 @@ std::pair SYCL_JIT_to_SPIRV( [[maybe_unused]] std::string *LogPtr, [[maybe_unused]] const std::vector &RegisteredKernelNames) { #if SYCL_EXT_JIT_ENABLE - static std::atomic_uintptr_t CompilationID; - std::string Prefix = "rtc_" + std::to_string(CompilationID++) + "$"; - sycl_device_binaries Binaries = - sycl::detail::jit_compiler::get_instance().compileSYCL( - SYCLSource, IncludePairs, UserArgs, LogPtr, RegisteredKernelNames, - Prefix); - return std::make_pair(Binaries, std::move(Prefix)); + static std::atomic_uintptr_t CompilationCounter; + std::string CompilationID = "rtc_" + std::to_string(CompilationCounter++); + return sycl::detail::jit_compiler::get_instance().compileSYCL( + CompilationID, SYCLSource, IncludePairs, UserArgs, LogPtr, + RegisteredKernelNames); #else throw sycl::exception(sycl::errc::build, "kernel_compiler via sycl-jit is not available");