From a1e71e820abe03455e831b70fbe2beebb82d8d51 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Wed, 24 Sep 2025 14:59:19 -0700 Subject: [PATCH 01/19] [SYCL] Implement coverage instrumentation for device code Signed-off-by: Michael Aziz --- buildbot/configure.py | 2 +- clang/lib/CodeGen/BackendUtil.cpp | 2 +- clang/lib/Driver/ToolChains/SYCL.cpp | 5 -- .../lib/profile/InstrProfilingRuntime.cpp | 16 +++++ .../Instrumentation/InstrProfiling.cpp | 40 ++++++++++++ sycl/CMakeLists.txt | 3 + sycl/source/detail/context_impl.cpp | 5 ++ sycl/source/detail/device_global_map.hpp | 14 ++++- .../source/detail/device_global_map_entry.cpp | 63 ++++++++++++++++++- .../source/detail/device_global_map_entry.hpp | 9 +++ .../program_manager/program_manager.cpp | 13 ++++ .../program_manager/program_manager.hpp | 5 ++ sycl/test-e2e/Basic/device_code_coverage.cpp | 60 ++++++++++++++++++ 13 files changed, 227 insertions(+), 10 deletions(-) create mode 100644 sycl/test-e2e/Basic/device_code_coverage.cpp diff --git a/buildbot/configure.py b/buildbot/configure.py index b2f9a9805976f..4cc5c0e6f8823 100644 --- a/buildbot/configure.py +++ b/buildbot/configure.py @@ -21,7 +21,7 @@ def do_configure(args, passthrough_args): if not os.path.isdir(abs_obj_dir): os.makedirs(abs_obj_dir) - llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw" + llvm_external_projects = "sycl;llvm-spirv;opencl;xpti;xptifw;compiler-rt" # libdevice build requires a working SYCL toolchain, which is not the case # with macOS target right now. diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 8bf72cd263931..72a4635ec5f98 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -601,7 +601,7 @@ getInstrProfOptions(const CodeGenOptions &CodeGenOpts, Options.InstrProfileOutput = CodeGenOpts.ContinuousProfileSync ? ("%c" + CodeGenOpts.InstrProfileOutput) : CodeGenOpts.InstrProfileOutput; - Options.Atomic = CodeGenOpts.AtomicProfileUpdate; + Options.Atomic = LangOpts.SYCLIsDevice || CodeGenOpts.AtomicProfileUpdate; return Options; } diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 12ac2922a31be..7a75bda24cb2e 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1574,11 +1574,6 @@ static ArrayRef getUnsupportedOpts() { options::OPT_fno_profile_generate, // -f[no-]profile-generate options::OPT_ftest_coverage, options::OPT_fno_test_coverage, // -f[no-]test-coverage - options::OPT_fcoverage_mapping, - options::OPT_coverage, // --coverage - options::OPT_fno_coverage_mapping, // -f[no-]coverage-mapping - options::OPT_fprofile_instr_generate, - options::OPT_fprofile_instr_generate_EQ, options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, // -f[no-]profile-arcs options::OPT_fno_profile_instr_generate, // -f[no-]profile-instr-generate diff --git a/compiler-rt/lib/profile/InstrProfilingRuntime.cpp b/compiler-rt/lib/profile/InstrProfilingRuntime.cpp index 6b2ce97001735..ed1f277c96641 100644 --- a/compiler-rt/lib/profile/InstrProfilingRuntime.cpp +++ b/compiler-rt/lib/profile/InstrProfilingRuntime.cpp @@ -10,6 +10,22 @@ extern "C" { #include "InstrProfiling.h" +void __sycl_increment_profile_counters(uint64_t FnHash, size_t NumCounters, + const uint64_t *Increments) { + for (const __llvm_profile_data *DataVar = __llvm_profile_begin_data(); + DataVar < __llvm_profile_end_data(); DataVar++) { + if (DataVar->NameRef != FnHash || DataVar->NumCounters != NumCounters) + continue; + + uint64_t *const Counters = reinterpret_cast( + reinterpret_cast(DataVar) + + reinterpret_cast(DataVar->CounterPtr)); + for (size_t i = 0; i < NumCounters; i++) + Counters[i] += Increments[i]; + break; + } +} + static int RegisterRuntime() { __llvm_profile_initialize(); #ifdef _AIX diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 5e7548b0a2fd1..b76de9ae8a573 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1002,6 +1002,9 @@ bool InstrLowerer::lower() { if (!NeedsRuntimeHook && ContainsProfiling) emitRuntimeHook(); + if (M.getTargetTriple().isSPIR()) + return true; + emitRegistration(); emitUses(); emitInitialization(); @@ -1116,6 +1119,21 @@ GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) { } Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { + if (M.getTargetTriple().isSPIR()) { + auto *Counters = getOrCreateRegionCounters(I); + IRBuilder<> Builder(I); + auto *Addr = Builder.CreateLoad(PointerType::get(M.getContext(), 1), + Counters, "pgocount.addr"); + const std::uint64_t Index = I->getIndex()->getZExtValue(); + if (Index > 0) { + auto *Offset = Builder.getInt64(I->getIndex()->getZExtValue()); + auto *AddrWithOffset = Builder.CreateGEP(Type::getInt64Ty(M.getContext()), + Addr, Offset, "pgocount.addr"); + return AddrWithOffset; + } + return Addr; + } + auto *Counters = getOrCreateRegionCounters(I); IRBuilder<> Builder(I); @@ -1657,6 +1675,28 @@ InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) { GlobalVariable * InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, GlobalValue::LinkageTypes Linkage) { + if (M.getTargetTriple().isSPIR()) { + uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); + auto &Ctx = M.getContext(); + GlobalVariable *GV; + auto *PtrTy = PointerType::get(Ctx, 1); + auto *IntTy = Type::getInt64Ty(Ctx); + auto *StructTy = StructType::get(Ctx, {PtrTy, IntTy}); + GV = new GlobalVariable(M, StructTy, false, Linkage, + Constant::getNullValue(StructTy), Name); + const std::uint64_t FnHash = IndexedInstrProf::ComputeHash( + getPGOFuncNameVarInitializer(Inc->getName())); + const std::string FnName = [&] { + auto *Arr = cast(Inc->getName()->getInitializer()); + StringRef NameStr = + Arr->isCString() ? Arr->getAsCString() : Arr->getAsString(); + return std::string{"__profc_"} + std::to_string(FnHash); + }(); + GV->addAttribute("sycl-unique-id", FnName); + GV->addAttribute("sycl-device-global-size", Twine(NumCounters * 8).str()); + return GV; + } + uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); auto &Ctx = M.getContext(); GlobalVariable *GV; diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index ae3fa0335ab17..afce141cb69d3 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -389,6 +389,7 @@ add_custom_target(sycl-compiler clang-offload-extract clang-offload-packager clang-linker-wrapper + compiler-rt file-table-tform llc llvm-ar @@ -396,6 +397,8 @@ add_custom_target(sycl-compiler llvm-spirv llvm-link llvm-objcopy + llvm-profdata + llvm-cov spirv-to-ir-wrapper sycl-post-link opencl-aot diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 6fb2dd375fe37..99123f106b6a3 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -128,6 +128,11 @@ context_impl::~context_impl() { if (DGEntry != nullptr) DGEntry->removeAssociatedResources(this); } + // Free all profile counter USM allocations associated with this context. + for (DeviceGlobalMapEntry *DGEntry : + detail::ProgramManager::getInstance() + .getProfileCounterDeviceGlobalEntries(this)) + DGEntry->cleanupProfileCounter(this); MCachedLibPrograms.clear(); // TODO catch an exception and put it to list of asynchronous exceptions getAdapter().call_nocheck(MContext); diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 7dac09df41653..5e4a85a1bf512 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -75,7 +75,9 @@ class DeviceGlobalMap { // cannot be set until registration happens. auto EntryUPtr = std::make_unique( DeviceGlobal->Name, Img, TypeSize, DeviceImageScopeDecorated); - MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); + auto NewEntry = MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); + if (NewEntry.first->second->isProfileCounter()) + MProfileCounterDeviceGlobals.push_back(NewEntry.first->second.get()); } } } @@ -114,6 +116,8 @@ class DeviceGlobalMap { auto EntryUPtr = std::make_unique(UniqueId, DeviceGlobalPtr); auto NewEntry = MDeviceGlobals.emplace(UniqueId, std::move(EntryUPtr)); + if (NewEntry.first->second->isProfileCounter()) + MProfileCounterDeviceGlobals.push_back(NewEntry.first->second.get()); MPtr2DeviceGlobal.insert({DeviceGlobalPtr, NewEntry.first->second.get()}); } @@ -154,6 +158,11 @@ class DeviceGlobalMap { } } + std::vector getProfileCounterEntries() { + std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); + return MProfileCounterDeviceGlobals; + } + const std::unordered_map getPointerMap() const { return MPtr2DeviceGlobal; @@ -177,6 +186,9 @@ class DeviceGlobalMap { MDeviceGlobals; std::unordered_map MPtr2DeviceGlobal; + // List of profile counter device globals. + std::vector MProfileCounterDeviceGlobals; + /// Protects MDeviceGlobals and MPtr2DeviceGlobal. std::mutex MDeviceGlobalsMutex; }; diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 25704caaee6de..7d792389acdbf 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -53,6 +53,63 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) { } } +bool DeviceGlobalMapEntry::isAvailableInContext(const context_impl *CtxImpl) { + std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; + for (const auto &It : MDeviceToUSMPtrMap) + if (It.first.second == CtxImpl) + return true; + return false; +} + +bool DeviceGlobalMapEntry::isProfileCounter() { + const std::string CounterPrefix = "__profc_"; + return MUniqueId.substr(0, CounterPrefix.size()) == CounterPrefix; +} + +extern "C" void __attribute__((weak)) +__sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters, + const std::uint64_t *Increments); + +void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) { + std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; + const std::size_t NumCounters = MDeviceGlobalTSize / sizeof(std::uint64_t); + const std::uint64_t FnHash = [&] { + const auto PrefixSize = std::string{"__profc_"}.size(); + constexpr int DecimalBase = 10; + return std::strtoull(MUniqueId.substr(PrefixSize).c_str(), nullptr, + DecimalBase); + }(); + for (device_impl &Device : CtxImpl->getDevices()) { + auto USMPtrIt = MDeviceToUSMPtrMap.find({&Device, CtxImpl}); + if (USMPtrIt != MDeviceToUSMPtrMap.end()) { + DeviceGlobalUSMMem &USMMem = USMPtrIt->second; + + // Get the increments from the USM pointer + std::vector Increments(NumCounters); + const std::uint64_t *Counters = static_cast(USMMem.MPtr); + for (std::size_t I = 0; I < NumCounters; ++I) + Increments[I] += Counters[I]; + + // Call the weak symbol to update the profile counters + if (__sycl_increment_profile_counters) { + __sycl_increment_profile_counters(FnHash, Increments.size(), + Increments.data()); + } + + // Free the USM memory and release the event if it exists. + detail::usm::freeInternal(USMMem.MPtr, CtxImpl); + if (USMMem.MInitEvent != nullptr) + CtxImpl->getAdapter().call( + USMMem.MInitEvent); + + // Set to nullptr to avoid double free. + USMMem.MPtr = nullptr; + USMMem.MInitEvent = nullptr; + MDeviceToUSMPtrMap.erase(USMPtrIt); + } + } +} + DeviceGlobalUSMMem & DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) { assert(!MIsDeviceImageScopeDecorated && @@ -67,7 +124,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) { return DGUSMPtr->second; void *NewDGUSMPtr = detail::usm::alignedAllocInternal( - 0, MDeviceGlobalTSize, &CtxImpl, &DevImpl, sycl::usm::alloc::device); + 0, MDeviceGlobalTSize, &CtxImpl, &DevImpl, + isProfileCounter() ? sycl::usm::alloc::shared : sycl::usm::alloc::device); auto NewAllocIt = MDeviceToUSMPtrMap.emplace( std::piecewise_construct, std::forward_as_tuple(&DevImpl, &CtxImpl), @@ -125,7 +183,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) { return DGUSMPtr->second; void *NewDGUSMPtr = detail::usm::alignedAllocInternal( - 0, MDeviceGlobalTSize, &CtxImpl, &DevImpl, sycl::usm::alloc::device); + 0, MDeviceGlobalTSize, &CtxImpl, &DevImpl, + isProfileCounter() ? sycl::usm::alloc::shared : sycl::usm::alloc::device); auto NewAllocIt = MDeviceToUSMPtrMap.emplace( std::piecewise_construct, std::forward_as_tuple(&DevImpl, &CtxImpl), diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index 9ff30938cbf34..72e5bbf2d678d 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -110,6 +110,15 @@ struct DeviceGlobalMapEntry { MIsDeviceImageScopeDecorated = IsDeviceImageScopeDecorated; } + // Checks if the device_global is available in the given context. + bool isAvailableInContext(const context_impl *CtxImpl); + + // Returns true if the device_global is a profile counter. + bool isProfileCounter(); + + // Cleans up a profile counter device global + void cleanupProfileCounter(context_impl *CtxImpl); + // Gets or allocates USM memory for a device_global. DeviceGlobalUSMMem &getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl); diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index eab2b88d0ad34..8e58d6d80e6c1 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2466,6 +2466,19 @@ std::vector ProgramManager::getDeviceGlobalEntries( return FoundEntries; } +std::vector +ProgramManager::getProfileCounterDeviceGlobalEntries( + const context_impl *CtxImpl) { + std::vector ProfileCounters = + ProgramManager::getInstance().m_DeviceGlobals.getProfileCounterEntries(); + std::vector FoundEntries; + for (const auto &DGEntry : ProfileCounters) { + if (DGEntry->isAvailableInContext(CtxImpl)) + FoundEntries.push_back(DGEntry); + } + return FoundEntries; +} + void ProgramManager::addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId) { std::lock_guard HostPipesGuard(m_HostPipesMutex); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index b9d0dc700f77c..8a659463fe0c1 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -282,6 +282,11 @@ class ProgramManager { std::vector getDeviceGlobalEntries(const std::vector &UniqueIds, bool ExcludeDeviceImageScopeDecorated = false); + + // The function gets all device_global entries that are profile counters. + std::vector + getProfileCounterDeviceGlobalEntries(const context_impl *CtxImpl); + // The function inserts or initializes a host_pipe entry into the // host_pipe map. void addOrInitHostPipeEntry(const void *HostPipePtr, const char *UniqueId); diff --git a/sycl/test-e2e/Basic/device_code_coverage.cpp b/sycl/test-e2e/Basic/device_code_coverage.cpp new file mode 100644 index 0000000000000..65127e3233a7a --- /dev/null +++ b/sycl/test-e2e/Basic/device_code_coverage.cpp @@ -0,0 +1,60 @@ +// RUN: %{build} -fprofile-instr-generate -fcoverage-mapping -o %t.out +// RUN: %{run} LLVM_PROFILE_FILE=%t.profraw %t.out +// RUN: llvm-profdata merge %t.profraw -o %t.profdata +// RUN: llvm-cov show -instr-profile=%t.profdata %t.out -name="main" | FileCheck %s + +#include + +int main() { + sycl::queue q; + int *values = sycl::malloc_shared(10, q); + q.submit([&](sycl::handler &h) { + h.parallel_for(sycl::range<1>(10), [=](sycl::id<1> idx) { + if (idx[0] < 8) + values[idx] = 42; + else + values[idx] = 7; + }); + }).wait(); + for (int i = 0; i < 10; i++) + assert(values[i] == (i < 8 ? 42 : 7)); + sycl::free(values, q); + return 0; +} + +// UNSUPPORTED: opencl && gpu +// UNSUPPORTED-TRACKER: GSD-4287 + +// CHECK: main: +// CHECK: 8| 1|int main() { +// CHECK: 9| 1| sycl::queue q; +// CHECK: 10| 1| int *values = sycl::malloc_shared(10, q); +// CHECK: 11| 1| q.submit([&](sycl::handler &h) { +// CHECK: 12| 1| h.parallel_for(sycl::range<1>(10), [=](sycl::id<1> idx) { +// CHECK: 13| 1| if (idx[0] < 8) +// CHECK: 14| 1| values[idx] = 42; +// CHECK: 15| 1| else +// CHECK: 16| 1| values[idx] = 7; +// CHECK: 17| 1| }); +// CHECK: 18| 1| }).wait(); +// CHECK: 19| 11| for (int i = 0; i < 10; i++) +// CHECK: 20| 10| assert(values[i] == (i < 8 ? 42 : 7)); +// CHECK: 21| 1| sycl::free(values, q); +// CHECK: 22| 1| return 0; +// CHECK: 23| 1|} +// CHECK: device_code_coverage.cpp:_ZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_: +// CHECK: 11| 1| q.submit([&](sycl::handler &h) { +// CHECK: 12| 1| h.parallel_for(sycl::range<1>(10), [=](sycl::id<1> idx) { +// CHECK: 13| 1| if (idx[0] < 8) +// CHECK: 14| 1| values[idx] = 42; +// CHECK: 15| 1| else +// CHECK: 16| 1| values[idx] = 7; +// CHECK: 17| 1| }); +// CHECK: 18| 1| }).wait(); +// CHECK: device_code_coverage.cpp:_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_2idILi1EEEE_clES5_: +// CHECK: 12| 10| h.parallel_for(sycl::range<1>(10), [=](sycl::id<1> idx) { +// CHECK: 13| 10| if (idx[0] < 8) +// CHECK: 14| 8| values[idx] = 42; +// CHECK: 15| 2| else +// CHECK: 16| 2| values[idx] = 7; +// CHECK: 17| 10| }); From 4f9041bd0657cbb30054affb139ef04a084c27ca Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Thu, 25 Sep 2025 19:52:09 -0700 Subject: [PATCH 02/19] Run `clang-format` Signed-off-by: Michael Aziz --- sycl/source/detail/device_global_map.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 5e4a85a1bf512..fa46efe5e6680 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -75,7 +75,8 @@ class DeviceGlobalMap { // cannot be set until registration happens. auto EntryUPtr = std::make_unique( DeviceGlobal->Name, Img, TypeSize, DeviceImageScopeDecorated); - auto NewEntry = MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); + auto NewEntry = + MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); if (NewEntry.first->second->isProfileCounter()) MProfileCounterDeviceGlobals.push_back(NewEntry.first->second.get()); } From 43ff03ae7d2838c289161343579acd49c766e7bd Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Fri, 26 Sep 2025 09:17:25 -0700 Subject: [PATCH 03/19] Address review comments --- clang/lib/Driver/ToolChains/SYCL.cpp | 1 + clang/test/Driver/sycl-unsupported.cpp | 13 ------------- .../Transforms/Instrumentation/InstrProfiling.cpp | 14 +++++--------- sycl/source/detail/device_global_map_entry.cpp | 11 +++++------ sycl/source/detail/device_global_map_entry.hpp | 2 +- .../detail/program_manager/program_manager.cpp | 2 +- .../{Basic => Coverage}/device_code_coverage.cpp | 7 ++++--- 7 files changed, 17 insertions(+), 33 deletions(-) rename sycl/test-e2e/{Basic => Coverage}/device_code_coverage.cpp (91%) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 7a75bda24cb2e..9bb71110c6502 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1574,6 +1574,7 @@ static ArrayRef getUnsupportedOpts() { options::OPT_fno_profile_generate, // -f[no-]profile-generate options::OPT_ftest_coverage, options::OPT_fno_test_coverage, // -f[no-]test-coverage + options::OPT_coverage, // --coverage options::OPT_fprofile_arcs, options::OPT_fno_profile_arcs, // -f[no-]profile-arcs options::OPT_fno_profile_instr_generate, // -f[no-]profile-instr-generate diff --git a/clang/test/Driver/sycl-unsupported.cpp b/clang/test/Driver/sycl-unsupported.cpp index 311efbecf8b6b..b444edb9bc2c8 100644 --- a/clang/test/Driver/sycl-unsupported.cpp +++ b/clang/test/Driver/sycl-unsupported.cpp @@ -19,13 +19,6 @@ // RUN: -DOPT_CC1=-debug-info-kind=line-tables-only \ // RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT -// RUN: %clangxx -fsycl -fprofile-instr-generate -### %s 2>&1 \ -// RUN: | FileCheck %s -DARCH=spir64 -DOPT=-fprofile-instr-generate \ -// RUN: -DOPT_CC1=-fprofile-instrument=clang \ -// RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT -// RUN: %clangxx -fsycl -fcoverage-mapping \ -// RUN: -fprofile-instr-generate -### %s 2>&1 \ -// RUN: | FileCheck %s -DARCH=spir64 -DOPT=-fcoverage-mapping // RUN: %clangxx -fsycl -ftest-coverage -### %s 2>&1 \ // RUN: | FileCheck %s -DARCH=spir64 -DOPT=-ftest-coverage \ // RUN: -DOPT_CC1=-coverage-notes-file \ @@ -49,12 +42,6 @@ // RUN: | FileCheck %s -DARCH=spir64 -DOPT=--coverage \ // RUN: -DOPT_CC1=-coverage-notes-file \ // RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT -// Check to make sure our '-fsanitize=address' exception isn't triggered by a -// different option -// RUN: %clangxx -fsycl -fprofile-instr-generate=address -### %s 2>&1 \ -// RUN: | FileCheck %s -DARCH=spir64 -DOPT=-fprofile-instr-generate=address \ -// RUN: -DOPT_CC1=-fprofile-instrument=clang \ -// RUN: -check-prefixes=UNSUPPORTED_OPT_DIAG,UNSUPPORTED_OPT // CHECK: ignoring '[[OPT]]' option as it is not currently supported for target '[[ARCH]]{{.*}}'; only supported for host compilation [-Woption-ignored] // CHECK-NOT: clang{{.*}} "-fsycl-is-device"{{.*}} "[[OPT]]{{.*}}" diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index b76de9ae8a573..9b5c4eb835d6a 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1126,9 +1126,10 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { Counters, "pgocount.addr"); const std::uint64_t Index = I->getIndex()->getZExtValue(); if (Index > 0) { - auto *Offset = Builder.getInt64(I->getIndex()->getZExtValue()); - auto *AddrWithOffset = Builder.CreateGEP(Type::getInt64Ty(M.getContext()), - Addr, Offset, "pgocount.addr"); + auto *Offset = Builder.getInt64(I->getIndex()->getZExtValue() * + sizeof(std::uint64_t)); + auto *AddrWithOffset = + Builder.CreatePtrAdd(Addr, Offset, "pgocount.offset"); return AddrWithOffset; } return Addr; @@ -1686,12 +1687,7 @@ InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, Constant::getNullValue(StructTy), Name); const std::uint64_t FnHash = IndexedInstrProf::ComputeHash( getPGOFuncNameVarInitializer(Inc->getName())); - const std::string FnName = [&] { - auto *Arr = cast(Inc->getName()->getInitializer()); - StringRef NameStr = - Arr->isCString() ? Arr->getAsCString() : Arr->getAsString(); - return std::string{"__profc_"} + std::to_string(FnHash); - }(); + const std::string FnName = std::string{"__profc_"} + std::to_string(FnHash); GV->addAttribute("sycl-unique-id", FnName); GV->addAttribute("sycl-device-global-size", Twine(NumCounters * 8).str()); return GV; diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 7d792389acdbf..fecf974978c3f 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -79,22 +79,21 @@ void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) { return std::strtoull(MUniqueId.substr(PrefixSize).c_str(), nullptr, DecimalBase); }(); - for (device_impl &Device : CtxImpl->getDevices()) { + for (const device_impl &Device : CtxImpl->getDevices()) { auto USMPtrIt = MDeviceToUSMPtrMap.find({&Device, CtxImpl}); if (USMPtrIt != MDeviceToUSMPtrMap.end()) { DeviceGlobalUSMMem &USMMem = USMPtrIt->second; - // Get the increments from the USM pointer + // Get the increments from the USM pointer. std::vector Increments(NumCounters); const std::uint64_t *Counters = static_cast(USMMem.MPtr); for (std::size_t I = 0; I < NumCounters; ++I) - Increments[I] += Counters[I]; + Increments[I] = Counters[I]; - // Call the weak symbol to update the profile counters - if (__sycl_increment_profile_counters) { + // Call the weak symbol to update the profile counters. + if (__sycl_increment_profile_counters) __sycl_increment_profile_counters(FnHash, Increments.size(), Increments.data()); - } // Free the USM memory and release the event if it exists. detail::usm::freeInternal(USMMem.MPtr, CtxImpl); diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index 72e5bbf2d678d..f129df44d1c6f 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -116,7 +116,7 @@ struct DeviceGlobalMapEntry { // Returns true if the device_global is a profile counter. bool isProfileCounter(); - // Cleans up a profile counter device global + // Cleans up a profile counter device global. void cleanupProfileCounter(context_impl *CtxImpl); // Gets or allocates USM memory for a device_global. diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 8e58d6d80e6c1..d9a2e71925b5d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2469,7 +2469,7 @@ std::vector ProgramManager::getDeviceGlobalEntries( std::vector ProgramManager::getProfileCounterDeviceGlobalEntries( const context_impl *CtxImpl) { - std::vector ProfileCounters = + const std::vector ProfileCounters = ProgramManager::getInstance().m_DeviceGlobals.getProfileCounterEntries(); std::vector FoundEntries; for (const auto &DGEntry : ProfileCounters) { diff --git a/sycl/test-e2e/Basic/device_code_coverage.cpp b/sycl/test-e2e/Coverage/device_code_coverage.cpp similarity index 91% rename from sycl/test-e2e/Basic/device_code_coverage.cpp rename to sycl/test-e2e/Coverage/device_code_coverage.cpp index 65127e3233a7a..368a7f1879bbc 100644 --- a/sycl/test-e2e/Basic/device_code_coverage.cpp +++ b/sycl/test-e2e/Coverage/device_code_coverage.cpp @@ -1,9 +1,9 @@ // RUN: %{build} -fprofile-instr-generate -fcoverage-mapping -o %t.out // RUN: %{run} LLVM_PROFILE_FILE=%t.profraw %t.out -// RUN: llvm-profdata merge %t.profraw -o %t.profdata -// RUN: llvm-cov show -instr-profile=%t.profdata %t.out -name="main" | FileCheck %s +// RUN: %{run-aux} llvm-profdata merge %t.profraw -o %t.profdata +// RUN: %{run-aux} llvm-cov show -instr-profile=%t.profdata %t.out -name="main" | FileCheck %s -#include +#include int main() { sycl::queue q; @@ -22,6 +22,7 @@ int main() { return 0; } +// REQUIRES: target-spir // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 From 940860199aa7a1cb43f3e96c1a55c80cc7b1e231 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Fri, 26 Sep 2025 11:29:15 -0700 Subject: [PATCH 04/19] Fix MSVC build error Signed-off-by: Michael Aziz --- .../source/detail/device_global_map_entry.cpp | 59 ++++++++++++------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index fecf974978c3f..5fe1e784fe8a8 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -66,9 +66,26 @@ bool DeviceGlobalMapEntry::isProfileCounter() { return MUniqueId.substr(0, CounterPrefix.size()) == CounterPrefix; } +#ifdef _MSC_VER +extern "C" void +__sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters, + const std::uint64_t *Increments); +extern "C" void +__sycl_increment_profile_counters_default(std::uint64_t FnHash, + std::size_t NumCounters, + const std::uint64_t *Increments) { + (void)FnHash; + (void)NumCounters; + (void)Increments; +} +#pragma comment( \ + linker, \ + "/alternatename:__sycl_increment_profile_counters=__sycl_increment_profile_counters_default") +#else extern "C" void __attribute__((weak)) __sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters, const std::uint64_t *Increments); +#endif void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) { std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; @@ -91,7 +108,7 @@ void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) { Increments[I] = Counters[I]; // Call the weak symbol to update the profile counters. - if (__sycl_increment_profile_counters) + if (&__sycl_increment_profile_counters) __sycl_increment_profile_counters(FnHash, Increments.size(), Increments.data()); @@ -139,12 +156,12 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) { std::lock_guard Lock(NewAlloc.MInitEventMutex); ur_event_handle_t InitEvent; if (MDeviceGlobalPtr) { - // C++ guarantees members appear in memory in the order they are declared, - // so since the member variable that contains the initial contents of the - // device_global is right after the usm_ptr member variable we can do - // some pointer arithmetic to memcopy over this value to the usm_ptr. This - // value inside of the device_global will be zero-initialized if it was - // not given a value on construction. + // C++ guarantees members appear in memory in the order they are + // declared, so since the member variable that contains the initial + // contents of the device_global is right after the usm_ptr member + // variable we can do some pointer arithmetic to memcopy over this + // value to the usm_ptr. This value inside of the device_global will + // be zero-initialized if it was not given a value on construction. MemoryManager::copy_usm( reinterpret_cast( reinterpret_cast(MDeviceGlobalPtr) + @@ -152,8 +169,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) { QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr, std::vector{}, &InitEvent); } else { - // For SYCLBIN device globals we do not have a host pointer to copy from, - // so instead we fill the USM memory with 0's. + // For SYCLBIN device globals we do not have a host pointer to copy + // from, so instead we fill the USM memory with 0's. MemoryManager::fill_usm(NewAlloc.MPtr, QueueImpl, MDeviceGlobalTSize, {static_cast(0)}, {}, &InitEvent); } @@ -161,8 +178,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) { } // Only device globals with host variables need to be registered with the - // context. The rest will be managed by their kernel bundles and cleaned up - // accordingly. + // context. The rest will be managed by their kernel bundles and cleaned + // up accordingly. if (MDeviceGlobalPtr) CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); return NewAlloc; @@ -194,20 +211,20 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) { NewAlloc.MAllocatingContext = CtxImpl.shared_from_this(); if (MDeviceGlobalPtr) { - // C++ guarantees members appear in memory in the order they are declared, - // so since the member variable that contains the initial contents of the - // device_global is right after the usm_ptr member variable we can do - // some pointer arithmetic to memcopy over this value to the usm_ptr. This - // value inside of the device_global will be zero-initialized if it was not - // given a value on construction. + // C++ guarantees members appear in memory in the order they are + // declared, so since the member variable that contains the initial + // contents of the device_global is right after the usm_ptr member + // variable we can do some pointer arithmetic to memcopy over this value + // to the usm_ptr. This value inside of the device_global will be + // zero-initialized if it was not given a value on construction. MemoryManager::context_copy_usm( reinterpret_cast( reinterpret_cast(MDeviceGlobalPtr) + sizeof(MDeviceGlobalPtr)), &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr); } else { - // For SYCLBIN device globals we do not have a host pointer to copy from, - // so instead we fill the USM memory with 0's. + // For SYCLBIN device globals we do not have a host pointer to copy + // from, so instead we fill the USM memory with 0's. std::vector ImmBuff(MDeviceGlobalTSize, static_cast(0)); MemoryManager::context_copy_usm(ImmBuff.data(), &CtxImpl, @@ -215,8 +232,8 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) { } // Only device globals with host variables need to be registered with the - // context. The rest will be managed by their kernel bundles and cleaned up - // accordingly. + // context. The rest will be managed by their kernel bundles and cleaned + // up accordingly. if (MDeviceGlobalPtr) CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); return NewAlloc; From 8201ffc0754b77deae08f3247da99c4fdd21e843 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Fri, 26 Sep 2025 12:54:24 -0700 Subject: [PATCH 05/19] Add a design doc Signed-off-by: Michael Aziz --- sycl/doc/design/DeviceCodeCoverage.md | 69 +++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 sycl/doc/design/DeviceCodeCoverage.md diff --git a/sycl/doc/design/DeviceCodeCoverage.md b/sycl/doc/design/DeviceCodeCoverage.md new file mode 100644 index 0000000000000..1323535a9e8e5 --- /dev/null +++ b/sycl/doc/design/DeviceCodeCoverage.md @@ -0,0 +1,69 @@ +# Design for Device-side Code Coverage + +## Overview + +This document describes the design and implementation of device-side code coverage for SYCL, extending Clang's source-based code coverage to support device code. The approach leverages the existing SYCL device global infrastructure, as detailed in the [DeviceGlobal.md](DeviceGlobal.md) design document, to enable collection and aggregation of coverage data from device kernels. + +## Design Details + +### Profiling Counter Representation + +Profiling counters for code coverage are lowered by the compiler as device globals. Specifically, the `InstrProfilingLoweringPass` is modified so that, when targeting SPIR-V, coverage counters are represented as pointers to USM buffers, matching the representation of other SYCL device globals. This indirection allows counters to be relocatable and managed consistently with other device-side global variables. + +Each counter is annotated with a unique identifier (`sycl-unique-id`) of the form `__profc_`, where `` is a 64-bit unsigned integer uniquely identifying the instrumented function. The counter's size is also recorded via the `sycl-device-global-size` attribute. These attributes ensure that counters are discoverable and manageable by the SYCL runtime and integration headers/footers. + +### Integration with Device Global Infrastructure + +The device global infrastructure, as described in [DeviceGlobal.md](DeviceGlobal.md), provides mechanisms for mapping host and device instances of global variables, managing their lifetimes, and facilitating data transfer. Device-side coverage counters are treated as a special class of device globals: + +- They use the shared alloation type rather than the device allocation type for the underlying USM memory. +- They do not have corresponding `device_global` declarations in host code. +- Their lifetime and cleanup are managed via the device global map, with integration footer code ensuring registration and deregistration. + +### Runtime Handling and Data Aggregation + +When a device global entry corresponding to a coverage counter is released (e.g., when a device image is unloaded), the SYCL runtime aggregates the values from the device-side counter into the equivalent host-side counter. Equivalence is determined by matching both the `` and the number of counter regions. If no matching host-side counter exists—typically due to differences in code between host and device caused by the `__SYCL_DEVICE_ONLY__` macro—the device-side counter values are discarded. + +The aggregation is performed by invoking a new function in the compiler runtime, `__sycl_increment_profile_counters`, which is weakly linked to accommodate optional runtime availability. This function accepts the ``, the number of regions, and the increment values, and updates the host-side counters accordingly. At program exit, the final profile data reflects the sum of host and device coverage counters. + +### Compiler and Runtime Changes + +#### Compiler Frontend + +- The lowering pass for coverage counters is updated to emit device globals with the appropriate attributes and indirection. +- Integration headers and footers are updated to register device global counters with the runtime, using the unique identifier and size. + +#### SYCL Runtime + +- Device globals with IDs matching the `__profc_` pattern are recognized as coverage counters. +- USM allocation and management for counters is handled as for other device globals, but without host-side declarations. +- Upon cleanup, device-side counter values are aggregated into host-side counters via the runtime API. + +#### Compiler Runtime + +- The new function `__sycl_increment_profile_counters` is introduced to update host-side counters. +- The function is weakly linked to allow for optional inclusion. + +### Limitations and Considerations + +- The feature is currently implemented only for SPIR-V targets; CUDA and HIP backends are not supported. +- Devices lacking support for device globals cannot utilize device-side code coverage. +- Differences in code between host and device (e.g., due to `__SYCL_DEVICE_ONLY__`) may prevent aggregation of coverage data for some functions. +- The design relies on the robustness of the device global infrastructure for correct mapping and lifetime management. + +## Relationship to Device Global Design + +This feature is built upon the mechanisms described in [DeviceGlobal.md](DeviceGlobal.md), including: + +- Use of unique string identifiers (`sycl-unique-id`) for mapping and management. +- USM-based allocation and zero-initialization of device-side storage. +- Integration header/footer registration for host-device correlation. +- Runtime database for device global management and lookup. + +The code coverage counters are a specialized use case of device globals, with additional logic for aggregation and profile generation. + +## References + +- [Implementation design for SYCL device globals](DeviceGlobal.md) +- [Clang Source-based Code Coverage](https://clang.llvm.org/docs/SourceBasedCodeCoverage.html) +- [SYCL Specification](https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html) From 259e66629d6ad9d35467ac38cc95d76a017a8c7b Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Fri, 26 Sep 2025 13:05:47 -0700 Subject: [PATCH 06/19] Add the design doc to the toctree Signed-off-by: Michael Aziz --- sycl/doc/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/doc/index.rst b/sycl/doc/index.rst index fe3e1078514a8..e0c83c8645067 100644 --- a/sycl/doc/index.rst +++ b/sycl/doc/index.rst @@ -40,6 +40,7 @@ Design Documents for the oneAPI DPC++ Compiler design/ParallelForRangeRounding design/SYCLInstrumentationUsingXPTI design/ITTAnnotations + design/DeviceCodeCoverage design/DeviceGlobal design/CompileTimeProperties design/HostPipes From c30c09838b090dbe435628619cecbe8c17860215 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Fri, 26 Sep 2025 16:15:50 -0400 Subject: [PATCH 07/19] Update sycl/doc/design/DeviceCodeCoverage.md --- sycl/doc/design/DeviceCodeCoverage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/design/DeviceCodeCoverage.md b/sycl/doc/design/DeviceCodeCoverage.md index 1323535a9e8e5..954b53b72bcb2 100644 --- a/sycl/doc/design/DeviceCodeCoverage.md +++ b/sycl/doc/design/DeviceCodeCoverage.md @@ -16,7 +16,7 @@ Each counter is annotated with a unique identifier (`sycl-unique-id`) of the for The device global infrastructure, as described in [DeviceGlobal.md](DeviceGlobal.md), provides mechanisms for mapping host and device instances of global variables, managing their lifetimes, and facilitating data transfer. Device-side coverage counters are treated as a special class of device globals: -- They use the shared alloation type rather than the device allocation type for the underlying USM memory. +- They use the shared allocation type rather than the device allocation type for the underlying USM memory. - They do not have corresponding `device_global` declarations in host code. - Their lifetime and cleanup are managed via the device global map, with integration footer code ensuring registration and deregistration. From 094c73d3bcb418fbe33c0a32edbe5262f92dc21e Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Fri, 26 Sep 2025 15:08:48 -0700 Subject: [PATCH 08/19] Set `AtomicProfileUpdate` in `CodeGenOpts` Signed-off-by: Michael Aziz --- clang/lib/CodeGen/BackendUtil.cpp | 2 +- clang/lib/Frontend/CompilerInvocation.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 72a4635ec5f98..8bf72cd263931 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -601,7 +601,7 @@ getInstrProfOptions(const CodeGenOptions &CodeGenOpts, Options.InstrProfileOutput = CodeGenOpts.ContinuousProfileSync ? ("%c" + CodeGenOpts.InstrProfileOutput) : CodeGenOpts.InstrProfileOutput; - Options.Atomic = LangOpts.SYCLIsDevice || CodeGenOpts.AtomicProfileUpdate; + Options.Atomic = CodeGenOpts.AtomicProfileUpdate; return Options; } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 9ff4eeb3fd3b1..510bdf576b94a 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -5377,6 +5377,8 @@ bool CompilerInvocation::CreateFromArgsImpl( if (LangOpts.SYCLIsDevice) { // Set the triple of the host for SYCL device compile. Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; + // Set the atomic profile update flag to increment counters atomically. + Res.getCodeGenOpts().AtomicProfileUpdate = true; // If specified, create empty integration header files for now. CreateEmptyFile(LangOpts.SYCLIntHeader); CreateEmptyFile(LangOpts.SYCLIntFooter); From fca194f053c6bf6c6811aad63d0631e698b970a5 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 13:32:04 -0400 Subject: [PATCH 09/19] Apply suggestion from @steffenlarsen Co-authored-by: Steffen Larsen --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 9b5c4eb835d6a..16e4ff6036131 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1126,8 +1126,7 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { Counters, "pgocount.addr"); const std::uint64_t Index = I->getIndex()->getZExtValue(); if (Index > 0) { - auto *Offset = Builder.getInt64(I->getIndex()->getZExtValue() * - sizeof(std::uint64_t)); + auto *Offset = Builder.getInt64(Index * sizeof(std::uint64_t)); auto *AddrWithOffset = Builder.CreatePtrAdd(Addr, Offset, "pgocount.offset"); return AddrWithOffset; From 607e2c11ac769bc20190b90a1753ca1c26f4f66e Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 13:32:21 -0400 Subject: [PATCH 10/19] Apply suggestion from @steffenlarsen Co-authored-by: Steffen Larsen --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 16e4ff6036131..45d0f98c8a7be 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1127,9 +1127,7 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { const std::uint64_t Index = I->getIndex()->getZExtValue(); if (Index > 0) { auto *Offset = Builder.getInt64(Index * sizeof(std::uint64_t)); - auto *AddrWithOffset = - Builder.CreatePtrAdd(Addr, Offset, "pgocount.offset"); - return AddrWithOffset; + return Builder.CreatePtrAdd(Addr, Offset, "pgocount.offset"); } return Addr; } From e2ea6ed3d94e635416bc69a4276dc3b0bd62d988 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 13:32:49 -0400 Subject: [PATCH 11/19] Apply suggestion from @steffenlarsen Co-authored-by: Steffen Larsen --- llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 45d0f98c8a7be..ec31654645cf5 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1676,11 +1676,10 @@ InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, if (M.getTargetTriple().isSPIR()) { uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); auto &Ctx = M.getContext(); - GlobalVariable *GV; auto *PtrTy = PointerType::get(Ctx, 1); auto *IntTy = Type::getInt64Ty(Ctx); auto *StructTy = StructType::get(Ctx, {PtrTy, IntTy}); - GV = new GlobalVariable(M, StructTy, false, Linkage, + GlobalVariable *GV = new GlobalVariable(M, StructTy, false, Linkage, Constant::getNullValue(StructTy), Name); const std::uint64_t FnHash = IndexedInstrProf::ComputeHash( getPGOFuncNameVarInitializer(Inc->getName())); From d4459f9d367547db0ed6631bd8fc4d36b14400ea Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 13:33:04 -0400 Subject: [PATCH 12/19] Apply suggestion from @steffenlarsen Co-authored-by: Steffen Larsen --- .../detail/program_manager/program_manager.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index d9a2e71925b5d..ee4e51dfe0b44 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2469,14 +2469,15 @@ std::vector ProgramManager::getDeviceGlobalEntries( std::vector ProgramManager::getProfileCounterDeviceGlobalEntries( const context_impl *CtxImpl) { - const std::vector ProfileCounters = + std::vector ProfileCounters = ProgramManager::getInstance().m_DeviceGlobals.getProfileCounterEntries(); - std::vector FoundEntries; - for (const auto &DGEntry : ProfileCounters) { - if (DGEntry->isAvailableInContext(CtxImpl)) - FoundEntries.push_back(DGEntry); - } - return FoundEntries; + const auto NewEnd = std::remove_if( + ProfileCounters.begin(), ProfileCounters.end(), + [CtxImpl](const DeviceGlobalMapEntry *DGEntry) { + return !DGEntry->isAvailableInContext(CtxImpl); + }); + ProfileCounters.erase(NewEnd, ProfileCounters.end()); + return ProfileCounters; } void ProgramManager::addOrInitHostPipeEntry(const void *HostPipePtr, From 1bbdc769de1695492b18cb274fe7b7db6aa74aba Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 13:33:22 -0400 Subject: [PATCH 13/19] Apply suggestion from @steffenlarsen Co-authored-by: Steffen Larsen --- sycl/source/detail/device_global_map_entry.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 5fe1e784fe8a8..5fe17ae447696 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -55,10 +55,8 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) { bool DeviceGlobalMapEntry::isAvailableInContext(const context_impl *CtxImpl) { std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; - for (const auto &It : MDeviceToUSMPtrMap) - if (It.first.second == CtxImpl) - return true; - return false; + std::any_of(MDeviceToUSMPtrMap.begin(), MDeviceToUSMPtrMap.end(), + [CtxImpl](const auto &It) { return It.first.second == CtxImpl; }); } bool DeviceGlobalMapEntry::isProfileCounter() { From 5d16d4e327de9cf79886919cf067d3bd8cdad4e2 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 13:33:41 -0400 Subject: [PATCH 14/19] Apply suggestion from @steffenlarsen Co-authored-by: Steffen Larsen --- sycl/source/detail/device_global_map_entry.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 5fe17ae447696..d5534e2f32129 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -87,6 +87,7 @@ __sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters, void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) { std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; + assert(isProfileCounter()); const std::size_t NumCounters = MDeviceGlobalTSize / sizeof(std::uint64_t); const std::uint64_t FnHash = [&] { const auto PrefixSize = std::string{"__profc_"}.size(); From bd4e4546ca28cc2e3e88a071667b754ae6c197a8 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 13:34:06 -0400 Subject: [PATCH 15/19] Apply suggestion from @steffenlarsen Co-authored-by: Steffen Larsen --- sycl/source/detail/device_global_map_entry.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index d5534e2f32129..2b6e50529b9f0 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -90,7 +90,7 @@ void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) { assert(isProfileCounter()); const std::size_t NumCounters = MDeviceGlobalTSize / sizeof(std::uint64_t); const std::uint64_t FnHash = [&] { - const auto PrefixSize = std::string{"__profc_"}.size(); + constexpr size_t PrefixSize = std::string_view{"__profc_"}.size(); constexpr int DecimalBase = 10; return std::strtoull(MUniqueId.substr(PrefixSize).c_str(), nullptr, DecimalBase); From 5c42c6c9daf4f1086223a7011152d9d8b3752caa Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 13:35:22 -0400 Subject: [PATCH 16/19] Update sycl/source/detail/device_global_map_entry.cpp Co-authored-by: Steffen Larsen --- sycl/source/detail/device_global_map_entry.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 2b6e50529b9f0..e3a3ab7b9b0dc 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -60,8 +60,8 @@ bool DeviceGlobalMapEntry::isAvailableInContext(const context_impl *CtxImpl) { } bool DeviceGlobalMapEntry::isProfileCounter() { - const std::string CounterPrefix = "__profc_"; - return MUniqueId.substr(0, CounterPrefix.size()) == CounterPrefix; + constexpr std::string_view CounterPrefix = "__profc_"; + return std::string_view{MUniqueId}.substr(0, CounterPrefix.size()) == CounterPrefix; } #ifdef _MSC_VER From b3fbe614b0acdd358d29e040ccc19ed9e5818967 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Mon, 29 Sep 2025 12:10:59 -0700 Subject: [PATCH 17/19] Address review comments Signed-off-by: Michael Aziz --- clang/lib/Driver/ToolChains/Clang.cpp | 3 + clang/lib/Frontend/CompilerInvocation.cpp | 2 - .../Instrumentation/InstrProfiling.cpp | 13 ++-- sycl/doc/design/DeviceCodeCoverage.md | 2 + .../source/detail/device_global_map_entry.cpp | 60 ++++++++++--------- .../source/detail/device_global_map_entry.hpp | 6 +- .../program_manager/program_manager.cpp | 10 ++-- 7 files changed, 52 insertions(+), 44 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7926b8edd5821..9842adc0a7f04 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5529,6 +5529,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-is-device"); CmdArgs.push_back("-fdeclare-spirv-builtins"); + // Set the atomic profile update flag to increment counters atomically. + CmdArgs.push_back("-fprofile-update=atomic"); + // Set O2 optimization level by default if (!Args.getLastArg(options::OPT_O_Group)) CmdArgs.push_back("-O2"); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 510bdf576b94a..9ff4eeb3fd3b1 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -5377,8 +5377,6 @@ bool CompilerInvocation::CreateFromArgsImpl( if (LangOpts.SYCLIsDevice) { // Set the triple of the host for SYCL device compile. Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; - // Set the atomic profile update flag to increment counters atomically. - Res.getCodeGenOpts().AtomicProfileUpdate = true; // If specified, create empty integration header files for now. CreateEmptyFile(LangOpts.SYCLIntHeader); CreateEmptyFile(LangOpts.SYCLIntFooter); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index ec31654645cf5..5fe4201750ace 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -1125,11 +1125,10 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { auto *Addr = Builder.CreateLoad(PointerType::get(M.getContext(), 1), Counters, "pgocount.addr"); const std::uint64_t Index = I->getIndex()->getZExtValue(); - if (Index > 0) { - auto *Offset = Builder.getInt64(Index * sizeof(std::uint64_t)); - return Builder.CreatePtrAdd(Addr, Offset, "pgocount.offset"); - } - return Addr; + if (Index == 0) + return Addr; + auto *Offset = Builder.getInt64(Index * sizeof(std::uint64_t)); + return Builder.CreatePtrAdd(Addr, Offset, "pgocount.offset"); } auto *Counters = getOrCreateRegionCounters(I); @@ -1679,8 +1678,8 @@ InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, auto *PtrTy = PointerType::get(Ctx, 1); auto *IntTy = Type::getInt64Ty(Ctx); auto *StructTy = StructType::get(Ctx, {PtrTy, IntTy}); - GlobalVariable *GV = new GlobalVariable(M, StructTy, false, Linkage, - Constant::getNullValue(StructTy), Name); + GlobalVariable *GV = new GlobalVariable( + M, StructTy, false, Linkage, Constant::getNullValue(StructTy), Name); const std::uint64_t FnHash = IndexedInstrProf::ComputeHash( getPGOFuncNameVarInitializer(Inc->getName())); const std::string FnName = std::string{"__profc_"} + std::to_string(FnHash); diff --git a/sycl/doc/design/DeviceCodeCoverage.md b/sycl/doc/design/DeviceCodeCoverage.md index 954b53b72bcb2..623023e703ac5 100644 --- a/sycl/doc/design/DeviceCodeCoverage.md +++ b/sycl/doc/design/DeviceCodeCoverage.md @@ -12,6 +12,8 @@ Profiling counters for code coverage are lowered by the compiler as device globa Each counter is annotated with a unique identifier (`sycl-unique-id`) of the form `__profc_`, where `` is a 64-bit unsigned integer uniquely identifying the instrumented function. The counter's size is also recorded via the `sycl-device-global-size` attribute. These attributes ensure that counters are discoverable and manageable by the SYCL runtime and integration headers/footers. +The profile counter device global is represented as an array of 8-byte integers (`std::uint64_t`). The number of elements in this array corresponds to the number of regions in the function being instrumented, where a region typically represents a distinct code branch or block. The size of the device global variable is therefore determined by multiplying the number of regions by eight bytes, and this value is recorded in the `sycl-device-global-size` attribute for use by the runtime and integration logic. + ### Integration with Device Global Infrastructure The device global infrastructure, as described in [DeviceGlobal.md](DeviceGlobal.md), provides mechanisms for mapping host and device instances of global variables, managing their lifetimes, and facilitating data transfer. Device-side coverage counters are treated as a special class of device globals: diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index e3a3ab7b9b0dc..8e3269dcd6101 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -53,17 +53,24 @@ OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) { } } -bool DeviceGlobalMapEntry::isAvailableInContext(const context_impl *CtxImpl) { +bool DeviceGlobalMapEntry::isAvailableInContext( + const context_impl *CtxImpl) const { std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; - std::any_of(MDeviceToUSMPtrMap.begin(), MDeviceToUSMPtrMap.end(), - [CtxImpl](const auto &It) { return It.first.second == CtxImpl; }); + return std::any_of( + MDeviceToUSMPtrMap.begin(), MDeviceToUSMPtrMap.end(), + [CtxImpl](const auto &It) { return It.first.second == CtxImpl; }); } -bool DeviceGlobalMapEntry::isProfileCounter() { +bool DeviceGlobalMapEntry::isProfileCounter() const { constexpr std::string_view CounterPrefix = "__profc_"; - return std::string_view{MUniqueId}.substr(0, CounterPrefix.size()) == CounterPrefix; + return std::string_view{MUniqueId}.substr(0, CounterPrefix.size()) == + CounterPrefix; } +// __sycl_increment_profile_counters must be defined as a weak symbol so that +// the program will link even if the profiling runtime is not linked in. When +// compiling with MSVC there is no weak attribute, so we use a pragma comment +// and default function to achieve the same effect. #ifdef _MSC_VER extern "C" void __sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters, @@ -87,7 +94,7 @@ __sycl_increment_profile_counters(std::uint64_t FnHash, std::size_t NumCounters, void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) { std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; - assert(isProfileCounter()); + assert(isProfileCounter() && "Not a profile counter device global."); const std::size_t NumCounters = MDeviceGlobalTSize / sizeof(std::uint64_t); const std::uint64_t FnHash = [&] { constexpr size_t PrefixSize = std::string_view{"__profc_"}.size(); @@ -97,31 +104,30 @@ void DeviceGlobalMapEntry::cleanupProfileCounter(context_impl *CtxImpl) { }(); for (const device_impl &Device : CtxImpl->getDevices()) { auto USMPtrIt = MDeviceToUSMPtrMap.find({&Device, CtxImpl}); - if (USMPtrIt != MDeviceToUSMPtrMap.end()) { - DeviceGlobalUSMMem &USMMem = USMPtrIt->second; + if (USMPtrIt == MDeviceToUSMPtrMap.end()) + continue; - // Get the increments from the USM pointer. - std::vector Increments(NumCounters); - const std::uint64_t *Counters = static_cast(USMMem.MPtr); - for (std::size_t I = 0; I < NumCounters; ++I) - Increments[I] = Counters[I]; + // Get the increments from the USM pointer. + DeviceGlobalUSMMem &USMMem = USMPtrIt->second; + std::vector Increments(NumCounters); + const std::uint64_t *Counters = static_cast(USMMem.MPtr); + for (std::size_t I = 0; I < NumCounters; ++I) + Increments[I] = Counters[I]; - // Call the weak symbol to update the profile counters. - if (&__sycl_increment_profile_counters) - __sycl_increment_profile_counters(FnHash, Increments.size(), - Increments.data()); + // Call the weak symbol to update the profile counters. + if (&__sycl_increment_profile_counters) + __sycl_increment_profile_counters(FnHash, Increments.size(), + Increments.data()); - // Free the USM memory and release the event if it exists. - detail::usm::freeInternal(USMMem.MPtr, CtxImpl); - if (USMMem.MInitEvent != nullptr) - CtxImpl->getAdapter().call( - USMMem.MInitEvent); + // Free the USM memory and release the event if it exists. + detail::usm::freeInternal(USMMem.MPtr, CtxImpl); + if (USMMem.MInitEvent != nullptr) + CtxImpl->getAdapter().call(USMMem.MInitEvent); - // Set to nullptr to avoid double free. - USMMem.MPtr = nullptr; - USMMem.MInitEvent = nullptr; - MDeviceToUSMPtrMap.erase(USMPtrIt); - } + // Set to nullptr to avoid double free. + USMMem.MPtr = nullptr; + USMMem.MInitEvent = nullptr; + MDeviceToUSMPtrMap.erase(USMPtrIt); } } diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index f129df44d1c6f..4538dcf4bc1eb 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -111,10 +111,10 @@ struct DeviceGlobalMapEntry { } // Checks if the device_global is available in the given context. - bool isAvailableInContext(const context_impl *CtxImpl); + bool isAvailableInContext(const context_impl *CtxImpl) const; // Returns true if the device_global is a profile counter. - bool isProfileCounter(); + bool isProfileCounter() const; // Cleans up a profile counter device global. void cleanupProfileCounter(context_impl *CtxImpl); @@ -144,7 +144,7 @@ struct DeviceGlobalMapEntry { std::map, DeviceGlobalUSMMem> MDeviceToUSMPtrMap; - std::mutex MDeviceToUSMPtrMapMutex; + mutable std::mutex MDeviceToUSMPtrMapMutex; }; } // namespace detail diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index ee4e51dfe0b44..4a36d580304c4 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2471,11 +2471,11 @@ ProgramManager::getProfileCounterDeviceGlobalEntries( const context_impl *CtxImpl) { std::vector ProfileCounters = ProgramManager::getInstance().m_DeviceGlobals.getProfileCounterEntries(); - const auto NewEnd = std::remove_if( - ProfileCounters.begin(), ProfileCounters.end(), - [CtxImpl](const DeviceGlobalMapEntry *DGEntry) { - return !DGEntry->isAvailableInContext(CtxImpl); - }); + const auto NewEnd = + std::remove_if(ProfileCounters.begin(), ProfileCounters.end(), + [CtxImpl](DeviceGlobalMapEntry *DGEntry) { + return !DGEntry->isAvailableInContext(CtxImpl); + }); ProfileCounters.erase(NewEnd, ProfileCounters.end()); return ProfileCounters; } From 7d5f8b17c0b200044cfa22536ee47de114012666 Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Tue, 30 Sep 2025 14:31:24 -0700 Subject: [PATCH 18/19] Disable Windows support Signed-off-by: Michael Aziz --- sycl/test-e2e/Coverage/device_code_coverage.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sycl/test-e2e/Coverage/device_code_coverage.cpp b/sycl/test-e2e/Coverage/device_code_coverage.cpp index 368a7f1879bbc..e6996e708f0d9 100644 --- a/sycl/test-e2e/Coverage/device_code_coverage.cpp +++ b/sycl/test-e2e/Coverage/device_code_coverage.cpp @@ -25,6 +25,9 @@ int main() { // REQUIRES: target-spir // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 +// UNSUPPORTED: windows +// UNSUPPORTED-INTENDED: On Windows, compiler-rt requires /MT but the flag +// cannot be used with SYCL. // CHECK: main: // CHECK: 8| 1|int main() { From 4c3d87b748a05d66361b1bba6c1b8a5627cd15cb Mon Sep 17 00:00:00 2001 From: Michael Aziz Date: Wed, 1 Oct 2025 13:36:49 -0700 Subject: [PATCH 19/19] Add `InstrProfiling` LIT test case Signed-off-by: Michael Aziz --- .../InstrProfiling/coverage_sycl.ll | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 llvm/test/Instrumentation/InstrProfiling/coverage_sycl.ll diff --git a/llvm/test/Instrumentation/InstrProfiling/coverage_sycl.ll b/llvm/test/Instrumentation/InstrProfiling/coverage_sycl.ll new file mode 100644 index 0000000000000..e2e5688432e0e --- /dev/null +++ b/llvm/test/Instrumentation/InstrProfiling/coverage_sycl.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -passes=instrprof -S | FileCheck %s + +target triple = "spir64-unknown-unknown" + +@__profn_foo = private constant [3 x i8] c"foo" +; CHECK: @__profc_foo = private global { ptr addrspace(1), i64 } zeroinitializer, section "__llvm_prf_cnts", comdat #0 +; CHECK: @__profd_foo = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_foo to i64) +@__profn_bar = private constant [3 x i8] c"bar" +; CHECK: @__profc_bar = private global { ptr addrspace(1), i64 } zeroinitializer, section "__llvm_prf_cnts", comdat #1 +; CHECK: @__profd_bar = private global { i64, i64, i64, i64, ptr, ptr, i32, [3 x i16], i32 } { i64 {{.*}}, i64 {{.*}}, i64 sub (i64 ptrtoint (ptr @__profc_bar to i64) + +; CHECK: @__llvm_prf_nm = {{.*}} section "__llvm_prf_names" + +define void @_Z3foov() { + call void @llvm.instrprof.cover(ptr @__profn_foo, i64 12345678, i32 1, i32 0) + ; CHECK: %pgocount.addr = load ptr addrspace(1), ptr @__profc_foo, align 8 + ; CHECK: store i8 0, ptr addrspace(1) %pgocount.addr, align 1 + ret void +} + +%class.A = type { ptr } +define dso_local void @_Z3barv(ptr nocapture nonnull align 8 %0) unnamed_addr #0 align 2 { + call void @llvm.instrprof.cover(ptr @__profn_bar, i64 87654321, i32 1, i32 0) + ; CHECK: %pgocount.addr = load ptr addrspace(1), ptr @__profc_bar, align 8 + ; CHECK: store i8 0, ptr addrspace(1) %pgocount.addr, align 1 + ret void +} + +declare void @llvm.instrprof.cover(ptr, i64, i32, i32)