From 60d0fd4d56af90fa92837628cfd31d80a8986aa2 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Mon, 30 Jun 2025 14:52:10 -0400 Subject: [PATCH 01/29] Potential memory leak with the JVMTI wallclock sampler --- ddprof-lib/src/main/cpp/wallClock.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index d703b81dd..ec1b689ca 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -173,18 +173,19 @@ void WallClockJVMTI::timerLoop() { bool do_filter = Profiler::instance()->threadFilter()->enabled(); int self = OS::threadId(); + JNIEnv *env = VM::jni(); for (int i = 0; i < threads_count; i++) { jthread thread = threads_ptr[i]; if (thread != nullptr) { ddprof::VMThread* nThread = static_cast(VMThread::fromJavaThread(jni, thread)); - if (nThread == nullptr) { - continue; - } - int tid = nThread->osThreadId(); - if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) { - threads.push_back({nThread, thread}); + if (nThread != nullptr) { + int tid = nThread->osThreadId(); + if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) { + threads.push_back({nThread, thread}); + } } + env->DeleteLocalRef(thread); } } jvmti->Deallocate((unsigned char*)threads_ptr); From 490054488d8cc7f91eef8476513d929227650a1d Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Mon, 30 Jun 2025 17:38:55 -0400 Subject: [PATCH 02/29] v1 --- ddprof-lib/src/main/cpp/wallClock.cpp | 24 +++++++++++++++--------- ddprof-lib/src/main/cpp/wallClock.h | 8 ++++++-- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index ec1b689ca..fc54953d2 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -173,19 +173,18 @@ void WallClockJVMTI::timerLoop() { bool do_filter = Profiler::instance()->threadFilter()->enabled(); int self = OS::threadId(); - JNIEnv *env = VM::jni(); for (int i = 0; i < threads_count; i++) { jthread thread = threads_ptr[i]; if (thread != nullptr) { ddprof::VMThread* nThread = static_cast(VMThread::fromJavaThread(jni, thread)); - if (nThread != nullptr) { - int tid = nThread->osThreadId(); - if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) { - threads.push_back({nThread, thread}); - } + if (nThread == nullptr) { + continue; + } + int tid = nThread->osThreadId(); + if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) { + threads.push_back({nThread, thread}); } - env->DeleteLocalRef(thread); } } jvmti->Deallocate((unsigned char*)threads_ptr); @@ -219,7 +218,11 @@ void WallClockJVMTI::timerLoop() { return true; }; - timerLoopCommon(collectThreads, sampleThreads, _reservoir_size, _interval); + auto cleanThreads = [](ThreadEntry& thread_entry) { + VM::jni()->DeleteLocalRef(thread_entry.java); + }; + + timerLoopCommon(collectThreads, sampleThreads, cleanThreads, _reservoir_size, _interval); // Don't forget to detach the thread VM::detachThread(); } @@ -258,5 +261,8 @@ void WallClockASGCT::timerLoop() { return true; }; - timerLoopCommon(collectThreads, sampleThreads, _reservoir_size, _interval); + auto doNothing = [](int tid) { + }; + + timerLoopCommon(collectThreads, sampleThreads, doNothing, _reservoir_size, _interval); } diff --git a/ddprof-lib/src/main/cpp/wallClock.h b/ddprof-lib/src/main/cpp/wallClock.h index b938fb918..a147177de 100644 --- a/ddprof-lib/src/main/cpp/wallClock.h +++ b/ddprof-lib/src/main/cpp/wallClock.h @@ -50,8 +50,8 @@ class BaseWallClock : public Engine { bool isEnabled() const; - template - void timerLoopCommon(CollectThreadsFunc collectThreads, SampleThreadsFunc sampleThreads, int reservoirSize, u64 interval) { + template + void timerLoopCommon(CollectThreadsFunc collectThreads, SampleThreadsFunc sampleThreads, CleanThreadFunc cleanThreads, int reservoirSize, u64 interval) { if (!_enabled.load(std::memory_order_acquire)) { return; } @@ -103,6 +103,10 @@ class BaseWallClock : public Engine { epoch.clean(); } + for (ThreadType thread : threads) { + cleanThreads(thread); + } + threads.clear(); // Get a random sleep duration // clamp the random interval to <1,2N-1> From 0315204c87a5e0a68dc314f9176b139024c3c815 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Mon, 30 Jun 2025 18:53:25 -0400 Subject: [PATCH 03/29] Don't sample terminated thread --- ddprof-lib/src/main/cpp/wallClock.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index fc54953d2..266a1bea0 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -207,7 +207,9 @@ void WallClockJVMTI::timerLoop() { } mode = convertJvmExecutionState(raw_thread_state); } - if (state == OSThreadState::UNKNOWN) { + if (state == OSThreadState::TERMINATED) { + return false; + } else if (state == OSThreadState::UNKNOWN) { state = OSThreadState::RUNNABLE; } event._thread_state = state; From 7b7e1cfeff784f47a9f2c86029574bc6b8ffe1f4 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Tue, 1 Jul 2025 08:25:59 -0400 Subject: [PATCH 04/29] v2 --- ddprof-lib/src/main/cpp/wallClock.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 266a1bea0..8ae5d5bbc 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -200,18 +200,19 @@ void WallClockJVMTI::timerLoop() { raw_thread_state < ddprof::JVMJavaThreadState::_thread_max_state; OSThreadState state = OSThreadState::UNKNOWN; ExecutionMode mode = ExecutionMode::UNKNOWN; - if (vm_thread && is_initialized) { - OSThreadState os_state = vm_thread->osThreadState(); - if (os_state != OSThreadState::UNKNOWN) { - state = os_state; - } - mode = convertJvmExecutionState(raw_thread_state); + if (vm_thread == nullptr || !is_initialized) { + return false; } + OSThreadState os_state = vm_thread->osThreadState(); if (state == OSThreadState::TERMINATED) { return false; } else if (state == OSThreadState::UNKNOWN) { state = OSThreadState::RUNNABLE; + } else { + state = os_state; } + mode = convertJvmExecutionState(raw_thread_state); + event._thread_state = state; event._execution_mode = mode; event._weight = 1; From fd3461a0f6c035f255e5c78008b3736022c31b60 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Tue, 1 Jul 2025 10:47:18 -0400 Subject: [PATCH 05/29] v3 --- ddprof-lib/src/main/cpp/profiler.cpp | 20 ++++++++++---------- ddprof-lib/src/main/cpp/wallClock.cpp | 4 ++-- ddprof-lib/src/main/cpp/wallClock.h | 1 + 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 4a76de0c3..548e8db86 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -607,6 +607,7 @@ u32 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event int num_frames = 0; + // This is a racy call, because the thread may not be alive at this moment. if (VM::jvmti()->GetStackTrace(thread, 0, _max_stack_depth, jvmti_frames, &num_frames) == JVMTI_ERROR_NONE && num_frames > 0) { // Convert to AsyncGetCallTrace format. // Note: jvmti_frames and frames may overlap. @@ -618,18 +619,17 @@ u32 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event // see https://github.com/async-profiler/async-profiler/pull/1090 LP64_ONLY(frames[i].padding = 0;) } - } - call_trace_id = _call_trace_storage.put(num_frames, frames, false, counter); - u64 duration = TSC::ticks() - startTime; - if (duration > 0) { - Counters::increment(UNWINDING_TIME_JVMTI, duration); // increment the JVMTI specific counter - } - } - if (!deferred) { - _jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event); + call_trace_id = _call_trace_storage.put(num_frames, frames, false, counter); + u64 duration = TSC::ticks() - startTime; + if (duration > 0) { + Counters::increment(UNWINDING_TIME_JVMTI, duration); // increment the JVMTI specific counter + } + } + if (!deferred) { + _jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event); + } } - _locks[lock_index].unlock(); return call_trace_id; } diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 8ae5d5bbc..187b80f6e 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -183,7 +183,7 @@ void WallClockJVMTI::timerLoop() { } int tid = nThread->osThreadId(); if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) { - threads.push_back({nThread, thread}); + threads.push_back({nThread, thread, tid}); } } } @@ -217,7 +217,7 @@ void WallClockJVMTI::timerLoop() { event._execution_mode = mode; event._weight = 1; - Profiler::instance()->recordJVMTISample(1, thread_entry.native->osThreadId(), thread_entry.java, BCI_WALL, &event, false); + Profiler::instance()->recordJVMTISample(1, thread_entry.tid, thread_entry.java, BCI_WALL, &event, false); return true; }; diff --git a/ddprof-lib/src/main/cpp/wallClock.h b/ddprof-lib/src/main/cpp/wallClock.h index a147177de..75bdac327 100644 --- a/ddprof-lib/src/main/cpp/wallClock.h +++ b/ddprof-lib/src/main/cpp/wallClock.h @@ -165,6 +165,7 @@ class WallClockJVMTI : public BaseWallClock { struct ThreadEntry { ddprof::VMThread* native; jthread java; + int tid; }; WallClockJVMTI() : BaseWallClock() {} const char* name() override { From e903361319bd57155124504ee856a46166ca5592 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Tue, 1 Jul 2025 12:27:39 -0400 Subject: [PATCH 06/29] v4 --- ddprof-lib/src/main/cpp/threadState.h | 3 ++- ddprof-lib/src/main/cpp/vmStructs_dd.h | 12 ++++++++++++ ddprof-lib/src/main/cpp/wallClock.cpp | 5 +++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/threadState.h b/ddprof-lib/src/main/cpp/threadState.h index 4434d4f13..d1351a66e 100644 --- a/ddprof-lib/src/main/cpp/threadState.h +++ b/ddprof-lib/src/main/cpp/threadState.h @@ -1,9 +1,10 @@ #ifndef JAVA_PROFILER_LIBRARY_THREAD_STATE_H #define JAVA_PROFILER_LIBRARY_THREAD_STATE_H +#include "arch.h" #include "jvmti.h" -enum class OSThreadState : int { +enum class OSThreadState : u32 { UNKNOWN = 0, NEW = 1, // The thread has been initialized but yet started RUNNABLE = 2, // Has been started and is runnable, but not necessarily running diff --git a/ddprof-lib/src/main/cpp/vmStructs_dd.h b/ddprof-lib/src/main/cpp/vmStructs_dd.h index 1139669b8..702dcce74 100644 --- a/ddprof-lib/src/main/cpp/vmStructs_dd.h +++ b/ddprof-lib/src/main/cpp/vmStructs_dd.h @@ -20,6 +20,7 @@ #include "common.h" #include "jniHelper.h" #include "jvmHeap.h" +#include "safeAccess.h" #include "threadState.h" #include "vmEntry.h" #include "vmStructs.h" @@ -139,6 +140,17 @@ namespace ddprof { } return OSThreadState::UNKNOWN; } + + OSThreadState osThreadStateSafe() { + if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_state_offset() >= 0) { + u32 *osthread = *(u32 **)at(ddprof::VMStructs::thread_osthread_offset()); + if (osthread != nullptr) { + return static_cast( + SafeAccess::load32(osthread, static_cast(OSThreadState::TERMINATED))); + } + } + return OSThreadState::UNKNOWN; + } }; class JVMFlag : public VMStructs { diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 187b80f6e..fd5a6e58b 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -153,6 +153,11 @@ void WallClockASGCT::initialize(Arguments& args) { OS::installSignalHandler(SIGVTALRM, sharedSignalHandler); } +/* This method is extremely racy! + * Thread references that are returned from JVMTI GetAllThreads(), only guarantee thread objects + * not to be collected by GC, they don't prevent threads from exiting. + * We have to be extremely cautious when accessing thread's data. + */ void WallClockJVMTI::timerLoop() { // Check for enablement before attaching/dettaching the current thread if (!isEnabled()) { From 0eda2ccad1e508c2481b4859163b0008585b6505 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 2 Jul 2025 10:07:23 -0400 Subject: [PATCH 07/29] Safe access --- ddprof-lib/src/main/cpp/vmStructs_dd.h | 52 ++++++++++++++++++++++++-- ddprof-lib/src/main/cpp/wallClock.cpp | 27 ++++++++++--- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/ddprof-lib/src/main/cpp/vmStructs_dd.h b/ddprof-lib/src/main/cpp/vmStructs_dd.h index 702dcce74..ad2c047b1 100644 --- a/ddprof-lib/src/main/cpp/vmStructs_dd.h +++ b/ddprof-lib/src/main/cpp/vmStructs_dd.h @@ -67,9 +67,19 @@ namespace ddprof { inline static int thread_osthread_offset() { return _thread_osthread_offset; } + inline static int osthread_state_offset() { return _osthread_state_offset; } + + inline static int osthread_id_offset() { + return _osthread_id_offset; + } + + inline static int thread_state_offset() { + return _thread_state_offset; + } + inline static int flag_type_offset() { return _flag_type_offset; } @@ -145,11 +155,47 @@ namespace ddprof { if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_state_offset() >= 0) { u32 *osthread = *(u32 **)at(ddprof::VMStructs::thread_osthread_offset()); if (osthread != nullptr) { - return static_cast( - SafeAccess::load32(osthread, static_cast(OSThreadState::TERMINATED))); + // If the location is accessible, the thread must have been terminated + u32 value = SafeAccess::load32(osthread + ddprof::VMStructs::osthread_state_offset(), + static_cast(OSThreadState::TERMINATED)); + // Bad value, treat it as terminated + if (value > static_cast(OSThreadState::SYSCALL)) { + return OSThreadState::TERMINATED; + } + return static_cast(value); + } + } + return OSThreadState::UNKNOWN; + } + + int osThreadIdSafe() { + if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_id_offset() >=0) { + u32* osthread = *(u32**) at(ddprof::VMStructs::thread_osthread_offset()); + if (osthread == nullptr) { + return -1; + } else { + u32 value = SafeAccess::load32(osthread + ddprof::VMStructs::osthread_id_offset(), -1); + return static_cast(value); } } - return OSThreadState::UNKNOWN; + return -1; + } + + int stateSafe() { + int offset = ddprof::VMStructs::thread_state_offset(); + if (offset >= 0) { + u32* state = *(u32**)at(offset); + if (state == nullptr) { + return 0; + } else { + u32 value = SafeAccess::load32(state, 0); + if (value > static_cast(_thread_max_state)) { + value = 0; + } + return static_cast(value); + } + } + return 0; } }; diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index fd5a6e58b..dc7378721 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -153,6 +153,8 @@ void WallClockASGCT::initialize(Arguments& args) { OS::installSignalHandler(SIGVTALRM, sharedSignalHandler); } +static long eventCount = 0; + /* This method is extremely racy! * Thread references that are returned from JVMTI GetAllThreads(), only guarantee thread objects * not to be collected by GC, they don't prevent threads from exiting. @@ -186,7 +188,8 @@ void WallClockJVMTI::timerLoop() { if (nThread == nullptr) { continue; } - int tid = nThread->osThreadId(); + // Racy, use safe version + int tid = nThread->osThreadIdSafe(); if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) { threads.push_back({nThread, thread, tid}); } @@ -197,19 +200,30 @@ void WallClockJVMTI::timerLoop() { auto sampleThreads = [&](ThreadEntry& thread_entry, int& num_failures, int& threads_already_exited, int& permission_denied) { static jint max_stack_depth = (jint)Profiler::instance()->max_stack_depth(); + jvmtiEnv* jvmti = VM::jvmti(); + jint thread_state; + // Reliable test + if (jvmti->GetThreadState(thread_entry.java, &thread_state) != JVMTI_ERROR_NONE || + (thread_state & JVMTI_THREAD_STATE_ALIVE) == 0) { + printf("Thread no longer alive\n"); + return false; + } + // Following code is racy, use safe version to access native structure. ExecutionEvent event; ddprof::VMThread* vm_thread = thread_entry.native; - int raw_thread_state = vm_thread->state(); + int raw_thread_state = vm_thread->stateSafe(); bool is_initialized = raw_thread_state >= ddprof::JVMJavaThreadState::_thread_in_native && raw_thread_state < ddprof::JVMJavaThreadState::_thread_max_state; OSThreadState state = OSThreadState::UNKNOWN; ExecutionMode mode = ExecutionMode::UNKNOWN; if (vm_thread == nullptr || !is_initialized) { + printf("Thread not initialized\n"); return false; } - OSThreadState os_state = vm_thread->osThreadState(); + OSThreadState os_state = vm_thread->osThreadStateSafe(); if (state == OSThreadState::TERMINATED) { + printf("Thread terminated\n"); return false; } else if (state == OSThreadState::UNKNOWN) { state = OSThreadState::RUNNABLE; @@ -223,16 +237,19 @@ void WallClockJVMTI::timerLoop() { event._weight = 1; Profiler::instance()->recordJVMTISample(1, thread_entry.tid, thread_entry.java, BCI_WALL, &event, false); + eventCount ++; return true; }; - auto cleanThreads = [](ThreadEntry& thread_entry) { + auto clearThreadRefs = [](ThreadEntry& thread_entry) { VM::jni()->DeleteLocalRef(thread_entry.java); }; - timerLoopCommon(collectThreads, sampleThreads, cleanThreads, _reservoir_size, _interval); + eventCount = 0; + timerLoopCommon(collectThreads, sampleThreads, clearThreadRefs, _reservoir_size, _interval); // Don't forget to detach the thread VM::detachThread(); + printf("Event generated: %d\n", eventCount); } void WallClockASGCT::timerLoop() { From 194a62cb7f69c22090fcbed3ae59f9ed1f270ed1 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 2 Jul 2025 10:18:45 -0400 Subject: [PATCH 08/29] Fix thread state --- ddprof-lib/src/main/cpp/vmStructs_dd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/vmStructs_dd.h b/ddprof-lib/src/main/cpp/vmStructs_dd.h index ad2c047b1..376bbdc46 100644 --- a/ddprof-lib/src/main/cpp/vmStructs_dd.h +++ b/ddprof-lib/src/main/cpp/vmStructs_dd.h @@ -184,7 +184,7 @@ namespace ddprof { int stateSafe() { int offset = ddprof::VMStructs::thread_state_offset(); if (offset >= 0) { - u32* state = *(u32**)at(offset); + u32* state = (u32*)at(offset); if (state == nullptr) { return 0; } else { From fb7572aff43b0fb902dd473d0c20cc7024c4fef3 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 2 Jul 2025 10:42:04 -0400 Subject: [PATCH 09/29] v5 --- ddprof-lib/src/main/cpp/vmStructs_dd.h | 8 ++++---- ddprof-lib/src/main/cpp/wallClock.cpp | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ddprof-lib/src/main/cpp/vmStructs_dd.h b/ddprof-lib/src/main/cpp/vmStructs_dd.h index 376bbdc46..211bdc0b2 100644 --- a/ddprof-lib/src/main/cpp/vmStructs_dd.h +++ b/ddprof-lib/src/main/cpp/vmStructs_dd.h @@ -153,10 +153,10 @@ namespace ddprof { OSThreadState osThreadStateSafe() { if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_state_offset() >= 0) { - u32 *osthread = *(u32 **)at(ddprof::VMStructs::thread_osthread_offset()); + const char *osthread = *(char **)at(ddprof::VMStructs::thread_osthread_offset()); if (osthread != nullptr) { // If the location is accessible, the thread must have been terminated - u32 value = SafeAccess::load32(osthread + ddprof::VMStructs::osthread_state_offset(), + u32 value = SafeAccess::load32((u32*)(osthread + ddprof::VMStructs::osthread_state_offset()), static_cast(OSThreadState::TERMINATED)); // Bad value, treat it as terminated if (value > static_cast(OSThreadState::SYSCALL)) { @@ -170,11 +170,11 @@ namespace ddprof { int osThreadIdSafe() { if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_id_offset() >=0) { - u32* osthread = *(u32**) at(ddprof::VMStructs::thread_osthread_offset()); + const char* osthread = *(const char**) at(ddprof::VMStructs::thread_osthread_offset()); if (osthread == nullptr) { return -1; } else { - u32 value = SafeAccess::load32(osthread + ddprof::VMStructs::osthread_id_offset(), -1); + u32 value = SafeAccess::load32((u32*)(osthread + ddprof::VMStructs::osthread_id_offset()), -1); return static_cast(value); } } diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index dc7378721..f599624dc 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -190,6 +190,8 @@ void WallClockJVMTI::timerLoop() { } // Racy, use safe version int tid = nThread->osThreadIdSafe(); + assert(tid == nThread->osThreadId()); + if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) { threads.push_back({nThread, thread, tid}); } From 0643123ce9ad93a958b54e0c6e5902f76bbed032 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 2 Jul 2025 11:56:18 -0400 Subject: [PATCH 10/29] Cleanup --- ddprof-lib/src/main/cpp/threadFilter.h | 4 ++ ddprof-lib/src/main/cpp/vmStructs_dd.h | 4 +- ddprof-lib/src/main/cpp/wallClock.cpp | 84 +++++++++++--------------- 3 files changed, 43 insertions(+), 49 deletions(-) diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index 8db82f9fb..e745d8ab8 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -70,6 +70,10 @@ class ThreadFilter { void init(const char *filter); void clear(); + inline bool isValid(int thread_id) { + return thread_id > 0 && thread_id < _max_thread_id; + } + bool accept(int thread_id); void add(int thread_id); void remove(int thread_id); diff --git a/ddprof-lib/src/main/cpp/vmStructs_dd.h b/ddprof-lib/src/main/cpp/vmStructs_dd.h index 211bdc0b2..e26f2ce1f 100644 --- a/ddprof-lib/src/main/cpp/vmStructs_dd.h +++ b/ddprof-lib/src/main/cpp/vmStructs_dd.h @@ -151,6 +151,7 @@ namespace ddprof { return OSThreadState::UNKNOWN; } + // Safer version OSThreadState osThreadStateSafe() { if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_state_offset() >= 0) { const char *osthread = *(char **)at(ddprof::VMStructs::thread_osthread_offset()); @@ -158,7 +159,7 @@ namespace ddprof { // If the location is accessible, the thread must have been terminated u32 value = SafeAccess::load32((u32*)(osthread + ddprof::VMStructs::osthread_state_offset()), static_cast(OSThreadState::TERMINATED)); - // Bad value, treat it as terminated + // Checking for bad data if (value > static_cast(OSThreadState::SYSCALL)) { return OSThreadState::TERMINATED; } @@ -189,6 +190,7 @@ namespace ddprof { return 0; } else { u32 value = SafeAccess::load32(state, 0); + // Checking for bad data if (value > static_cast(_thread_max_state)) { value = 0; } diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index f599624dc..19599f284 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -153,12 +153,10 @@ void WallClockASGCT::initialize(Arguments& args) { OS::installSignalHandler(SIGVTALRM, sharedSignalHandler); } -static long eventCount = 0; - /* This method is extremely racy! - * Thread references that are returned from JVMTI GetAllThreads(), only guarantee thread objects - * not to be collected by GC, they don't prevent threads from exiting. - * We have to be extremely cautious when accessing thread's data. + * Thread references, that are returned from JVMTI::GetAllThreads(), only guarantee thread objects + * are not collected by GCs, they don't prevent threads from exiting. + * We have to be extremely careful when accessing thread's data, so it may not be valid. */ void WallClockJVMTI::timerLoop() { // Check for enablement before attaching/dettaching the current thread @@ -166,50 +164,45 @@ void WallClockJVMTI::timerLoop() { return; } // Attach to JVM as the first step - VM::attachThread("Datadog Profiler Wallclock Sampler"); - auto collectThreads = [&](std::vector& threads) { - jvmtiEnv* jvmti = VM::jvmti(); - if (jvmti == nullptr) { - return; - } - JNIEnv* jni = VM::jni(); - - jint threads_count = 0; - jthread* threads_ptr = nullptr; - jvmti->GetAllThreads(&threads_count, &threads_ptr); - - bool do_filter = Profiler::instance()->threadFilter()->enabled(); - int self = OS::threadId(); - - for (int i = 0; i < threads_count; i++) { - jthread thread = threads_ptr[i]; - if (thread != nullptr) { - ddprof::VMThread* nThread = static_cast(VMThread::fromJavaThread(jni, thread)); - if (nThread == nullptr) { - continue; - } - // Racy, use safe version - int tid = nThread->osThreadIdSafe(); - assert(tid == nThread->osThreadId()); - - if (tid != self && (!do_filter || Profiler::instance()->threadFilter()->accept(tid))) { - threads.push_back({nThread, thread, tid}); - } + VM::attachThread("Datadog Profiler Wallclock Sampler"); + auto collectThreads = [&](std::vector& threads) { + jvmtiEnv* jvmti = VM::jvmti(); + if (jvmti == nullptr) { + return; + } + JNIEnv* jni = VM::jni(); + + jint threads_count = 0; + jthread* threads_ptr = nullptr; + jvmti->GetAllThreads(&threads_count, &threads_ptr); + + ThreadFilter* threadFilter = Profiler::instance()->threadFilter(); + bool do_filter = threadFilter->enabled(); + int self = OS::threadId(); + + for (int i = 0; i < threads_count; i++) { + jthread thread = threads_ptr[i]; + if (thread != nullptr) { + ddprof::VMThread* nThread = static_cast(VMThread::fromJavaThread(jni, thread)); + if (nThread == nullptr) { + continue; + } + // Racy, use safer version and check + int tid = nThread->osThreadIdSafe(); + if (!threadFilter->isValid(tid)) { + continue; + } + + if (tid != self && (!do_filter || threadFilter->accept(tid))) { + threads.push_back({nThread, thread, tid}); } } - jvmti->Deallocate((unsigned char*)threads_ptr); + } + jvmti->Deallocate((unsigned char*)threads_ptr); }; auto sampleThreads = [&](ThreadEntry& thread_entry, int& num_failures, int& threads_already_exited, int& permission_denied) { static jint max_stack_depth = (jint)Profiler::instance()->max_stack_depth(); - jvmtiEnv* jvmti = VM::jvmti(); - jint thread_state; - // Reliable test - if (jvmti->GetThreadState(thread_entry.java, &thread_state) != JVMTI_ERROR_NONE || - (thread_state & JVMTI_THREAD_STATE_ALIVE) == 0) { - printf("Thread no longer alive\n"); - return false; - } // Following code is racy, use safe version to access native structure. ExecutionEvent event; @@ -220,12 +213,10 @@ void WallClockJVMTI::timerLoop() { OSThreadState state = OSThreadState::UNKNOWN; ExecutionMode mode = ExecutionMode::UNKNOWN; if (vm_thread == nullptr || !is_initialized) { - printf("Thread not initialized\n"); return false; } OSThreadState os_state = vm_thread->osThreadStateSafe(); if (state == OSThreadState::TERMINATED) { - printf("Thread terminated\n"); return false; } else if (state == OSThreadState::UNKNOWN) { state = OSThreadState::RUNNABLE; @@ -239,7 +230,6 @@ void WallClockJVMTI::timerLoop() { event._weight = 1; Profiler::instance()->recordJVMTISample(1, thread_entry.tid, thread_entry.java, BCI_WALL, &event, false); - eventCount ++; return true; }; @@ -247,11 +237,9 @@ void WallClockJVMTI::timerLoop() { VM::jni()->DeleteLocalRef(thread_entry.java); }; - eventCount = 0; timerLoopCommon(collectThreads, sampleThreads, clearThreadRefs, _reservoir_size, _interval); // Don't forget to detach the thread VM::detachThread(); - printf("Event generated: %d\n", eventCount); } void WallClockASGCT::timerLoop() { From a17691bc4fa17b5cefab61d7e7dc325c16fea53f Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 2 Jul 2025 17:08:05 -0400 Subject: [PATCH 11/29] Cleanup --- ddprof-lib/src/main/cpp/profiler.cpp | 20 ++++++++++---------- ddprof-lib/src/main/cpp/threadFilter.h | 2 +- ddprof-lib/src/main/cpp/threadState.h | 3 +-- ddprof-lib/src/main/cpp/wallClock.cpp | 5 ++--- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 548e8db86..9eeb4dc92 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -607,7 +607,6 @@ u32 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event int num_frames = 0; - // This is a racy call, because the thread may not be alive at this moment. if (VM::jvmti()->GetStackTrace(thread, 0, _max_stack_depth, jvmti_frames, &num_frames) == JVMTI_ERROR_NONE && num_frames > 0) { // Convert to AsyncGetCallTrace format. // Note: jvmti_frames and frames may overlap. @@ -619,17 +618,18 @@ u32 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event // see https://github.com/async-profiler/async-profiler/pull/1090 LP64_ONLY(frames[i].padding = 0;) } + } - call_trace_id = _call_trace_storage.put(num_frames, frames, false, counter); - u64 duration = TSC::ticks() - startTime; - if (duration > 0) { - Counters::increment(UNWINDING_TIME_JVMTI, duration); // increment the JVMTI specific counter - } - } - if (!deferred) { - _jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event); - } + call_trace_id = _call_trace_storage.put(num_frames, frames, false, counter); + u64 duration = TSC::ticks() - startTime; + if (duration > 0) { + Counters::increment(UNWINDING_TIME_JVMTI, duration); // increment the JVMTI specific counter + } + if (!deferred) { + _jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event); + } } + _locks[lock_index].unlock(); return call_trace_id; } diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index e745d8ab8..7454be576 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -71,7 +71,7 @@ class ThreadFilter { void clear(); inline bool isValid(int thread_id) { - return thread_id > 0 && thread_id < _max_thread_id; + return thread_id >= 0 && thread_id < _max_thread_id; } bool accept(int thread_id); diff --git a/ddprof-lib/src/main/cpp/threadState.h b/ddprof-lib/src/main/cpp/threadState.h index d1351a66e..4434d4f13 100644 --- a/ddprof-lib/src/main/cpp/threadState.h +++ b/ddprof-lib/src/main/cpp/threadState.h @@ -1,10 +1,9 @@ #ifndef JAVA_PROFILER_LIBRARY_THREAD_STATE_H #define JAVA_PROFILER_LIBRARY_THREAD_STATE_H -#include "arch.h" #include "jvmti.h" -enum class OSThreadState : u32 { +enum class OSThreadState : int { UNKNOWN = 0, NEW = 1, // The thread has been initialized but yet started RUNNABLE = 2, // Has been started and is runnable, but not necessarily running diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 19599f284..2af502a99 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -204,7 +204,7 @@ void WallClockJVMTI::timerLoop() { auto sampleThreads = [&](ThreadEntry& thread_entry, int& num_failures, int& threads_already_exited, int& permission_denied) { static jint max_stack_depth = (jint)Profiler::instance()->max_stack_depth(); - // Following code is racy, use safe version to access native structure. + // Following code is racy, use safer version to access native structure. ExecutionEvent event; ddprof::VMThread* vm_thread = thread_entry.native; int raw_thread_state = vm_thread->stateSafe(); @@ -276,8 +276,7 @@ void WallClockASGCT::timerLoop() { return true; }; - auto doNothing = [](int tid) { - }; + auto doNothing = [](int tid) { }; timerLoopCommon(collectThreads, sampleThreads, doNothing, _reservoir_size, _interval); } From 8283f260f373d5bf8f262db74a9a2d625c7c957b Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Thu, 3 Jul 2025 10:23:01 -0400 Subject: [PATCH 12/29] safeFetch impl --- ddprof-lib/src/main/cpp/safeAccess.cpp | 26 ++++++++++++++++++++++++ ddprof-lib/src/main/cpp/safeAccess.h | 19 +++++++++++++++++ ddprof-lib/src/main/cpp/vmStructs_dd.cpp | 5 +++++ ddprof-lib/src/main/cpp/vmStructs_dd.h | 19 +++++++++-------- 4 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 ddprof-lib/src/main/cpp/safeAccess.cpp diff --git a/ddprof-lib/src/main/cpp/safeAccess.cpp b/ddprof-lib/src/main/cpp/safeAccess.cpp new file mode 100644 index 000000000..52476b48a --- /dev/null +++ b/ddprof-lib/src/main/cpp/safeAccess.cpp @@ -0,0 +1,26 @@ +/* + * Copyright 2025 Datadog, Inc + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include "safeAccess.h" +#include + +SafeAccess::SafeFetch32 SafeAccess::_safeFetch32Func = nullptr; + +void SafeAccess::initSafeFetch(CodeCache* libjvm) { + _safeFetch32Func = (SafeFetch32)libjvm->findSymbol("SafeFetch32_impl"); + assert(_safeFetch32Func != nullptr); +} diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h index 0e39f0b2a..ffad8ddd1 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.h +++ b/ddprof-lib/src/main/cpp/safeAccess.h @@ -18,6 +18,7 @@ #define _SAFEACCESS_H #include "arch_dd.h" +#include "codeCache.h" #include #ifdef __clang__ @@ -28,7 +29,25 @@ #define NOADDRSANITIZE __attribute__((no_sanitize("address"))) class SafeAccess { +private: + typedef int (*SafeFetch32)(int* ptr, int defaultValue); + static SafeFetch32 _safeFetch32Func; + public: + static void initSafeFetch(CodeCache* libjvm); + + static bool hasSafeFetch() { + return _safeFetch32Func != nullptr; + } + + static int safeFetch(int* ptr, int defaultValue) { + if (_safeFetch32Func != nullptr) { + return _safeFetch32Func(ptr, defaultValue); + } else { + return defaultValue; + } + } + NOINLINE NOADDRSANITIZE __attribute__((aligned(16))) static void *load(void **ptr) { return *ptr; } diff --git a/ddprof-lib/src/main/cpp/vmStructs_dd.cpp b/ddprof-lib/src/main/cpp/vmStructs_dd.cpp index d8efce3b8..4dc043a54 100644 --- a/ddprof-lib/src/main/cpp/vmStructs_dd.cpp +++ b/ddprof-lib/src/main/cpp/vmStructs_dd.cpp @@ -44,6 +44,7 @@ namespace ddprof { initOffsets(); initJvmFunctions(); initUnsafeFunctions(); + initSafeFetch(libjvm); } void VMStructs_::initOffsets() { @@ -98,6 +99,10 @@ namespace ddprof { } } + void VMStructs_::initSafeFetch(CodeCache* libjvm) { + SafeAccess::initSafeFetch(libjvm); + } + const void *VMStructs_::findHeapUsageFunc() { if (VM::hotspot_version() < 17) { // For JDK 11 it is really unreliable to find the memory_usage function - diff --git a/ddprof-lib/src/main/cpp/vmStructs_dd.h b/ddprof-lib/src/main/cpp/vmStructs_dd.h index e26f2ce1f..33e3b5234 100644 --- a/ddprof-lib/src/main/cpp/vmStructs_dd.h +++ b/ddprof-lib/src/main/cpp/vmStructs_dd.h @@ -52,6 +52,8 @@ namespace ddprof { static void initOffsets(); static void initJvmFunctions(); static void initUnsafeFunctions(); + // We need safe access for all jdk versions + static void initSafeFetch(CodeCache* libjvm); static void checkNativeBinding(jvmtiEnv *jvmti, JNIEnv *jni, jmethodID method, void *address); @@ -157,10 +159,10 @@ namespace ddprof { const char *osthread = *(char **)at(ddprof::VMStructs::thread_osthread_offset()); if (osthread != nullptr) { // If the location is accessible, the thread must have been terminated - u32 value = SafeAccess::load32((u32*)(osthread + ddprof::VMStructs::osthread_state_offset()), - static_cast(OSThreadState::TERMINATED)); + int value = SafeAccess::safeFetch((int*)(osthread + ddprof::VMStructs::osthread_state_offset()), + static_cast(OSThreadState::TERMINATED)); // Checking for bad data - if (value > static_cast(OSThreadState::SYSCALL)) { + if (value > static_cast(OSThreadState::SYSCALL)) { return OSThreadState::TERMINATED; } return static_cast(value); @@ -175,8 +177,7 @@ namespace ddprof { if (osthread == nullptr) { return -1; } else { - u32 value = SafeAccess::load32((u32*)(osthread + ddprof::VMStructs::osthread_id_offset()), -1); - return static_cast(value); + return SafeAccess::safeFetch((int*)(osthread + ddprof::VMStructs::osthread_id_offset()), -1); } } return -1; @@ -185,16 +186,16 @@ namespace ddprof { int stateSafe() { int offset = ddprof::VMStructs::thread_state_offset(); if (offset >= 0) { - u32* state = (u32*)at(offset); + int* state = (int*)at(offset); if (state == nullptr) { return 0; } else { - u32 value = SafeAccess::load32(state, 0); + int value = SafeAccess::safeFetch(state, 0); // Checking for bad data - if (value > static_cast(_thread_max_state)) { + if (value > _thread_max_state) { value = 0; } - return static_cast(value); + return value; } } return 0; From 79f57cf463765d80877566b3ca970b205dbeef9a Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Thu, 3 Jul 2025 11:28:44 -0400 Subject: [PATCH 13/29] jdk11 support --- ddprof-lib/src/main/cpp/safeAccess.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ddprof-lib/src/main/cpp/safeAccess.cpp b/ddprof-lib/src/main/cpp/safeAccess.cpp index 52476b48a..8ad02c33a 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.cpp +++ b/ddprof-lib/src/main/cpp/safeAccess.cpp @@ -22,5 +22,9 @@ SafeAccess::SafeFetch32 SafeAccess::_safeFetch32Func = nullptr; void SafeAccess::initSafeFetch(CodeCache* libjvm) { _safeFetch32Func = (SafeFetch32)libjvm->findSymbol("SafeFetch32_impl"); + if (_safeFetch32Func == nullptr) { + // jdk11 stub implementation + _safeFetch32Func = (SafeFetch32)libjvm->findSymbol("_ZN12StubRoutines18_safefetch32_entryE"); + } assert(_safeFetch32Func != nullptr); } From 8e37863c8908e412c6c72c27278790dbff085750 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Thu, 3 Jul 2025 16:49:51 -0400 Subject: [PATCH 14/29] v6 --- ddprof-lib/src/main/cpp/safeAccess.cpp | 9 ++++----- ddprof-lib/src/main/cpp/safeAccess.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/ddprof-lib/src/main/cpp/safeAccess.cpp b/ddprof-lib/src/main/cpp/safeAccess.cpp index 8ad02c33a..d56459c97 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.cpp +++ b/ddprof-lib/src/main/cpp/safeAccess.cpp @@ -16,15 +16,14 @@ #include "safeAccess.h" -#include SafeAccess::SafeFetch32 SafeAccess::_safeFetch32Func = nullptr; void SafeAccess::initSafeFetch(CodeCache* libjvm) { _safeFetch32Func = (SafeFetch32)libjvm->findSymbol("SafeFetch32_impl"); - if (_safeFetch32Func == nullptr) { - // jdk11 stub implementation - _safeFetch32Func = (SafeFetch32)libjvm->findSymbol("_ZN12StubRoutines18_safefetch32_entryE"); + if (_safeFetch32Func == nullptr && !WX_MEMORY) { + // jdk11 stub implementation other than Macosx/aarch64 + void** entry = (void**)libjvm->findSymbol("_ZN12StubRoutines18_safefetch32_entryE"); + _safeFetch32Func = (SafeFetch32)*entry; } - assert(_safeFetch32Func != nullptr); } diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h index ffad8ddd1..af9b17f18 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.h +++ b/ddprof-lib/src/main/cpp/safeAccess.h @@ -32,7 +32,6 @@ class SafeAccess { private: typedef int (*SafeFetch32)(int* ptr, int defaultValue); static SafeFetch32 _safeFetch32Func; - public: static void initSafeFetch(CodeCache* libjvm); From 57e602282509eb253ca7a70d25a133f57d9cccc2 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Mon, 7 Jul 2025 14:01:37 -0400 Subject: [PATCH 15/29] enhance and cleanup --- ddprof-lib/src/main/cpp/safeAccess.cpp | 6 ++++++ ddprof-lib/src/main/cpp/safeAccess.h | 13 ++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/ddprof-lib/src/main/cpp/safeAccess.cpp b/ddprof-lib/src/main/cpp/safeAccess.cpp index d56459c97..06f6eef4d 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.cpp +++ b/ddprof-lib/src/main/cpp/safeAccess.cpp @@ -20,10 +20,16 @@ SafeAccess::SafeFetch32 SafeAccess::_safeFetch32Func = nullptr; void SafeAccess::initSafeFetch(CodeCache* libjvm) { + // Hotspot JVM's safefetch implementation appears better, e.g. it actually returns errorValue, + // when the address is invalid. let's use it whenever possible. _safeFetch32Func = (SafeFetch32)libjvm->findSymbol("SafeFetch32_impl"); if (_safeFetch32Func == nullptr && !WX_MEMORY) { // jdk11 stub implementation other than Macosx/aarch64 void** entry = (void**)libjvm->findSymbol("_ZN12StubRoutines18_safefetch32_entryE"); _safeFetch32Func = (SafeFetch32)*entry; } + // Fallback + if (_safeFetch32Func == nullptr) { + _safeFetch32Func = (SafeFetch32)load32; + } } diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h index af9b17f18..73e9e1f2a 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.h +++ b/ddprof-lib/src/main/cpp/safeAccess.h @@ -19,6 +19,7 @@ #include "arch_dd.h" #include "codeCache.h" +#include #include #ifdef __clang__ @@ -30,8 +31,9 @@ class SafeAccess { private: - typedef int (*SafeFetch32)(int* ptr, int defaultValue); + typedef int (*SafeFetch32)(int* ptr, int errorValue); static SafeFetch32 _safeFetch32Func; + public: static void initSafeFetch(CodeCache* libjvm); @@ -39,12 +41,9 @@ class SafeAccess { return _safeFetch32Func != nullptr; } - static int safeFetch(int* ptr, int defaultValue) { - if (_safeFetch32Func != nullptr) { - return _safeFetch32Func(ptr, defaultValue); - } else { - return defaultValue; - } + static inline int safeFetch(int* ptr, int errorValue) { + assert(_safeFetch32Func != nullptr); + return _safeFetch32Func(ptr, errorValue); } NOINLINE NOADDRSANITIZE __attribute__((aligned(16))) static void *load(void **ptr) { From c4182d2d36158b292e8f352cbb204cda1a38897c Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Mon, 7 Jul 2025 14:20:04 -0400 Subject: [PATCH 16/29] fix nullptr deference --- ddprof-lib/src/main/cpp/safeAccess.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/safeAccess.cpp b/ddprof-lib/src/main/cpp/safeAccess.cpp index 06f6eef4d..69cca5893 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.cpp +++ b/ddprof-lib/src/main/cpp/safeAccess.cpp @@ -26,7 +26,9 @@ void SafeAccess::initSafeFetch(CodeCache* libjvm) { if (_safeFetch32Func == nullptr && !WX_MEMORY) { // jdk11 stub implementation other than Macosx/aarch64 void** entry = (void**)libjvm->findSymbol("_ZN12StubRoutines18_safefetch32_entryE"); - _safeFetch32Func = (SafeFetch32)*entry; + if (entry != nullptr && *entry != nullptr) { + _safeFetch32Func = (SafeFetch32)*entry; + } } // Fallback if (_safeFetch32Func == nullptr) { From b01830520da93e9b71770112be70f592ef7b06fe Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Mon, 7 Jul 2025 14:30:43 -0400 Subject: [PATCH 17/29] More cleanup --- ddprof-lib/src/main/cpp/safeAccess.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h index 73e9e1f2a..aa1f2c34c 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.h +++ b/ddprof-lib/src/main/cpp/safeAccess.h @@ -37,10 +37,6 @@ class SafeAccess { public: static void initSafeFetch(CodeCache* libjvm); - static bool hasSafeFetch() { - return _safeFetch32Func != nullptr; - } - static inline int safeFetch(int* ptr, int errorValue) { assert(_safeFetch32Func != nullptr); return _safeFetch32Func(ptr, errorValue); From ecf380f0a617761b2ce2479126c620e61bf12cf7 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Tue, 8 Jul 2025 09:19:58 -0400 Subject: [PATCH 18/29] Erwan's finding --- ddprof-lib/src/main/cpp/profiler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 9eeb4dc92..4c28bb04d 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -972,6 +972,7 @@ void Profiler::updateJavaThreadNames() { JNIEnv *jni = VM::jni(); for (int i = 0; i < thread_count; i++) { updateThreadName(jvmti, jni, thread_objects[i]); + jni->DeleteLocalRef(thread_objects[i]); } jvmti->Deallocate((unsigned char *)thread_objects); From bafea6f02a8198ec533e4917f26cce7e73f68312 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Tue, 8 Jul 2025 13:44:23 -0400 Subject: [PATCH 19/29] Fixed memory leak found by Erwan --- ddprof-lib/src/main/cpp/wallClock.cpp | 33 +++++++++++++++++---------- ddprof-lib/src/main/cpp/wallClock.h | 8 ++----- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 2af502a99..a2d0ac268 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -163,6 +163,19 @@ void WallClockJVMTI::timerLoop() { if (!isEnabled()) { return; } + + jvmtiEnv* jvmti = VM::jvmti(); + if (jvmti == nullptr) { + return; + } + + jint threads_count = 0; + jthread* threads_ptr = nullptr; + if (jvmti->GetAllThreads(&threads_count, &threads_ptr) != JVMTI_ERROR_NONE || + threads_count == 0) { + return; + } + // Attach to JVM as the first step VM::attachThread("Datadog Profiler Wallclock Sampler"); auto collectThreads = [&](std::vector& threads) { @@ -172,10 +185,6 @@ void WallClockJVMTI::timerLoop() { } JNIEnv* jni = VM::jni(); - jint threads_count = 0; - jthread* threads_ptr = nullptr; - jvmti->GetAllThreads(&threads_count, &threads_ptr); - ThreadFilter* threadFilter = Profiler::instance()->threadFilter(); bool do_filter = threadFilter->enabled(); int self = OS::threadId(); @@ -198,7 +207,6 @@ void WallClockJVMTI::timerLoop() { } } } - jvmti->Deallocate((unsigned char*)threads_ptr); }; auto sampleThreads = [&](ThreadEntry& thread_entry, int& num_failures, int& threads_already_exited, int& permission_denied) { @@ -233,11 +241,14 @@ void WallClockJVMTI::timerLoop() { return true; }; - auto clearThreadRefs = [](ThreadEntry& thread_entry) { - VM::jni()->DeleteLocalRef(thread_entry.java); - }; + timerLoopCommon(collectThreads, sampleThreads, _reservoir_size, _interval); + + JNIEnv* jni = VM::jni(); + for (jint index = 0; index < threads_count; index++) { + jni->DeleteLocalRef(threads_ptr[index]); + } + jvmti->Deallocate((unsigned char*)threads_ptr); - timerLoopCommon(collectThreads, sampleThreads, clearThreadRefs, _reservoir_size, _interval); // Don't forget to detach the thread VM::detachThread(); } @@ -276,7 +287,5 @@ void WallClockASGCT::timerLoop() { return true; }; - auto doNothing = [](int tid) { }; - - timerLoopCommon(collectThreads, sampleThreads, doNothing, _reservoir_size, _interval); + timerLoopCommon(collectThreads, sampleThreads, _reservoir_size, _interval); } diff --git a/ddprof-lib/src/main/cpp/wallClock.h b/ddprof-lib/src/main/cpp/wallClock.h index 75bdac327..8dd6dd543 100644 --- a/ddprof-lib/src/main/cpp/wallClock.h +++ b/ddprof-lib/src/main/cpp/wallClock.h @@ -50,8 +50,8 @@ class BaseWallClock : public Engine { bool isEnabled() const; - template - void timerLoopCommon(CollectThreadsFunc collectThreads, SampleThreadsFunc sampleThreads, CleanThreadFunc cleanThreads, int reservoirSize, u64 interval) { + template + void timerLoopCommon(CollectThreadsFunc collectThreads, SampleThreadsFunc sampleThreads, int reservoirSize, u64 interval) { if (!_enabled.load(std::memory_order_acquire)) { return; } @@ -103,10 +103,6 @@ class BaseWallClock : public Engine { epoch.clean(); } - for (ThreadType thread : threads) { - cleanThreads(thread); - } - threads.clear(); // Get a random sleep duration // clamp the random interval to <1,2N-1> From 56dc6f694abb6f29538356755fa8fef8aca8fd5f Mon Sep 17 00:00:00 2001 From: Datadog Java Profiler Date: Tue, 1 Jul 2025 14:29:31 +0000 Subject: [PATCH 20/29] [Automated] Bump dev version to 1.29.0 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 89d72cf96..db196855a 100644 --- a/build.gradle +++ b/build.gradle @@ -14,7 +14,7 @@ plugins { id "com.diffplug.spotless" version "6.11.0" } -version = "1.28.0" +version = "1.29.0" apply plugin: "com.dipien.semantic-version" version = project.findProperty("ddprof_version") ?: version From 8f6b5e2ff52771826e4abbe9ef18448087dfbd83 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Tue, 1 Jul 2025 18:17:53 +0200 Subject: [PATCH 21/29] Update the sonatype repos (#235) --- build.gradle | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index db196855a..68cf63159 100644 --- a/build.gradle +++ b/build.gradle @@ -39,7 +39,8 @@ repositories { mavenContent { snapshotsOnly() } - url 'https://oss.sonatype.org/content/repositories/snapshots/' + // see https://central.sonatype.org/publish/publish-portal-snapshots/#consuming-via-gradle + url 'https://central.sonatype.com/repository/maven-snapshots/' } } @@ -75,7 +76,14 @@ nexusPublishing { password = "admin123" } } else { + // see https://github.com/gradle-nexus/publish-plugin#publishing-to-maven-central-via-sonatype-central + // For official documentation: + // staging repo publishing https://central.sonatype.org/publish/publish-portal-ossrh-staging-api/#configuration + // snapshot publishing https://central.sonatype.org/publish/publish-portal-snapshots/#publishing-via-other-methods sonatype { + nexusUrl.set(uri("https://ossrh-staging-api.central.sonatype.com/service/local/")) + snapshotRepositoryUrl.set(uri("https://central.sonatype.com/repository/maven-snapshots/")) + username = System.getenv("SONATYPE_USERNAME") password = System.getenv("SONATYPE_PASSWORD") } From 44531b0970ccb0d3f9fa326e3fa3de3092595966 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Wed, 2 Jul 2025 11:36:48 +0200 Subject: [PATCH 22/29] Fix artifact download URL --- .github/workflows/update_assets.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update_assets.yml b/.github/workflows/update_assets.yml index c76512ce1..fe2e1112a 100644 --- a/.github/workflows/update_assets.yml +++ b/.github/workflows/update_assets.yml @@ -35,7 +35,7 @@ jobs: TAG="$GITHUB_REF_NAME" fi VERSION=$(echo "${TAG}" | sed -e 's/v_//g') - ASSET_URL="https://oss.sonatype.org/service/local/repositories/releases/content/com/datadoghq/ddprof/${VERSION}/ddprof-${VERSION}.jar" + ASSET_URL="https://repo1.maven.org/maven2/com/datadoghq/ddprof/${VERSION}/ddprof-${VERSION}.jar" RESULT=1 while [ $RESULT -ne 0 ]; do wget -q $ASSET_URL From edcb33654dededceab9a1536d4d6968c0d3003a5 Mon Sep 17 00:00:00 2001 From: r1viollet <74836499+r1viollet@users.noreply.github.com> Date: Thu, 3 Jul 2025 14:30:17 +0200 Subject: [PATCH 23/29] Split debug (#233) * Split debug Add build steps to store split debug information for release builds --- .github/scripts/test_alpine_aarch64.sh | 2 +- .github/workflows/test_workflow.yml | 6 +- README.md | 33 +++++ ddprof-lib/build.gradle | 178 ++++++++++++++++++++++--- 4 files changed, 198 insertions(+), 21 deletions(-) diff --git a/.github/scripts/test_alpine_aarch64.sh b/.github/scripts/test_alpine_aarch64.sh index 4d107ca5a..af30d6208 100755 --- a/.github/scripts/test_alpine_aarch64.sh +++ b/.github/scripts/test_alpine_aarch64.sh @@ -29,6 +29,6 @@ JAVA_VERSION=$("${JAVA_TEST_HOME}/bin/java" -version 2>&1 | awk -F '"' '/version }') export JAVA_VERSION -apk update && apk add curl moreutils wget hexdump linux-headers bash make g++ clang git cppcheck jq cmake gtest-dev gmock tar >/dev/null +apk update && apk add curl moreutils wget hexdump linux-headers bash make g++ clang git cppcheck jq cmake gtest-dev gmock tar binutils >/dev/null ./gradlew -PCI -PkeepJFRs :ddprof-test:test${CONFIG} --no-daemon --parallel --build-cache --no-watch-fs \ No newline at end of file diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml index e96121b7c..c9738218d 100644 --- a/.github/workflows/test_workflow.yml +++ b/.github/workflows/test_workflow.yml @@ -72,7 +72,7 @@ jobs: if: steps.set_enabled.outputs.enabled == 'true' run: | sudo apt-get update - sudo apt-get install -y curl zip unzip libgtest-dev libgmock-dev + sudo apt-get install -y curl zip unzip libgtest-dev libgmock-dev binutils if [[ ${{ matrix.java_version }} =~ "-zing" ]]; then sudo apt-get install -y g++-9 gcc-9 sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 @@ -135,7 +135,7 @@ jobs: steps: - name: Setup OS run: | - apk update && apk add curl moreutils wget hexdump linux-headers bash make g++ clang git cppcheck jq cmake gtest-dev gmock tar >/dev/null + apk update && apk add curl moreutils wget hexdump linux-headers bash make g++ clang git cppcheck jq cmake gtest-dev gmock tar binutils >/dev/null - uses: actions/checkout@v3 - name: Cache Gradle Wrapper Binaries uses: actions/cache@v4 @@ -286,7 +286,7 @@ jobs: sudo apt update -y sudo apt remove -y g++ sudo apt autoremove -y - sudo apt install -y curl zip unzip clang make build-essential + sudo apt install -y curl zip unzip clang make build-essential binutils if [[ ${{ matrix.java_version }} =~ "-zing" ]]; then sudo apt -y install g++-9 gcc-9 sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 100 --slave /usr/bin/g++ g++ /usr/bin/g++-9 diff --git a/README.md b/README.md index b8e24356b..93fa275dc 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,39 @@ The project includes both Java and C++ unit tests. You can run them using: ### Cross-JDK Testing `JAVA_TEST_HOME= ./gradlew testDebug` +## Release Builds and Debug Information + +### Split Debug Information +Release builds automatically generate split debug information to optimize deployment size while preserving debugging capabilities: + +- **Stripped libraries** (~1.2MB): Production-ready binaries with symbols removed for deployment +- **Debug symbol files** (~6.1MB): Separate `.debug` files containing full debugging information +- **Debug links**: Stripped libraries include `.gnu_debuglink` sections pointing to debug files + +### Build Artifacts Structure +``` +ddprof-lib/build/ +├── lib/main/release/linux/x64/ +│ ├── libjavaProfiler.so # Original library with debug symbols +│ ├── stripped/ +│ │ └── libjavaProfiler.so # Stripped library (83% smaller) +│ └── debug/ +│ └── libjavaProfiler.so.debug # Debug symbols only +├── native/release/ +│ └── META-INF/native-libs/linux-x64/ +│ └── libjavaProfiler.so # Final stripped library (deployed) +└── native/release-debug/ + └── META-INF/native-libs/linux-x64/ + └── libjavaProfiler.so.debug # Debug symbols package +``` + +### Build Options +- **Skip debug extraction**: `./gradlew buildRelease -Pskip-debug-extraction=true` +- **Debug extraction requires**: `objcopy` (Linux) or `dsymutil` (macOS) + - Ubuntu/Debian: `sudo apt-get install binutils` + - Alpine: `apk add binutils` + - macOS: Included with Xcode command line tools + ## Development ### Code Quality diff --git a/ddprof-lib/build.gradle b/ddprof-lib/build.gradle index 9ceb5c4c2..93d664fae 100644 --- a/ddprof-lib/build.gradle +++ b/ddprof-lib/build.gradle @@ -8,6 +8,166 @@ plugins { id 'de.undercouch.download' version '4.1.1' } +// Helper function to check if objcopy is available +def checkObjcopyAvailable() { + try { + def process = ['objcopy', '--version'].execute() + process.waitFor() + return process.exitValue() == 0 + } catch (Exception e) { + return false + } +} + +// Helper function to check if dsymutil is available (for macOS) +def checkDsymutilAvailable() { + try { + def process = ['dsymutil', '--version'].execute() + process.waitFor() + return process.exitValue() == 0 + } catch (Exception e) { + return false + } +} + +// Helper function to check if debug extraction should be skipped +def shouldSkipDebugExtraction() { + // Skip if explicitly disabled + if (project.hasProperty('skip-debug-extraction')) { + return true + } + + // Skip if required tools are not available + if (os().isLinux() && !checkObjcopyAvailable()) { + return true + } + + if (os().isMacOsX() && !checkDsymutilAvailable()) { + return true + } + + return false +} + +// Helper function to get debug file path for a given config +def getDebugFilePath(config) { + def extension = os().isLinux() ? 'so' : 'dylib' + return file("$buildDir/lib/main/${config.name}/${osIdentifier()}/${archIdentifier()}/debug/libjavaProfiler.${extension}.debug") +} + +// Helper function to get stripped file path for a given config +def getStrippedFilePath(config) { + def extension = os().isLinux() ? 'so' : 'dylib' + return file("$buildDir/lib/main/${config.name}/${osIdentifier()}/${archIdentifier()}/stripped/libjavaProfiler.${extension}") +} + +// Helper function to create error message for missing tools +def getMissingToolErrorMessage(toolName, installInstructions) { + return """ + |${toolName} is not available but is required for split debug information. + | + |To fix this issue: + |${installInstructions} + | + |If you want to build without split debug info, set -Pskip-debug-extraction=true + """.stripMargin() +} + +// Helper function to create debug extraction task +def createDebugExtractionTask(config, linkTask) { + return tasks.register('extractDebugLibRelease', Exec) { + onlyIf { + !shouldSkipDebugExtraction() + } + dependsOn linkTask + description = 'Extract debug symbols from release library' + workingDir project.buildDir + + doFirst { + def sourceFile = linkTask.get().linkedFile.get().asFile + def debugFile = getDebugFilePath(config) + + // Ensure debug directory exists + debugFile.parentFile.mkdirs() + + // Set the command line based on platform + if (os().isLinux()) { + commandLine = ['objcopy', '--only-keep-debug', sourceFile.absolutePath, debugFile.absolutePath] + } else { + // For macOS, we'll use dsymutil instead + commandLine = ['dsymutil', sourceFile.absolutePath, '-o', debugFile.absolutePath.replace('.debug', '.dSYM')] + } + } + } +} + +// Helper function to create debug link task (Linux only) +def createDebugLinkTask(config, linkTask, extractDebugTask) { + return tasks.register('addDebugLinkLibRelease', Exec) { + onlyIf { + os().isLinux() && !shouldSkipDebugExtraction() + } + dependsOn extractDebugTask + description = 'Add debug link to the original library' + + doFirst { + def sourceFile = linkTask.get().linkedFile.get().asFile + def debugFile = getDebugFilePath(config) + + commandLine = ['objcopy', '--add-gnu-debuglink=' + debugFile.absolutePath, sourceFile.absolutePath] + } + } +} + +// Helper function to create debug file copy task +def createDebugCopyTask(config, extractDebugTask) { + return tasks.register('copyReleaseDebugFiles', Copy) { + onlyIf { + !shouldSkipDebugExtraction() + } + dependsOn extractDebugTask + from file("$buildDir/lib/main/${config.name}/${osIdentifier()}/${archIdentifier()}/debug") + into file(libraryTargetPath(config.name + '-debug')) + include '**/*.debug' + include '**/*.dSYM/**' + } +} + +// Main function to setup debug extraction for release builds +def setupDebugExtraction(config, linkTask) { + if (config.name == 'release' && config.active && !project.hasProperty('skip-native')) { + // Create all debug-related tasks + def extractDebugTask = createDebugExtractionTask(config, linkTask) + def addDebugLinkTask = createDebugLinkTask(config, linkTask, extractDebugTask) + + // Create the strip task and configure it properly + def stripTask = tasks.register('stripLibRelease', StripSymbols) { + // No onlyIf needed here - setupDebugExtraction already handles the main conditions + dependsOn addDebugLinkTask + } + + // Configure the strip task after registration + stripTask.configure { + targetPlatform = linkTask.get().targetPlatform + toolChain = linkTask.get().toolChain + binaryFile = linkTask.get().linkedFile.get().asFile + outputFile = getStrippedFilePath(config) + } + + def copyDebugTask = createDebugCopyTask(config, extractDebugTask) + + // Wire up the copy task to use stripped binaries + def copyTask = tasks.findByName("copyReleaseLibs") + if (copyTask != null) { + copyTask.dependsOn stripTask + copyTask.inputs.files stripTask.get().outputs.files + + // Create an extra folder for the debug symbols + copyTask.dependsOn copyDebugTask + } + } +} + def libraryName = "ddprof" description = "Datadog Java Profiler Library" @@ -366,23 +526,7 @@ tasks.whenTaskAdded { task -> outputs.file linkedFile } if (config.name == 'release') { - def stripTask = tasks.register('stripLibRelease', StripSymbols) { - onlyIf { - config.active - } - dependsOn linkTask - targetPlatform = tasks.linkLibRelease.targetPlatform - toolChain = tasks.linkLibRelease.toolChain - binaryFile = tasks.linkLibRelease.linkedFile.get() - outputFile = file("$buildDir/lib/main/${config.name}/${osIdentifier()}/${archIdentifier()}/stripped/libjavaProfiler.${os().isLinux() ? 'so' : 'dylib'}") - inputs.file binaryFile - outputs.file outputFile - } - def copyTask = tasks.findByName("copyReleaseLibs") - if (copyTask != null) { - copyTask.dependsOn stripTask - copyTask.inputs.files stripTask.get().outputs.files - } + setupDebugExtraction(config, linkTask) } } } From c0d8e917a7dbc19a1f49ac08aaa5df8b026f3ea4 Mon Sep 17 00:00:00 2001 From: Jaroslav Bachorik Date: Fri, 4 Jul 2025 14:30:15 +0200 Subject: [PATCH 24/29] Add the micro-benchmark for thread filtering (#237) * Add the micro-benchmark for thread filtering * Do not test for obviously invalid thread id * Relax threadfilter mem order --- ddprof-lib/src/main/cpp/threadFilter.cpp | 4 +- .../com/datadoghq/profiler/JavaProfiler.java | 15 ++- ddprof-lib/src/test/cpp/ddprof_ut.cpp | 2 +- ddprof-stresstest/build.gradle | 2 +- .../datadoghq/profiler/stresstest/Main.java | 9 +- .../profiler/stresstest/WhiteboxProfiler.java | 14 ++- .../{ => counters}/CapturingLambdas.java | 2 +- .../{ => counters}/DumpRecording.java | 2 +- .../{ => counters}/GraphMutation.java | 2 +- .../scenarios/{ => counters}/GraphState.java | 2 +- .../scenarios/{ => counters}/NanoTime.java | 2 +- .../{ => counters}/ThreadFilterBenchmark.java | 3 +- .../{ => counters}/TracedParallelWork.java | 2 +- .../throughput/ThreadFilterBenchmark.java | 113 ++++++++++++++++++ 14 files changed, 150 insertions(+), 24 deletions(-) rename ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/{ => counters}/CapturingLambdas.java (93%) rename ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/{ => counters}/DumpRecording.java (95%) rename ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/{ => counters}/GraphMutation.java (92%) rename ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/{ => counters}/GraphState.java (88%) rename ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/{ => counters}/NanoTime.java (79%) rename ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/{ => counters}/ThreadFilterBenchmark.java (99%) rename ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/{ => counters}/TracedParallelWork.java (97%) create mode 100644 ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/throughput/ThreadFilterBenchmark.java diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 31713ae5d..13e6c2ae0 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -135,7 +135,7 @@ void ThreadFilter::add(int thread_id) { thread_id = mapThreadId(thread_id); assert(b == bitmap(thread_id)); u64 bit = 1ULL << (thread_id & 0x3f); - if (!(__sync_fetch_and_or(&word(b, thread_id), bit) & bit)) { + if (!(__atomic_fetch_or(&word(b, thread_id), bit, __ATOMIC_RELAXED) & bit)) { atomicInc(_size); } } @@ -148,7 +148,7 @@ void ThreadFilter::remove(int thread_id) { } u64 bit = 1ULL << (thread_id & 0x3f); - if (__sync_fetch_and_and(&word(b, thread_id), ~bit) & bit) { + if (__atomic_fetch_and(&word(b, thread_id), ~bit, __ATOMIC_RELAXED) & bit) { atomicInc(_size, -1); } } diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index cae2c53a6..a7cda6bc4 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -37,12 +37,15 @@ public final class JavaProfiler { static final Unsafe UNSAFE; static { Unsafe unsafe = null; - String version = System.getProperty("java.version"); - try { - Field f = Unsafe.class.getDeclaredField("theUnsafe"); - f.setAccessible(true); - unsafe = (Unsafe) f.get(null); - } catch (Exception ignore) { } + // a safety and testing valve to disable unsafe access + if (!Boolean.getBoolean("ddprof.disable_unsafe")) { + try { + Field f = Unsafe.class.getDeclaredField("theUnsafe"); + f.setAccessible(true); + unsafe = (Unsafe) f.get(null); + } catch (Exception ignore) { + } + } UNSAFE = unsafe; } diff --git a/ddprof-lib/src/test/cpp/ddprof_ut.cpp b/ddprof-lib/src/test/cpp/ddprof_ut.cpp index 6c5c8b93e..a754c9b02 100644 --- a/ddprof-lib/src/test/cpp/ddprof_ut.cpp +++ b/ddprof-lib/src/test/cpp/ddprof_ut.cpp @@ -129,7 +129,7 @@ // increase step gradually to create different bit densities int step = 1; int size = 0; - for (int tid = 0; tid < maxTid - step - 1; tid += step, size++) { + for (int tid = 1; tid < maxTid - step - 1; tid += step, size++) { EXPECT_FALSE(filter.accept(tid)); filter.add(tid); EXPECT_TRUE(filter.accept(tid)); diff --git a/ddprof-stresstest/build.gradle b/ddprof-stresstest/build.gradle index c108d7352..2934e4aca 100644 --- a/ddprof-stresstest/build.gradle +++ b/ddprof-stresstest/build.gradle @@ -39,7 +39,7 @@ task runStressTests(type: Exec) { } group = 'Execution' description = 'Run JMH stresstests' - commandLine "${javaHome}/bin/java", '-jar', 'build/libs/stresstests.jar' + commandLine "${javaHome}/bin/java", '-jar', 'build/libs/stresstests.jar', 'counters.*' } tasks.withType(JavaCompile).configureEach { diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/Main.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/Main.java index 710d2f065..d71346a8a 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/Main.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/Main.java @@ -16,11 +16,18 @@ public class Main { public static final String SCENARIOS_PACKAGE = "com.datadoghq.profiler.stresstest.scenarios."; public static void main(String... args) throws Exception { + String filter = "*"; + if (args.length == 1) { + filter = args[0]; + } else if (args.length > 1) { + System.err.println("Usage: java -jar ddprof-stresstest.jar [scenario filter]"); + System.exit(1); + } CommandLineOptions commandLineOptions = new CommandLineOptions(args); Mode mode = Mode.AverageTime; Options options = new OptionsBuilder() .parent(new CommandLineOptions(args)) - .include(SCENARIOS_PACKAGE + "*") + .include(SCENARIOS_PACKAGE + filter) .addProfiler(WhiteboxProfiler.class) .forks(commandLineOptions.getForkCount().orElse(1)) .warmupIterations(commandLineOptions.getWarmupIterations().orElse(0)) diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/WhiteboxProfiler.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/WhiteboxProfiler.java index 5d8d54ecb..99899eb45 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/WhiteboxProfiler.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/WhiteboxProfiler.java @@ -46,12 +46,16 @@ public Collection afterIteration(BenchmarkParams benchmarkPara JavaProfiler.getInstance().stop(); long fileSize = Files.size(jfr); Files.deleteIfExists(jfr); - List results = new ArrayList<>(); - results.add(new ScalarResult("jfr_filesize_bytes", fileSize, "", AggregationPolicy.MAX)); - for (Map.Entry counter : JavaProfiler.getInstance().getDebugCounters().entrySet()) { - results.add(new ScalarResult(counter.getKey(), counter.getValue(), "", AggregationPolicy.MAX)); + if (!Boolean.parseBoolean(benchmarkParams.getParam("skipResults"))) { + List results = new ArrayList<>(); + results.add(new ScalarResult("jfr_filesize_bytes", fileSize, "", AggregationPolicy.MAX)); + for (Map.Entry counter : JavaProfiler.getInstance().getDebugCounters().entrySet()) { + results.add(new ScalarResult(counter.getKey(), counter.getValue(), "", AggregationPolicy.MAX)); + } + return results; + } else { + return Collections.emptyList(); } - return results; } catch (IOException e) { e.printStackTrace(); return Collections.emptyList(); diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/CapturingLambdas.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/CapturingLambdas.java similarity index 93% rename from ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/CapturingLambdas.java rename to ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/CapturingLambdas.java index a1d008915..a6d2e06e3 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/CapturingLambdas.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/CapturingLambdas.java @@ -1,4 +1,4 @@ -package com.datadoghq.profiler.stresstest.scenarios; +package com.datadoghq.profiler.stresstest.scenarios.counters; import com.datadoghq.profiler.stresstest.Configuration; import org.openjdk.jmh.annotations.Benchmark; diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/DumpRecording.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/DumpRecording.java similarity index 95% rename from ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/DumpRecording.java rename to ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/DumpRecording.java index a76cf30f8..83c4cc29c 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/DumpRecording.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/DumpRecording.java @@ -1,4 +1,4 @@ -package com.datadoghq.profiler.stresstest.scenarios; +package com.datadoghq.profiler.stresstest.scenarios.counters; import com.datadoghq.profiler.JavaProfiler; import com.datadoghq.profiler.stresstest.Configuration; diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/GraphMutation.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/GraphMutation.java similarity index 92% rename from ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/GraphMutation.java rename to ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/GraphMutation.java index bbc91009b..58a2e6cd4 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/GraphMutation.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/GraphMutation.java @@ -1,4 +1,4 @@ -package com.datadoghq.profiler.stresstest.scenarios; +package com.datadoghq.profiler.stresstest.scenarios.counters; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.Threads; diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/GraphState.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/GraphState.java similarity index 88% rename from ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/GraphState.java rename to ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/GraphState.java index a614507e7..cd612832b 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/GraphState.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/GraphState.java @@ -1,4 +1,4 @@ -package com.datadoghq.profiler.stresstest.scenarios; +package com.datadoghq.profiler.stresstest.scenarios.counters; import com.datadoghq.profiler.stresstest.Configuration; import org.openjdk.jmh.annotations.*; diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/NanoTime.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/NanoTime.java similarity index 79% rename from ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/NanoTime.java rename to ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/NanoTime.java index 0c592083e..8e4b7dd12 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/NanoTime.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/NanoTime.java @@ -1,4 +1,4 @@ -package com.datadoghq.profiler.stresstest.scenarios; +package com.datadoghq.profiler.stresstest.scenarios.counters; import com.datadoghq.profiler.stresstest.Configuration; import org.openjdk.jmh.annotations.Benchmark; diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/ThreadFilterBenchmark.java similarity index 99% rename from ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java rename to ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/ThreadFilterBenchmark.java index b0ec1daed..246cbd1dd 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/ThreadFilterBenchmark.java @@ -1,9 +1,8 @@ -package com.datadoghq.profiler.stresstest.scenarios; +package com.datadoghq.profiler.stresstest.scenarios.counters; import com.datadoghq.profiler.JavaProfiler; import com.datadoghq.profiler.stresstest.Configuration; import org.openjdk.jmh.annotations.*; -import org.openjdk.jmh.infra.Blackhole; import java.io.FileWriter; import java.io.IOException; diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/TracedParallelWork.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/TracedParallelWork.java similarity index 97% rename from ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/TracedParallelWork.java rename to ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/TracedParallelWork.java index e8e081349..2ed4227bd 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/TracedParallelWork.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/counters/TracedParallelWork.java @@ -1,4 +1,4 @@ -package com.datadoghq.profiler.stresstest.scenarios; +package com.datadoghq.profiler.stresstest.scenarios.counters; import com.datadoghq.profiler.ContextSetter; import com.datadoghq.profiler.JavaProfiler; diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/throughput/ThreadFilterBenchmark.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/throughput/ThreadFilterBenchmark.java new file mode 100644 index 000000000..bc03e0ca7 --- /dev/null +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/throughput/ThreadFilterBenchmark.java @@ -0,0 +1,113 @@ +package com.datadoghq.profiler.stresstest.scenarios.throughput; + +import com.datadoghq.profiler.JavaProfiler; +import com.datadoghq.profiler.stresstest.Configuration; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +@State(Scope.Benchmark) +public class ThreadFilterBenchmark extends Configuration { + private JavaProfiler profiler; + + @Param(BASE_COMMAND + ",filter=1") + public String command; + + @Param("true") + public String skipResults; + + @Param({"0", "7", "70000"}) + public String workload; + + private long workloadNum = 0; + + @Setup(Level.Trial) + public void setup() throws IOException { + profiler = JavaProfiler.getInstance(); + workloadNum = Long.parseLong(workload); + } + + @Benchmark + @BenchmarkMode(Mode.Throughput) + @Fork(value = 3, warmups = 3) + @Warmup(iterations = 5) + @Measurement(iterations = 8) + @Threads(1) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void threadFilterStress01() throws InterruptedException { + profiler.addThread(); + // Simulate per-thread work + Blackhole.consumeCPU(workloadNum); + profiler.removeThread(); + } + + @Benchmark + @BenchmarkMode(Mode.Throughput) + @Fork(value = 3, warmups = 3) + @Warmup(iterations = 5) + @Measurement(iterations = 8) + @Threads(2) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void threadFilterStress02() throws InterruptedException { + profiler.addThread(); + // Simulate per-thread work + Blackhole.consumeCPU(workloadNum); + profiler.removeThread(); + } + + @Benchmark + @BenchmarkMode(Mode.Throughput) + @Fork(value = 3, warmups = 3) + @Warmup(iterations = 5) + @Measurement(iterations = 8) + @Threads(4) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void threadFilterStress04() throws InterruptedException { + profiler.addThread(); + // Simulate per-thread work + Blackhole.consumeCPU(workloadNum); + profiler.removeThread(); + } + + @Benchmark + @BenchmarkMode(Mode.Throughput) + @Fork(value = 3, warmups = 3) + @Warmup(iterations = 5) + @Measurement(iterations = 8) + @Threads(8) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void threadFilterStress08() throws InterruptedException { + profiler.addThread(); + // Simulate per-thread work + Blackhole.consumeCPU(workloadNum); + profiler.removeThread(); + } + + @Benchmark + @BenchmarkMode(Mode.Throughput) + @Fork(value = 3, warmups = 3) + @Warmup(iterations = 5) + @Measurement(iterations = 8) + @Threads(16) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public void threadFilterStress16() throws InterruptedException { + profiler.addThread(); + // Simulate per-thread work + Blackhole.consumeCPU(workloadNum); + profiler.removeThread(); + } +} From 6b7e662c479c51a4f8853c5b7f95d8768f1e865c Mon Sep 17 00:00:00 2001 From: r1viollet <74836499+r1viollet@users.noreply.github.com> Date: Mon, 7 Jul 2025 18:05:14 +0200 Subject: [PATCH 25/29] Flaky test - j9 OSR (#239) Skip zing and j9 flaky tests --- .../profiler/alloc/AllocationProfilerTest.java | 8 ++++++++ .../wallclock/ContendedWallclockSamplesTest.java | 15 ++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/alloc/AllocationProfilerTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/alloc/AllocationProfilerTest.java index 09d24e163..d995d065f 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/alloc/AllocationProfilerTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/alloc/AllocationProfilerTest.java @@ -22,6 +22,14 @@ protected boolean isPlatformSupported() { @RetryingTest(5) public void shouldGetObjectAllocationSamples() throws InterruptedException { + + // We seem to hit issues on j9: + // OSR (On stack replacement) creates crashes with the profiler. + // ----------- Stack Backtrace ----------- + // prepareForOSR+0xbf (0x00007F51062A4DDF [libj9jit29.so+0x4a4ddf]) + if (Platform.isJ9() && !Platform.isJavaVersionAtLeast(8)) { + return; + } Assumptions.assumeFalse(isAsan() || isTsan()); AllocatingTarget target1 = new AllocatingTarget(); diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ContendedWallclockSamplesTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ContendedWallclockSamplesTest.java index e680633b7..0adbfd812 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ContendedWallclockSamplesTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/wallclock/ContendedWallclockSamplesTest.java @@ -51,13 +51,18 @@ public void after() { @TestTemplate @ValueSource(strings = {"vm", "vmx", "fp", "dwarf"}) public void test(@CStack String cstack) { - String config = System.getProperty("ddprof_test.config"); - boolean isSanitizer = config.endsWith("san"); - boolean isJvmci = System.getProperty("java.vm.version", "").contains("jvmci"); - assumeFalse(Platform.isZing() || Platform.isJ9()); + // Skip test entirely on unsupported JVMs (don't use assumeFalse which gets retried) + if (Platform.isZing() || Platform.isJ9()) { + return; + } // Running vm stackwalker tests on JVMCI (Graal), JDK 24, aarch64 and with a sanitizer is crashing in a weird place // This looks like the sanitizer instrumentation is breaking the longjump based crash recovery :( - assumeFalse(Platform.isJavaVersionAtLeast(24) && isJvmci && Platform.isAarch64() && cstack.startsWith("vm") && isSanitizer); + String config = System.getProperty("ddprof_test.config"); + boolean isJvmci = System.getProperty("java.vm.version", "").contains("jvmci"); + boolean isSanitizer = config.endsWith("san"); + if (Platform.isJavaVersionAtLeast(24) && isJvmci && Platform.isAarch64() && cstack.startsWith("vm") && isSanitizer) { + return; + } long result = 0; for (int i = 0; i < 10; i++) { From cddfbf2c2aa8f614a4569e61fcfbd676268e3c27 Mon Sep 17 00:00:00 2001 From: r1viollet <74836499+r1viollet@users.noreply.github.com> Date: Tue, 8 Jul 2025 14:21:53 +0200 Subject: [PATCH 26/29] Fix flaky allocation test (#241) Lower threshold for allocation test --- .../com/datadoghq/profiler/jfr/ObjectSampleDumpSmokeTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddprof-test/src/test/java/com/datadoghq/profiler/jfr/ObjectSampleDumpSmokeTest.java b/ddprof-test/src/test/java/com/datadoghq/profiler/jfr/ObjectSampleDumpSmokeTest.java index 8582783ac..c7196e4a7 100644 --- a/ddprof-test/src/test/java/com/datadoghq/profiler/jfr/ObjectSampleDumpSmokeTest.java +++ b/ddprof-test/src/test/java/com/datadoghq/profiler/jfr/ObjectSampleDumpSmokeTest.java @@ -15,7 +15,7 @@ protected boolean isPlatformSupported() { @Override protected String getProfilerCommand() { - return "memory=128:a"; + return "memory=32:a"; } @RetryingTest(5) From e9803d18bb6d35cf98dbe40a04cfe117eb2129d0 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 9 Jul 2025 08:37:08 -0400 Subject: [PATCH 27/29] jbachorik's comments --- ddprof-lib/src/main/cpp/profiler.cpp | 6 +++--- ddprof-lib/src/main/cpp/safeAccess.cpp | 1 + ddprof-lib/src/main/cpp/safeAccess.h | 2 +- ddprof-lib/src/main/cpp/vmStructs_dd.h | 24 ++++++------------------ ddprof-lib/src/main/cpp/wallClock.cpp | 8 +++----- 5 files changed, 14 insertions(+), 27 deletions(-) diff --git a/ddprof-lib/src/main/cpp/profiler.cpp b/ddprof-lib/src/main/cpp/profiler.cpp index 4c28bb04d..efdae02a3 100644 --- a/ddprof-lib/src/main/cpp/profiler.cpp +++ b/ddprof-lib/src/main/cpp/profiler.cpp @@ -625,9 +625,9 @@ u32 Profiler::recordJVMTISample(u64 counter, int tid, jthread thread, jint event if (duration > 0) { Counters::increment(UNWINDING_TIME_JVMTI, duration); // increment the JVMTI specific counter } - if (!deferred) { - _jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event); - } + } + if (!deferred) { + _jfr.recordEvent(lock_index, tid, call_trace_id, event_type, event); } _locks[lock_index].unlock(); diff --git a/ddprof-lib/src/main/cpp/safeAccess.cpp b/ddprof-lib/src/main/cpp/safeAccess.cpp index 69cca5893..82aceeeb8 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.cpp +++ b/ddprof-lib/src/main/cpp/safeAccess.cpp @@ -22,6 +22,7 @@ SafeAccess::SafeFetch32 SafeAccess::_safeFetch32Func = nullptr; void SafeAccess::initSafeFetch(CodeCache* libjvm) { // Hotspot JVM's safefetch implementation appears better, e.g. it actually returns errorValue, // when the address is invalid. let's use it whenever possible. + // When the methods are not available, fallback to SafeAccess::load32() implementation. _safeFetch32Func = (SafeFetch32)libjvm->findSymbol("SafeFetch32_impl"); if (_safeFetch32Func == nullptr && !WX_MEMORY) { // jdk11 stub implementation other than Macosx/aarch64 diff --git a/ddprof-lib/src/main/cpp/safeAccess.h b/ddprof-lib/src/main/cpp/safeAccess.h index aa1f2c34c..993c359c6 100644 --- a/ddprof-lib/src/main/cpp/safeAccess.h +++ b/ddprof-lib/src/main/cpp/safeAccess.h @@ -37,7 +37,7 @@ class SafeAccess { public: static void initSafeFetch(CodeCache* libjvm); - static inline int safeFetch(int* ptr, int errorValue) { + static inline int safeFetch32(int* ptr, int errorValue) { assert(_safeFetch32Func != nullptr); return _safeFetch32Func(ptr, errorValue); } diff --git a/ddprof-lib/src/main/cpp/vmStructs_dd.h b/ddprof-lib/src/main/cpp/vmStructs_dd.h index 33e3b5234..431aa4329 100644 --- a/ddprof-lib/src/main/cpp/vmStructs_dd.h +++ b/ddprof-lib/src/main/cpp/vmStructs_dd.h @@ -143,23 +143,11 @@ namespace ddprof { } OSThreadState osThreadState() { - if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_state_offset() >= 0) { - const char *osthread = *(const char **)at(ddprof::VMStructs::thread_osthread_offset()); - if (osthread != nullptr) { - return static_cast( - *(int *)(osthread + ddprof::VMStructs::osthread_state_offset())); - } - } - return OSThreadState::UNKNOWN; - } - - // Safer version - OSThreadState osThreadStateSafe() { if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_state_offset() >= 0) { const char *osthread = *(char **)at(ddprof::VMStructs::thread_osthread_offset()); if (osthread != nullptr) { - // If the location is accessible, the thread must have been terminated - int value = SafeAccess::safeFetch((int*)(osthread + ddprof::VMStructs::osthread_state_offset()), + // If the location is not accessible, the thread must have been terminated + int value = SafeAccess::safeFetch32((int*)(osthread + ddprof::VMStructs::osthread_state_offset()), static_cast(OSThreadState::TERMINATED)); // Checking for bad data if (value > static_cast(OSThreadState::SYSCALL)) { @@ -171,26 +159,26 @@ namespace ddprof { return OSThreadState::UNKNOWN; } - int osThreadIdSafe() { + int osThreadId() { if (ddprof::VMStructs::thread_osthread_offset() >= 0 && ddprof::VMStructs::osthread_id_offset() >=0) { const char* osthread = *(const char**) at(ddprof::VMStructs::thread_osthread_offset()); if (osthread == nullptr) { return -1; } else { - return SafeAccess::safeFetch((int*)(osthread + ddprof::VMStructs::osthread_id_offset()), -1); + return SafeAccess::safeFetch32((int*)(osthread + ddprof::VMStructs::osthread_id_offset()), -1); } } return -1; } - int stateSafe() { + int state() { int offset = ddprof::VMStructs::thread_state_offset(); if (offset >= 0) { int* state = (int*)at(offset); if (state == nullptr) { return 0; } else { - int value = SafeAccess::safeFetch(state, 0); + int value = SafeAccess::safeFetch32(state, 0); // Checking for bad data if (value > _thread_max_state) { value = 0; diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index a2d0ac268..981722568 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -196,8 +196,7 @@ void WallClockJVMTI::timerLoop() { if (nThread == nullptr) { continue; } - // Racy, use safer version and check - int tid = nThread->osThreadIdSafe(); + int tid = nThread->osThreadId(); if (!threadFilter->isValid(tid)) { continue; } @@ -212,10 +211,9 @@ void WallClockJVMTI::timerLoop() { auto sampleThreads = [&](ThreadEntry& thread_entry, int& num_failures, int& threads_already_exited, int& permission_denied) { static jint max_stack_depth = (jint)Profiler::instance()->max_stack_depth(); - // Following code is racy, use safer version to access native structure. ExecutionEvent event; ddprof::VMThread* vm_thread = thread_entry.native; - int raw_thread_state = vm_thread->stateSafe(); + int raw_thread_state = vm_thread->state(); bool is_initialized = raw_thread_state >= ddprof::JVMJavaThreadState::_thread_in_native && raw_thread_state < ddprof::JVMJavaThreadState::_thread_max_state; OSThreadState state = OSThreadState::UNKNOWN; @@ -223,7 +221,7 @@ void WallClockJVMTI::timerLoop() { if (vm_thread == nullptr || !is_initialized) { return false; } - OSThreadState os_state = vm_thread->osThreadStateSafe(); + OSThreadState os_state = vm_thread->osThreadState(); if (state == OSThreadState::TERMINATED) { return false; } else if (state == OSThreadState::UNKNOWN) { From 0071c00489d080f16eaf40ee0ae5b3bf8302b5cc Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 9 Jul 2025 10:22:14 -0400 Subject: [PATCH 28/29] More jbachorik's comments --- ddprof-lib/src/main/cpp/wallClock.cpp | 37 +++++++++++++++++++-------- ddprof-lib/src/main/cpp/wallClock.h | 4 +-- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/ddprof-lib/src/main/cpp/wallClock.cpp b/ddprof-lib/src/main/cpp/wallClock.cpp index 981722568..d261cdcec 100644 --- a/ddprof-lib/src/main/cpp/wallClock.cpp +++ b/ddprof-lib/src/main/cpp/wallClock.cpp @@ -169,12 +169,13 @@ void WallClockJVMTI::timerLoop() { return; } + // Notice: + // We want to cache threads that are captured by collectThread(), so that we can + // clean them up in cleanThreadRefs(). + // The approach is not ideal, but it is cleaner than cleaning individual thread + // during filtering phases. jint threads_count = 0; jthread* threads_ptr = nullptr; - if (jvmti->GetAllThreads(&threads_count, &threads_ptr) != JVMTI_ERROR_NONE || - threads_count == 0) { - return; - } // Attach to JVM as the first step VM::attachThread("Datadog Profiler Wallclock Sampler"); @@ -183,6 +184,12 @@ void WallClockJVMTI::timerLoop() { if (jvmti == nullptr) { return; } + + if (jvmti->GetAllThreads(&threads_count, &threads_ptr) != JVMTI_ERROR_NONE || + threads_count == 0) { + return; + } + JNIEnv* jni = VM::jni(); ThreadFilter* threadFilter = Profiler::instance()->threadFilter(); @@ -239,13 +246,18 @@ void WallClockJVMTI::timerLoop() { return true; }; - timerLoopCommon(collectThreads, sampleThreads, _reservoir_size, _interval); + auto cleanThreadRefs = [&]() { + JNIEnv* jni = VM::jni(); + for (jint index = 0; index < threads_count; index++) { + jni->DeleteLocalRef(threads_ptr[index]); + } + jvmti->Deallocate((unsigned char*)threads_ptr); + threads_ptr = nullptr; + threads_count = 0; + }; + + timerLoopCommon(collectThreads, sampleThreads, cleanThreadRefs, _reservoir_size, _interval); - JNIEnv* jni = VM::jni(); - for (jint index = 0; index < threads_count; index++) { - jni->DeleteLocalRef(threads_ptr[index]); - } - jvmti->Deallocate((unsigned char*)threads_ptr); // Don't forget to detach the thread VM::detachThread(); @@ -285,5 +297,8 @@ void WallClockASGCT::timerLoop() { return true; }; - timerLoopCommon(collectThreads, sampleThreads, _reservoir_size, _interval); + auto doNothing = []() { + }; + + timerLoopCommon(collectThreads, sampleThreads, doNothing, _reservoir_size, _interval); } diff --git a/ddprof-lib/src/main/cpp/wallClock.h b/ddprof-lib/src/main/cpp/wallClock.h index 8dd6dd543..b5c424302 100644 --- a/ddprof-lib/src/main/cpp/wallClock.h +++ b/ddprof-lib/src/main/cpp/wallClock.h @@ -50,8 +50,8 @@ class BaseWallClock : public Engine { bool isEnabled() const; - template - void timerLoopCommon(CollectThreadsFunc collectThreads, SampleThreadsFunc sampleThreads, int reservoirSize, u64 interval) { + template + void timerLoopCommon(CollectThreadsFunc collectThreads, SampleThreadsFunc sampleThreads, CleanThreadFunc cleanThreads, int reservoirSize, u64 interval) { if (!_enabled.load(std::memory_order_acquire)) { return; } From a1db67404508c8ea248fc08e1abf93840e4db679 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Wed, 9 Jul 2025 10:37:16 -0400 Subject: [PATCH 29/29] Cleanup thread local references --- ddprof-lib/src/main/cpp/wallClock.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ddprof-lib/src/main/cpp/wallClock.h b/ddprof-lib/src/main/cpp/wallClock.h index b5c424302..0ead37fac 100644 --- a/ddprof-lib/src/main/cpp/wallClock.h +++ b/ddprof-lib/src/main/cpp/wallClock.h @@ -104,6 +104,8 @@ class BaseWallClock : public Engine { } threads.clear(); + cleanThreads(); + // Get a random sleep duration // clamp the random interval to <1,2N-1> // the probability of clamping is extremely small, close to zero