Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
94b2559
Thread filter optim
r1viollet Jul 7, 2025
51cb97f
Add an automatic register in case we failed to register the thread
r1viollet Jul 7, 2025
cc02c1e
Exterminate the last remnants of false sharing
jbachorik Jul 10, 2025
50a8d5f
Minor tweaks
jbachorik Jul 21, 2025
30d32c0
Merge cleanup
jbachorik Jul 21, 2025
bf23309
Merge branch 'main' into r1viollet/thread_filter_squash
jbachorik Jul 24, 2025
90651f5
Merge branch 'main' of github.com:DataDog/java-profiler into r1violle…
r1viollet Aug 8, 2025
28e23ee
Adjust ThreadEnd hook
r1viollet Aug 8, 2025
3d31cc7
Profiler thread - Ensure we init before swap
r1viollet Aug 19, 2025
dfd44de
Thread filter bench
r1viollet Aug 19, 2025
1e6efe4
Merge branch 'main' of github.com:DataDog/java-profiler into r1violle…
r1viollet Aug 20, 2025
2633224
Thread filter optims - minor fixes
r1viollet Aug 21, 2025
d967e71
Thread filter - Expand test coverage
r1viollet Aug 21, 2025
68035f6
threadIDTable - add testing for the fixed size table
r1viollet Aug 21, 2025
658097d
threadFilter bench - Minor cleanup of print lines
r1viollet Aug 21, 2025
e78a6b2
ThreadFilter optim - fixes
r1viollet Aug 21, 2025
a637ba6
PR review adjustments, following Zhengyu's comments
r1viollet Sep 4, 2025
ab9411c
Merge branch 'main' of github.com:DataDog/java-profiler into r1violle…
r1viollet Sep 5, 2025
b5fba80
Merge branch 'main' of github.com:DataDog/java-profiler into r1violle…
r1viollet Sep 9, 2025
cc170ee
Thread filter - Adjust memory ordering following discussion with Zhengyu
r1viollet Sep 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 26 additions & 13 deletions ddprof-lib/src/main/cpp/flightRecorder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ char *Recording::_jvm_flags = NULL;
char *Recording::_java_command = NULL;

Recording::Recording(int fd, Arguments &args)
: _fd(fd), _thread_set(), _method_map() {
: _fd(fd), _method_map() {

args.save(_args);
_chunk_start = lseek(_fd, 0, SEEK_END);
Expand All @@ -331,6 +331,8 @@ Recording::Recording(int fd, Arguments &args)
_bytes_written = 0;

_tid = OS::threadId();
_active_index.store(0, std::memory_order_relaxed);

VM::jvmti()->GetAvailableProcessors(&_available_processors);

writeHeader(_buf);
Expand Down Expand Up @@ -1060,11 +1062,18 @@ void Recording::writeExecutionModes(Buffer *buf) {
}

void Recording::writeThreads(Buffer *buf) {
addThread(_tid);
std::vector<int> threads;
threads.reserve(_thread_set.size());
_thread_set.collect(threads);
_thread_set.clear();
int old_index = _active_index.fetch_xor(1, std::memory_order_acq_rel);
// After flip: new samples go into the new active set
// We flush from old_index (the previous active set)

std::unordered_set<int> threads;
threads.insert(_tid);

for (int i = 0; i < CONCURRENCY_LEVEL; ++i) {
// Collect thread IDs from the fixed-size table into the main set
_thread_ids[i][old_index].collect(threads);
_thread_ids[i][old_index].clear();
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Release here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think memory_order_acq_rel is good here for the old_index. Though we might be talking about something else.


Profiler *profiler = Profiler::instance();
ThreadInfo *t_info = &profiler->_thread_info;
Expand All @@ -1073,15 +1082,15 @@ void Recording::writeThreads(Buffer *buf) {

buf->putVar64(T_THREAD);
buf->putVar64(threads.size());
for (int i = 0; i < threads.size(); i++) {
for (auto tid : threads) {
const char *thread_name;
jlong thread_id;
std::pair<std::shared_ptr<std::string>, u64> info = t_info->get(threads[i]);
std::pair<std::shared_ptr<std::string>, u64> info = t_info->get(tid);
if (info.first) {
thread_name = info.first->c_str();
thread_id = info.second;
} else {
snprintf(name_buf, sizeof(name_buf), "[tid=%d]", threads[i]);
snprintf(name_buf, sizeof(name_buf), "[tid=%d]", tid);
thread_name = name_buf;
thread_id = 0;
}
Expand All @@ -1091,9 +1100,9 @@ void Recording::writeThreads(Buffer *buf) {
(thread_id == 0 ? length + 1 : 2 * length) -
3 * 10; // 3x max varint length
flushIfNeeded(buf, required);
buf->putVar64(threads[i]);
buf->putVar64(tid);
buf->putUtf8(thread_name, length);
buf->putVar64(threads[i]);
buf->putVar64(tid);
if (thread_id == 0) {
buf->put8(0);
} else {
Expand Down Expand Up @@ -1443,7 +1452,11 @@ void Recording::recordCpuLoad(Buffer *buf, float proc_user, float proc_system,
flushIfNeeded(buf);
}

void Recording::addThread(int tid) { _thread_set.add(tid); }
// assumption is that we hold the lock (with lock_index)
void Recording::addThread(int lock_index, int tid) {
int active = _active_index.load(std::memory_order_acquire);
_thread_ids[lock_index][active].insert(tid);
}

Error FlightRecorder::start(Arguments &args, bool reset) {
const char *file = args.file();
Expand Down Expand Up @@ -1600,7 +1613,7 @@ void FlightRecorder::recordEvent(int lock_index, int tid, u64 call_trace_id,
break;
}
_rec->flushIfNeeded(buf);
_rec->addThread(tid);
_rec->addThread(lock_index, tid);
}
}

Expand Down
11 changes: 9 additions & 2 deletions ddprof-lib/src/main/cpp/flightRecorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#define _FLIGHTRECORDER_H

#include <map>
#include <unordered_set>

#include <limits.h>
#include <string.h>
Expand All @@ -24,6 +25,7 @@
#include "mutex.h"
#include "objectSampler.h"
#include "threadFilter.h"
#include "threadIdTable.h"
#include "vmEntry.h"

const u64 MAX_JLONG = 0x7fffffffffffffffULL;
Expand Down Expand Up @@ -117,9 +119,13 @@ class Recording {
static char *_java_command;

RecordingBuffer _buf[CONCURRENCY_LEVEL];
// we have several tables to avoid lock contention
// we have a second dimension to allow a switch in the active table
ThreadIdTable _thread_ids[CONCURRENCY_LEVEL][2];
std::atomic<int> _active_index{0}; // 0 or 1 globally

int _fd;
off_t _chunk_start;
ThreadFilter _thread_set;
MethodMap _method_map;

Arguments _args;
Expand Down Expand Up @@ -248,7 +254,8 @@ class Recording {
LockEvent *event);
void recordCpuLoad(Buffer *buf, float proc_user, float proc_system,
float machine_total);
void addThread(int tid);

void addThread(int lock_index, int tid);
};

class Lookup {
Expand Down
114 changes: 89 additions & 25 deletions ddprof-lib/src/main/cpp/javaApi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <assert.h>

#include "arch_dd.h"
#include "context.h"
#include "counters.h"
#include "engine.h"
Expand Down Expand Up @@ -124,19 +125,103 @@ Java_com_datadoghq_profiler_JavaProfiler_getSamples(JNIEnv *env,
return (jlong)Profiler::instance()->total_samples();
}

// some duplication between add and remove, though we want to avoid having an extra branch in the hot path
extern "C" DLLEXPORT void JNICALL
Java_com_datadoghq_profiler_JavaProfiler_filterThreadAdd0(JNIEnv *env,
jobject unused) {
ProfiledThread *current = ProfiledThread::current();
if (unlikely(current == nullptr)) {
assert(false);
return;
}
int tid = current->tid();
if (unlikely(tid < 0)) {
return;
}
ThreadFilter *thread_filter = Profiler::instance()->threadFilter();
if (unlikely(!thread_filter->enabled())) {
return;
}

int slot_id = current->filterSlotId();
if (unlikely(slot_id == -1)) {
// Thread doesn't have a slot ID yet (e.g., main thread), so register it
// Happens when we are not enabled before thread start
slot_id = thread_filter->registerThread();
current->setFilterSlotId(slot_id);
}

if (unlikely(slot_id == -1)) {
return; // Failed to register thread
}
thread_filter->add(tid, slot_id);
}

extern "C" DLLEXPORT void JNICALL
Java_com_datadoghq_profiler_JavaProfiler_filterThreadRemove0(JNIEnv *env,
jobject unused) {
ProfiledThread *current = ProfiledThread::current();
if (unlikely(current == nullptr)) {
Copy link
Contributor

@zhengyu123 zhengyu123 Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think assert(current != nllptr) should be sufficient, otherwise, we have a bigger problem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this happens on unloading. JVMTI cleanup can be removed before all threads are finished ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have the assert for debug builds, though we can keep avoiding crashes for release builds. Feel free to answer if you do not agree.

assert(false);
return;
}
int tid = current->tid();
if (unlikely(tid < 0)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible? or we should just assert

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question
I think we are missing the instrumentation of some threads. Could non-java threads call back into java?
It could be nice to have asserts to debug this, though I think I'd prefer the safer path for a prod release.

return;
}
ThreadFilter *thread_filter = Profiler::instance()->threadFilter();
if (unlikely(!thread_filter->enabled())) {
return;
}

int slot_id = current->filterSlotId();
if (unlikely(slot_id == -1)) {
// Thread doesn't have a slot ID yet - nothing to remove
return;
}
thread_filter->remove(slot_id);
}

// Backward compatibility for existing code
extern "C" DLLEXPORT void JNICALL
Java_com_datadoghq_profiler_JavaProfiler_filterThread0(JNIEnv *env,
jobject unused,
jboolean enable) {
int tid = ProfiledThread::currentTid();
if (tid < 0) {
ProfiledThread *current = ProfiledThread::current();
if (unlikely(current == nullptr)) {
assert(false);
return;
}
int tid = current->tid();
if (unlikely(tid < 0)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above

return;
}
ThreadFilter *thread_filter = Profiler::instance()->threadFilter();
if (unlikely(!thread_filter->enabled())) {
return;
}

int slot_id = current->filterSlotId();
if (unlikely(slot_id == -1)) {
if (enable) {
// Thread doesn't have a slot ID yet, so register it
assert(thread_filter->enabled() && "ThreadFilter should be enabled when trying to register thread");
slot_id = thread_filter->registerThread();
current->setFilterSlotId(slot_id);
} else {
// Thread doesn't have a slot ID yet - nothing to remove
return;
}
}

if (unlikely(slot_id == -1)) {
return; // Failed to register thread
}

if (enable) {
thread_filter->add(tid);
thread_filter->add(tid, slot_id);
} else {
thread_filter->remove(tid);
thread_filter->remove(slot_id);
}
}

Expand Down Expand Up @@ -408,27 +493,6 @@ Java_com_datadoghq_profiler_JVMAccess_healthCheck0(JNIEnv *env,
return true;
}

extern "C" DLLEXPORT jlong JNICALL
Java_com_datadoghq_profiler_ActiveBitmap_bitmapAddressFor0(JNIEnv *env,
jclass unused,
jint tid) {
u64* bitmap = Profiler::instance()->threadFilter()->bitmapAddressFor((int)tid);
return (jlong)bitmap;
}

extern "C" DLLEXPORT jboolean JNICALL
Java_com_datadoghq_profiler_ActiveBitmap_isActive0(JNIEnv *env,
jclass unused,
jint tid) {
return Profiler::instance()->threadFilter()->accept((int)tid) ? JNI_TRUE : JNI_FALSE;
}

extern "C" DLLEXPORT jlong JNICALL
Java_com_datadoghq_profiler_ActiveBitmap_getActiveCountAddr0(JNIEnv *env,
jclass unused) {
return (jlong)Profiler::instance()->threadFilter()->addressOfSize();
}

// Static variable to track the current published context
static otel_process_ctx_result* current_published_context = nullptr;

Expand Down
49 changes: 39 additions & 10 deletions ddprof-lib/src/main/cpp/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,12 @@ void Profiler::addRuntimeStub(const void *address, int length,

void Profiler::onThreadStart(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread) {
ProfiledThread::initCurrentThread();

int tid = ProfiledThread::currentTid();
ProfiledThread *current = ProfiledThread::current();
int tid = current->tid();
if (_thread_filter.enabled()) {
_thread_filter.remove(tid);
int slot_id = _thread_filter.registerThread();
current->setFilterSlotId(slot_id);
_thread_filter.remove(slot_id); // Remove from filtering initially
}
updateThreadName(jvmti, jni, thread, true);

Expand All @@ -116,16 +118,33 @@ void Profiler::onThreadStart(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread) {
}

void Profiler::onThreadEnd(jvmtiEnv *jvmti, JNIEnv *jni, jthread thread) {
int tid = ProfiledThread::currentTid();
if (_thread_filter.enabled()) {
_thread_filter.remove(tid);
ProfiledThread *current = ProfiledThread::current();
int tid = -1;

if (current != nullptr) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can current == nullptr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember seeing a crash around this..
Basically JVMTI could unload and delete the profiled thread from under our feet.

// ProfiledThread is alive - do full cleanup and use efficient tid access
int slot_id = current->filterSlotId();
tid = current->tid();

if (_thread_filter.enabled()) {
_thread_filter.unregisterThread(slot_id);
current->setFilterSlotId(-1);
}

ProfiledThread::release();
} else {
// ProfiledThread already cleaned up - try to get tid from JVMTI as fallback
tid = VMThread::nativeThreadId(jni, thread);
if (tid < 0) {
// No ProfiledThread AND can't get tid from JVMTI - nothing we can do
return;
}
}
updateThreadName(jvmti, jni, thread, true);


// These can run if we have a valid tid
updateThreadName(jvmti, jni, thread, false); // false = not self
_cpu_engine->unregisterThread(tid);
// unregister here because JNI callers generally don't know about thread exits
_wall_engine->unregisterThread(tid);
ProfiledThread::release();
}

int Profiler::registerThread(int tid) {
Expand Down Expand Up @@ -1152,6 +1171,16 @@ Error Profiler::start(Arguments &args, bool reset) {
}

_thread_filter.init(args._filter);

// Minor optim: Register the current thread (start thread won't be called)
if (_thread_filter.enabled()) {
ProfiledThread *current = ProfiledThread::current();
if (current != nullptr) {
int slot_id = _thread_filter.registerThread();
current->setFilterSlotId(slot_id);
_thread_filter.remove(slot_id); // Remove from filtering initially (matches onThreadStart behavior)
}
}

_cpu_engine = selectCpuEngine(args);
_wall_engine = selectWallEngine(args);
Expand Down
6 changes: 5 additions & 1 deletion ddprof-lib/src/main/cpp/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,12 @@ class ProfiledThread : public ThreadLocalData {
u32 _wall_epoch;
u64 _call_trace_id;
u32 _recording_epoch;
int _filter_slot_id; // Slot ID for thread filtering
UnwindFailures _unwind_failures;

ProfiledThread(int buffer_pos, int tid)
: ThreadLocalData(), _pc(0), _span_id(0), _crash_depth(0), _buffer_pos(buffer_pos), _tid(tid), _cpu_epoch(0),
_wall_epoch(0), _call_trace_id(0), _recording_epoch(0) {};
_wall_epoch(0), _call_trace_id(0), _recording_epoch(0), _filter_slot_id(-1) {};

void releaseFromBuffer();

Expand Down Expand Up @@ -125,6 +126,9 @@ class ProfiledThread : public ThreadLocalData {
}

static void signalHandler(int signo, siginfo_t *siginfo, void *ucontext);

int filterSlotId() { return _filter_slot_id; }
void setFilterSlotId(int slotId) { _filter_slot_id = slotId; }
};

#endif // _THREAD_H
Loading
Loading