From 51adb9f880fb811afc6b57b7f9cbc228bc2be6a9 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Fri, 6 Jun 2025 18:05:19 +0000 Subject: [PATCH 01/11] v0 --- ddprof-lib/src/main/cpp/javaApi.cpp | 9 +++ ddprof-lib/src/main/cpp/threadFilter.cpp | 4 ++ ddprof-lib/src/main/cpp/threadFilter.h | 1 + .../com/datadoghq/profiler/ActiveBitmaps.java | 68 +++++++++++++++++++ .../com/datadoghq/profiler/JavaProfiler.java | 4 +- 5 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index 7d45e4227..a1db16194 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -406,3 +406,12 @@ Java_com_datadoghq_profiler_JVMAccess_healthCheck0(JNIEnv *env, jobject unused) { return true; } + +extern "C" DLLEXPORT void JNICALL +Java_com_datadoghq_profiler_ActiveBitmaps_DirectByteBufferAccess_setBitmap(JNIEnv *env, + jclass unused, + jint index, + jlong addr) { + u64* ptr = (u64*)addr; + Profiler::instance()->threadFilter()->setBitmap((int)index, ptr); +} diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 034aabf9b..fbd9fd740 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -85,6 +85,10 @@ void ThreadFilter::clear() { _size = 0; } +void ThreadFilter::setBitmap(int index, u64* bitmap) { + _bitmap[index] = bitmap; +} + bool ThreadFilter::accept(int thread_id) { u64 *b = bitmap(thread_id); return b != NULL && (word(b, thread_id) & (1ULL << (thread_id & 0x3f))); diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index cec7e7048..89376b325 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -61,6 +61,7 @@ class ThreadFilter { void init(const char *filter); void clear(); + void setBitmap(int index, u64* bitmap); bool accept(int thread_id); void add(int thread_id); diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java new file mode 100644 index 000000000..61e6d12f6 --- /dev/null +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java @@ -0,0 +1,68 @@ +package com.datadoghq.profiler; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.List; +import java.nio.ByteBuffer; +import java.nio.Buffer; + +class ActiveBitmaps { + static final int BITMAP_SIZE = 65536; // 64K + static final int BITMAP_CAPACITY = BITMAP_SIZE * 8; + static final List bitmaps = new ArrayList<>(); + + static final Field address; + static { + try { + address = Buffer.class.getDeclaredField("address"); + address.setAccessible(true); + } catch (NoSuchFieldException e) { + throw new AssertionError(e); + } + } + + public synchronized static void setActive(int tid, boolean active) { + ByteBuffer bitmap = bitmapFor(tid); + int index = (tid % BITMAP_CAPACITY) / 8; + byte val = bitmap.get(index); + byte mask = (byte)(1 << (tid & 0x07)); + if (active) { + bitmap.put(index, (byte)(val | mask)); + } else { + bitmap.put(index, (byte)(val & (~mask))); + } + } + + static ByteBuffer bitmapFor(int tid) { + int index = tid / BITMAP_CAPACITY; + if (bitmaps.size() <= index) { + for (int i = bitmaps.size(); i < index; i++) { + bitmaps.set(i, null); + } + } + ByteBuffer bitmap = bitmaps.get(index); + if (bitmap == null) { + bitmap = allocateBitmap(); + try { + long addr = address.getLong(bitmap); + setBitmap(index, addr); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + bitmaps.set(index, allocateBitmap()); + } + return bitmap; + } + + static ByteBuffer allocateBitmap() { + ByteBuffer b = ByteBuffer.allocateDirect(BITMAP_SIZE); + for (int index = 0; index < BITMAP_SIZE; index++) { + b.put(index, (byte)0); + } + return b; + } + + // Set bitmap to native code + static native void setBitmap(int index, long address); +} + diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index 4436273c6..fc23921ab 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -208,7 +208,7 @@ public boolean recordTraceRoot(long rootSpanId, String endpoint, int sizeLimit) * 'filter' option must be enabled to use this method. */ public void addThread() { - filterThread0(true); + ActiveBitmaps.setActive(TID.get(), true); } /** @@ -216,7 +216,7 @@ public void addThread() { * 'filter' option must be enabled to use this method. */ public void removeThread() { - filterThread0(false); + ActiveBitmaps.setActive(TID.get(), false); } From 26dca08f3fb548a4df971caafdff60b04507dc11 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Sun, 8 Jun 2025 01:35:50 +0000 Subject: [PATCH 02/11] v1 --- ddprof-lib/src/main/cpp/javaApi.cpp | 15 +- ddprof-lib/src/main/cpp/threadFilter.cpp | 8 +- ddprof-lib/src/main/cpp/threadFilter.h | 3 +- .../com/datadoghq/profiler/ActiveBitmaps.java | 27 +- .../scenarios/ThreadFilterBenchmark.java | 247 ++++++++++++++++++ 5 files changed, 270 insertions(+), 30 deletions(-) create mode 100644 ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index a1db16194..96e27265d 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -407,11 +407,12 @@ Java_com_datadoghq_profiler_JVMAccess_healthCheck0(JNIEnv *env, return true; } -extern "C" DLLEXPORT void JNICALL -Java_com_datadoghq_profiler_ActiveBitmaps_DirectByteBufferAccess_setBitmap(JNIEnv *env, - jclass unused, - jint index, - jlong addr) { - u64* ptr = (u64*)addr; - Profiler::instance()->threadFilter()->setBitmap((int)index, ptr); +extern "C" DLLEXPORT jobject JNICALL +Java_com_datadoghq_profiler_ActiveBitmaps_newBitmapFor(JNIEnv *env, + jclass unused, + jint index) { + int size; + u64* bitmap = Profiler::instance()->threadFilter()->createBitmapFor((int)indexi, size); + jobject b = env->NewDirectByteBuffer((void*)bitmap, (jlong)size); + return b; } diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index fbd9fd740..acd77f203 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -85,8 +85,12 @@ void ThreadFilter::clear() { _size = 0; } -void ThreadFilter::setBitmap(int index, u64* bitmap) { - _bitmap[index] = bitmap; +u64* ThreadFilter::createBitmapFor(int index, int& size) { + assert(_bitmap[index] == NULL); + b = (u64 *)OS::safeAlloc(BITMAP_SIZE); + size = BITMAP_SIZE; + _bitmap[index] = b; + return b; } bool ThreadFilter::accept(int thread_id) { diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index 89376b325..b39d72aac 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -61,7 +61,8 @@ class ThreadFilter { void init(const char *filter); void clear(); - void setBitmap(int index, u64* bitmap); + + u64* createBitmapFor(int index, int& size); bool accept(int thread_id); void add(int thread_id); diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java index 61e6d12f6..ce1bef74d 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java @@ -1,26 +1,16 @@ package com.datadoghq.profiler; -import java.lang.reflect.Field; import java.util.ArrayList; import java.util.List; import java.nio.ByteBuffer; import java.nio.Buffer; +import java.lang.invoke.VarHandle; class ActiveBitmaps { static final int BITMAP_SIZE = 65536; // 64K static final int BITMAP_CAPACITY = BITMAP_SIZE * 8; static final List bitmaps = new ArrayList<>(); - static final Field address; - static { - try { - address = Buffer.class.getDeclaredField("address"); - address.setAccessible(true); - } catch (NoSuchFieldException e) { - throw new AssertionError(e); - } - } - public synchronized static void setActive(int tid, boolean active) { ByteBuffer bitmap = bitmapFor(tid); int index = (tid % BITMAP_CAPACITY) / 8; @@ -31,24 +21,21 @@ public synchronized static void setActive(int tid, boolean active) { } else { bitmap.put(index, (byte)(val & (~mask))); } + // Volatile store + VarHandle.fullFence(); } static ByteBuffer bitmapFor(int tid) { int index = tid / BITMAP_CAPACITY; if (bitmaps.size() <= index) { - for (int i = bitmaps.size(); i < index; i++) { - bitmaps.set(i, null); + for (int i = bitmaps.size(); i <= index; i++) { + bitmaps.add(null); } } ByteBuffer bitmap = bitmaps.get(index); if (bitmap == null) { bitmap = allocateBitmap(); - try { - long addr = address.getLong(bitmap); - setBitmap(index, addr); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } + setBitmap(index, bitmap); bitmaps.set(index, allocateBitmap()); } return bitmap; @@ -63,6 +50,6 @@ static ByteBuffer allocateBitmap() { } // Set bitmap to native code - static native void setBitmap(int index, long address); + static native ByteBuffer newBitmapFor(int index); } diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java new file mode 100644 index 000000000..88ce317b4 --- /dev/null +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java @@ -0,0 +1,247 @@ +package com.datadoghq.profiler.stresstest.scenarios; + +import com.datadoghq.profiler.JavaProfiler; +import com.datadoghq.profiler.stresstest.Configuration; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; + +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicIntegerArray; + +@State(Scope.Benchmark) +public class ThreadFilterBenchmark extends Configuration { + + private static final int NUM_THREADS = 4; + private ExecutorService executorService; + private JavaProfiler profiler; + private AtomicBoolean running; + private CountDownLatch startLatch; + private CountDownLatch stopLatch; + private AtomicLong operationCount; + private long startTime; + private long stopTime; + private PrintWriter logWriter; + private static final int ARRAY_SIZE = 1024; // Larger array to stress memory + private static final int[] sharedArray = new int[ARRAY_SIZE]; + private static final AtomicIntegerArray atomicArray = new AtomicIntegerArray(ARRAY_SIZE); + private static final int CACHE_LINE_SIZE = 64; // Typical cache line size + private static final int STRIDE = CACHE_LINE_SIZE / Integer.BYTES; // Elements per cache line + private boolean useThreadFilters = true; // Flag to control the use of thread filters + private AtomicLong addThreadCount = new AtomicLong(0); + private AtomicLong removeThreadCount = new AtomicLong(0); + + @Setup(Level.Trial) + public void setup() throws IOException { + System.out.println("Setting up benchmark..."); + System.out.println("Creating thread pool with " + NUM_THREADS + " threads"); + executorService = Executors.newFixedThreadPool(NUM_THREADS); + System.out.println("Getting profiler instance"); + profiler = JavaProfiler.getInstance(); + + // Stop the profiler if it's already running + try { + profiler.stop(); + } catch (IllegalStateException e) { + System.out.println("Profiler was not active at setup."); + } + + String config = "start,wall=10ms,filter=1,file=/tmp/thread_filter_profile.jfr"; + System.out.println("Starting profiler with " + config); + profiler.execute(config); + System.out.println("Started profiler with output file"); + running = new AtomicBoolean(true); + operationCount = new AtomicLong(0); + startTime = System.currentTimeMillis(); + stopTime = startTime + 30000; // Run for 30 seconds + System.out.println("Benchmark setup completed at " + startTime); + + try { + String logFile = "/tmp/thread_filter_benchmark.log"; + System.out.println("Attempting to create log file at: " + logFile); + logWriter = new PrintWriter(new FileWriter(logFile)); + logWriter.printf("Benchmark started at %d%n", startTime); + logWriter.flush(); + System.out.println("Successfully created and wrote to log file"); + } catch (IOException e) { + System.err.println("Failed to create log file: " + e.getMessage()); + e.printStackTrace(); + throw e; + } + } + + @TearDown(Level.Trial) + public void tearDown() { + System.out.println("Tearing down benchmark..."); + running.set(false); + + // Wait for all threads to finish with a timeout + try { + if (stopLatch != null) { + if (!stopLatch.await(30, TimeUnit.SECONDS)) { + System.err.println("Warning: Some threads did not finish within timeout"); + } + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + + // Shutdown executor with timeout + executorService.shutdown(); + try { + if (!executorService.awaitTermination(30, TimeUnit.SECONDS)) { + executorService.shutdownNow(); + if (!executorService.awaitTermination(30, TimeUnit.SECONDS)) { + System.err.println("Warning: Executor did not terminate"); + } + } + } catch (InterruptedException e) { + executorService.shutdownNow(); + Thread.currentThread().interrupt(); + } + + // Stop the profiler if it's active + try { + profiler.stop(); + } catch (IllegalStateException e) { + System.out.println("Profiler was not active at teardown."); + } + + long endTime = System.currentTimeMillis(); + long totalOps = operationCount.get(); + double durationSecs = (endTime - startTime) / 1000.0; + double opsPerSec = totalOps / durationSecs; + double addOpsPerSec = addThreadCount.get() / durationSecs; + double removeOpsPerSec = removeThreadCount.get() / durationSecs; + + String stats = String.format("Thread Filter Stats:%n" + + "Total operations: %,d%n" + + "Duration: %.2f seconds%n" + + "Operations/second: %,.0f%n" + + "Operations/second/thread: %,.0f%n" + + "AddThread operations/second: %,.0f%n" + + "RemoveThread operations/second: %,.0f%n", + totalOps, durationSecs, opsPerSec, opsPerSec / NUM_THREADS, addOpsPerSec, removeOpsPerSec); + + System.out.print(stats); + if (logWriter != null) { + try { + logWriter.print(stats); + logWriter.flush(); + logWriter.close(); + System.out.println("Successfully closed log file"); + } catch (Exception e) { + System.err.println("Error closing log file: " + e.getMessage()); + e.printStackTrace(); + } + } + } + + public void setUseThreadFilters(boolean useThreadFilters) { + this.useThreadFilters = useThreadFilters; + } + + @Benchmark + @BenchmarkMode(Mode.Throughput) + @Fork(value = 1, warmups = 0) + @Warmup(iterations = 1, time = 1) + @Measurement(iterations = 1, time = 2) + @Threads(1) + @OutputTimeUnit(TimeUnit.MILLISECONDS) + public long threadFilterStress() throws InterruptedException { + System.out.println("Starting benchmark iteration..."); + startLatch = new CountDownLatch(NUM_THREADS); + stopLatch = new CountDownLatch(NUM_THREADS); + + // Start all worker threads + for (int i = 0; i < NUM_THREADS; i++) { + final int threadId = i; + executorService.submit(() -> { + try { + startLatch.countDown(); + startLatch.await(30, TimeUnit.SECONDS); + + String startMsg = String.format("Thread %d started%n", threadId); + System.out.print(startMsg); + if (logWriter != null) { + logWriter.print(startMsg); + logWriter.flush(); + } + + while (running.get() && System.currentTimeMillis() < stopTime) { + // Memory-intensive operations that would be sensitive to false sharing + for (int j = 0; j < ARRAY_SIZE; j += STRIDE) { + if (useThreadFilters) { + // Register thread at the start of each cache line operation + profiler.addThread(); + addThreadCount.incrementAndGet(); + } + + // Each thread writes to its own cache line + int baseIndex = (threadId * STRIDE) % ARRAY_SIZE; + for (int k = 0; k < STRIDE; k++) { + int index = (baseIndex + k) % ARRAY_SIZE; + // Write to shared array + sharedArray[index] = threadId; + // Read and modify + int value = sharedArray[index] + 1; + // Atomic operation + atomicArray.set(index, value); + } + + if (useThreadFilters) { + // Remove thread after cache line operation + profiler.removeThread(); + removeThreadCount.incrementAndGet(); + } + operationCount.incrementAndGet(); + } + + // More memory operations with thread registration + for (int j = 0; j < ARRAY_SIZE; j += STRIDE) { + if (useThreadFilters) { + // Register thread at the start of each cache line operation + profiler.addThread(); + addThreadCount.incrementAndGet(); + } + + int baseIndex = (threadId * STRIDE) % ARRAY_SIZE; + for (int k = 0; k < STRIDE; k++) { + int index = (baseIndex + k) % ARRAY_SIZE; + int value = atomicArray.get(index); + sharedArray[index] = value * 2; + } + + if (useThreadFilters) { + // Remove thread after cache line operation + profiler.removeThread(); + removeThreadCount.incrementAndGet(); + } + operationCount.incrementAndGet(); + } + + if (operationCount.get() % 1000 == 0) { + String progressMsg = String.format("Thread %d completed %d operations%n", threadId, operationCount.get()); + System.out.print(progressMsg); + if (logWriter != null) { + logWriter.print(progressMsg); + logWriter.flush(); + } + } + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } finally { + stopLatch.countDown(); + } + }); + } + + stopLatch.await(); + return operationCount.get(); + } +} \ No newline at end of file From 9a81b5aa5b30693a92f5ff40f6caded06d4d3801 Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Mon, 9 Jun 2025 13:19:31 -0400 Subject: [PATCH 03/11] v2 --- ddprof-lib/src/main/cpp/javaApi.cpp | 21 +++-- ddprof-lib/src/main/cpp/threadFilter.cpp | 36 ++++---- ddprof-lib/src/main/cpp/threadFilter.h | 5 +- .../com/datadoghq/profiler/ActiveBitmaps.java | 90 +++++++++---------- 4 files changed, 82 insertions(+), 70 deletions(-) diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index 96e27265d..6e65935f3 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -407,12 +407,17 @@ Java_com_datadoghq_profiler_JVMAccess_healthCheck0(JNIEnv *env, return true; } -extern "C" DLLEXPORT jobject JNICALL -Java_com_datadoghq_profiler_ActiveBitmaps_newBitmapFor(JNIEnv *env, - jclass unused, - jint index) { - int size; - u64* bitmap = Profiler::instance()->threadFilter()->createBitmapFor((int)indexi, size); - jobject b = env->NewDirectByteBuffer((void*)bitmap, (jlong)size); - return b; +extern "C" DLLEXPORT jlong JNICALL +Java_com_datadoghq_profiler_ActiveBitmaps_bitmapAddressFor0(JNIEnv *env, + jclass unused, + jint tid) { + u64* bitmap = Profiler::instance()->threadFilter()->bitmapAddressFor((int)tid); + return (jlong)bitmap; } + +extern "C" DLLEXPORT jboolean JNICALL +Java_com_datadoghq_profiler_ActiveBitmaps_isActive(JNIEnv *env, + jclass unused, + jint tid) { + return Profiler::instance()->threadFilter()->accept((int)tid) ? JNI_TRUE : JNI_FALSE; +} diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index acd77f203..a07c151e0 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -17,6 +17,7 @@ #include "threadFilter.h" #include "counters.h" #include "os.h" +#include #include #include @@ -85,25 +86,14 @@ void ThreadFilter::clear() { _size = 0; } -u64* ThreadFilter::createBitmapFor(int index, int& size) { - assert(_bitmap[index] == NULL); - b = (u64 *)OS::safeAlloc(BITMAP_SIZE); - size = BITMAP_SIZE; - _bitmap[index] = b; - return b; -} -bool ThreadFilter::accept(int thread_id) { - u64 *b = bitmap(thread_id); - return b != NULL && (word(b, thread_id) & (1ULL << (thread_id & 0x3f))); -} - -void ThreadFilter::add(int thread_id) { - u64 *b = bitmap(thread_id); +u64* ThreadFilter::getBitmapFor(int thread_id) { + int index = static_cast(thread_id) / BITMAP_CAPACITY; + u64* b = _bitmap[index]; if (b == NULL) { b = (u64 *)OS::safeAlloc(BITMAP_SIZE); u64 *oldb = __sync_val_compare_and_swap( - &_bitmap[(u32)thread_id / BITMAP_CAPACITY], NULL, b); + &_bitmap[index], NULL, b); if (oldb != NULL) { OS::safeFree(b, BITMAP_SIZE); b = oldb; @@ -111,7 +101,23 @@ void ThreadFilter::add(int thread_id) { trackPage(); } } + return b; +} +u64* ThreadFilter::bitmapAddressFor(int thread_id) { + u64* bitmap = getBitmapFor(thread_id); + int index = (thread_id % BITMAP_CAPACITY) / 64; + return &bitmap[index]; +} + +bool ThreadFilter::accept(int thread_id) { + u64 *b = bitmap(thread_id); + return b != NULL && (word(b, thread_id) & (1ULL << (thread_id & 0x3f))); +} + +void ThreadFilter::add(int thread_id) { + u64 *b = getBitmapFor(thread_id); + assert(b != NULL); u64 bit = 1ULL << (thread_id & 0x3f); if (!(__sync_fetch_and_or(&word(b, thread_id), bit) & bit)) { atomicInc(_size); diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index b39d72aac..55db1d28a 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -50,6 +50,8 @@ class ThreadFilter { return bitmap[((u32)thread_id % BITMAP_CAPACITY) >> 6]; } + u64* getBitmapFor(int thread_id); + public: ThreadFilter(); ThreadFilter(ThreadFilter &threadFilter) = delete; @@ -62,11 +64,10 @@ class ThreadFilter { void init(const char *filter); void clear(); - u64* createBitmapFor(int index, int& size); - bool accept(int thread_id); void add(int thread_id); void remove(int thread_id); + u64* bitmapAddressFor(int thread_id); void collect(std::vector &v); }; diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java index ce1bef74d..93824b9c4 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java @@ -1,55 +1,55 @@ package com.datadoghq.profiler; -import java.util.ArrayList; -import java.util.List; -import java.nio.ByteBuffer; -import java.nio.Buffer; -import java.lang.invoke.VarHandle; +import sun.misc.Unsafe; +import java.lang.reflect.Field; -class ActiveBitmaps { - static final int BITMAP_SIZE = 65536; // 64K - static final int BITMAP_CAPACITY = BITMAP_SIZE * 8; - static final List bitmaps = new ArrayList<>(); - public synchronized static void setActive(int tid, boolean active) { - ByteBuffer bitmap = bitmapFor(tid); - int index = (tid % BITMAP_CAPACITY) / 8; - byte val = bitmap.get(index); - byte mask = (byte)(1 << (tid & 0x07)); - if (active) { - bitmap.put(index, (byte)(val | mask)); - } else { - bitmap.put(index, (byte)(val & (~mask))); - } - // Volatile store - VarHandle.fullFence(); - } +class ActiveBitmaps { + private static final Unsafe UNSAFE; + static { + Unsafe unsafe = null; + try { + Field f = Unsafe.class.getDeclaredField("theUnsafe"); + f.setAccessible(true); + unsafe = (Unsafe) f.get(null); + } catch (Exception ignore) { } + UNSAFE = unsafe; + } - static ByteBuffer bitmapFor(int tid) { - int index = tid / BITMAP_CAPACITY; - if (bitmaps.size() <= index) { - for (int i = bitmaps.size(); i <= index; i++) { - bitmaps.add(null); - } - } - ByteBuffer bitmap = bitmaps.get(index); - if (bitmap == null) { - bitmap = allocateBitmap(); - setBitmap(index, bitmap); - bitmaps.set(index, allocateBitmap()); - } - return bitmap; + private static final ThreadLocal Address = new ThreadLocal() { + @Override protected Long initialValue() { + return -1L; } + }; + + // Set bitmap to native code + static native long bitmapAddressFor0(int tid); - static ByteBuffer allocateBitmap() { - ByteBuffer b = ByteBuffer.allocateDirect(BITMAP_SIZE); - for (int index = 0; index < BITMAP_SIZE; index++) { - b.put(index, (byte)0); - } - return b; - } + static void setActive(int tid, boolean active) { + long addr = Address.get(); + if (addr == -1) { + addr = bitmapAddressFor0(tid); + Address.set(addr); + } + long bitmask = 1L << (tid & 0x3f); + long value = UNSAFE.getLong(addr); + long newVal; + if (active) { + newVal = value | bitmask; + } else { + newVal = value & ~bitmask; + } + while (!UNSAFE.compareAndSwapLong(null, addr, value, newVal)) { + value = UNSAFE.getLong(addr); + newVal = active ? (value | bitmask) : (value & ~bitmask); + } - // Set bitmap to native code - static native ByteBuffer newBitmapFor(int index); + if (isActive(tid) != active) { + throw new RuntimeException("Blooooom! " + addr); + } + } + + // Verify + static native boolean isActive(int tid); } From cb51c17cf6a55012123d29736224e5d8e1d3ff0b Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Mon, 9 Jun 2025 21:14:14 -0400 Subject: [PATCH 04/11] v4 --- .../src/main/java/com/datadoghq/profiler/ActiveBitmaps.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java index 93824b9c4..c17abf8c0 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java @@ -44,11 +44,8 @@ static void setActive(int tid, boolean active) { newVal = active ? (value | bitmask) : (value & ~bitmask); } - if (isActive(tid) != active) { - throw new RuntimeException("Blooooom! " + addr); - } + assert isActive(tid) == active; } - // Verify static native boolean isActive(int tid); } From 4bfccf5bd4aec507960351ca755fe7c2dbc6e77c Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Tue, 10 Jun 2025 14:59:05 -0400 Subject: [PATCH 05/11] v0 --- ddprof-lib/src/main/cpp/threadFilter.cpp | 16 +++++++++++++++- ddprof-lib/src/main/cpp/threadFilter.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 034aabf9b..2f61ebf61 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -17,6 +17,7 @@ #include "threadFilter.h" #include "counters.h" #include "os.h" +#include #include #include @@ -85,12 +86,21 @@ void ThreadFilter::clear() { _size = 0; } +int ThreadFilter::hashThreadId(int thread_id) { + u16 lower16 = (u16)(thread_id & 0xffff); + lower16 = ((lower16 & 0x00ff) << 8) | ((lower16 & 0xff00) >> 8); + int tid = (thread_id & ~0xffff) | lower16; + return tid; +} + bool ThreadFilter::accept(int thread_id) { + thread_id = hashThreadId(thread_id); u64 *b = bitmap(thread_id); return b != NULL && (word(b, thread_id) & (1ULL << (thread_id & 0x3f))); } void ThreadFilter::add(int thread_id) { + thread_id = hashThreadId(thread_id); u64 *b = bitmap(thread_id); if (b == NULL) { b = (u64 *)OS::safeAlloc(BITMAP_SIZE); @@ -111,6 +121,7 @@ void ThreadFilter::add(int thread_id) { } void ThreadFilter::remove(int thread_id) { + thread_id = hashThreadId(thread_id); u64 *b = bitmap(thread_id); if (b == NULL) { return; @@ -132,7 +143,10 @@ void ThreadFilter::collect(std::vector &v) { // order here u64 word = __atomic_load_n(&b[j], __ATOMIC_ACQUIRE); while (word != 0) { - v.push_back(start_id + j * 64 + __builtin_ctzl(word)); + int tid = start_id + j * 64 + __builtin_ctzl(word); + // restore thread id; + tid = hashThreadId(tid); + v.push_back(tid); word &= (word - 1); } } diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index cec7e7048..40a44fc94 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -45,6 +45,8 @@ class ThreadFilter { __ATOMIC_ACQUIRE); } + static int hashThreadId(int thread_id); + u64 &word(u64 *bitmap, int thread_id) { // todo: add thread safe APIs return bitmap[((u32)thread_id % BITMAP_CAPACITY) >> 6]; From a9eda77181173cfd1009e942df81cc9a06503ceb Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Tue, 10 Jun 2025 15:14:20 -0400 Subject: [PATCH 06/11] Remove unnecessary import --- ddprof-lib/src/main/cpp/threadFilter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 2f61ebf61..4504bee92 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -17,7 +17,6 @@ #include "threadFilter.h" #include "counters.h" #include "os.h" -#include #include #include From cc957930b34f8358cb4088310f7f56998cb04bd6 Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Tue, 10 Jun 2025 21:55:30 -0400 Subject: [PATCH 07/11] Rename --- ddprof-lib/src/main/cpp/threadFilter.cpp | 12 ++++++------ ddprof-lib/src/main/cpp/threadFilter.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 4504bee92..2e4b09c40 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -85,7 +85,7 @@ void ThreadFilter::clear() { _size = 0; } -int ThreadFilter::hashThreadId(int thread_id) { +int ThreadFilter::mapThreadId(int thread_id) { u16 lower16 = (u16)(thread_id & 0xffff); lower16 = ((lower16 & 0x00ff) << 8) | ((lower16 & 0xff00) >> 8); int tid = (thread_id & ~0xffff) | lower16; @@ -93,13 +93,13 @@ int ThreadFilter::hashThreadId(int thread_id) { } bool ThreadFilter::accept(int thread_id) { - thread_id = hashThreadId(thread_id); + thread_id = mapThreadId(thread_id); u64 *b = bitmap(thread_id); return b != NULL && (word(b, thread_id) & (1ULL << (thread_id & 0x3f))); } void ThreadFilter::add(int thread_id) { - thread_id = hashThreadId(thread_id); + thread_id = mapThreadId(thread_id); u64 *b = bitmap(thread_id); if (b == NULL) { b = (u64 *)OS::safeAlloc(BITMAP_SIZE); @@ -120,7 +120,7 @@ void ThreadFilter::add(int thread_id) { } void ThreadFilter::remove(int thread_id) { - thread_id = hashThreadId(thread_id); + thread_id = mapThreadId(thread_id); u64 *b = bitmap(thread_id); if (b == NULL) { return; @@ -143,8 +143,8 @@ void ThreadFilter::collect(std::vector &v) { u64 word = __atomic_load_n(&b[j], __ATOMIC_ACQUIRE); while (word != 0) { int tid = start_id + j * 64 + __builtin_ctzl(word); - // restore thread id; - tid = hashThreadId(tid); + // restore thread id + tid = mapThreadId(tid); v.push_back(tid); word &= (word - 1); } diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index 40a44fc94..76b7419b5 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -45,7 +45,7 @@ class ThreadFilter { __ATOMIC_ACQUIRE); } - static int hashThreadId(int thread_id); + static int mapThreadId(int thread_id); u64 &word(u64 *bitmap, int thread_id) { // todo: add thread safe APIs From bb2a1168e8870061f41a06ce8edb21106162dedd Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Wed, 11 Jun 2025 10:57:03 -0400 Subject: [PATCH 08/11] Added assertion for potential mapped thread id overflow --- ddprof-lib/src/main/cpp/threadFilter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 2e4b09c40..11a97cb87 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -86,6 +86,8 @@ void ThreadFilter::clear() { } int ThreadFilter::mapThreadId(int thread_id) { + // We want to map the thread_id inside the same bitmap + static_assert(BITMAP_SIZE >= (u16)0xffff, "Potential verflow"); u16 lower16 = (u16)(thread_id & 0xffff); lower16 = ((lower16 & 0x00ff) << 8) | ((lower16 & 0xff00) >> 8); int tid = (thread_id & ~0xffff) | lower16; From cc5445d069f0d63f23407359440a1eae2aae20c4 Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Wed, 11 Jun 2025 16:10:53 -0400 Subject: [PATCH 09/11] v5 --- ddprof-lib/src/main/cpp/javaApi.cpp | 6 ++++++ ddprof-lib/src/main/cpp/threadFilter.cpp | 17 +++++++++++++++-- ddprof-lib/src/main/cpp/threadFilter.h | 3 +++ .../com/datadoghq/profiler/ActiveBitmaps.java | 18 ++++++++++++++---- .../com/datadoghq/profiler/JavaProfiler.java | 1 + .../scenarios/ThreadFilterBenchmark.java | 4 ++-- 6 files changed, 41 insertions(+), 8 deletions(-) diff --git a/ddprof-lib/src/main/cpp/javaApi.cpp b/ddprof-lib/src/main/cpp/javaApi.cpp index 6e65935f3..a2bd6b4a6 100644 --- a/ddprof-lib/src/main/cpp/javaApi.cpp +++ b/ddprof-lib/src/main/cpp/javaApi.cpp @@ -421,3 +421,9 @@ Java_com_datadoghq_profiler_ActiveBitmaps_isActive(JNIEnv *env, jint tid) { return Profiler::instance()->threadFilter()->accept((int)tid) ? JNI_TRUE : JNI_FALSE; } + +extern "C" DLLEXPORT jlong JNICALL +Java_com_datadoghq_profiler_ActiveBitmaps_getActiveCountAddr0(JNIEnv *env, + jclass unused) { + return (jlong)Profiler::instance()->threadFilter()->addressOfSize(); +} diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index a07c151e0..52a0bd794 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -86,6 +86,14 @@ void ThreadFilter::clear() { _size = 0; } +int ThreadFilter::mapThreadId(int thread_id) { + // We want to map the thread_id inside the same bitmap + static_assert(BITMAP_SIZE >= (u16)0xffff, "Potential verflow"); + u16 lower16 = (u16)(thread_id & 0xffff); + lower16 = ((lower16 & 0x00ff) << 8) | ((lower16 & 0xff00) >> 8); + int tid = (thread_id & ~0xffff) | lower16; + return tid; +} u64* ThreadFilter::getBitmapFor(int thread_id) { int index = static_cast(thread_id) / BITMAP_CAPACITY; @@ -106,18 +114,21 @@ u64* ThreadFilter::getBitmapFor(int thread_id) { u64* ThreadFilter::bitmapAddressFor(int thread_id) { u64* bitmap = getBitmapFor(thread_id); + thread_id = mapThreadId(thread_id); int index = (thread_id % BITMAP_CAPACITY) / 64; return &bitmap[index]; } bool ThreadFilter::accept(int thread_id) { u64 *b = bitmap(thread_id); + thread_id = mapThreadId(thread_id); return b != NULL && (word(b, thread_id) & (1ULL << (thread_id & 0x3f))); } void ThreadFilter::add(int thread_id) { u64 *b = getBitmapFor(thread_id); assert(b != NULL); + thread_id = mapThreadId(thread_id); u64 bit = 1ULL << (thread_id & 0x3f); if (!(__sync_fetch_and_or(&word(b, thread_id), bit) & bit)) { atomicInc(_size); @@ -129,7 +140,7 @@ void ThreadFilter::remove(int thread_id) { if (b == NULL) { return; } - + thread_id = mapThreadId(thread_id); u64 bit = 1ULL << (thread_id & 0x3f); if (__sync_fetch_and_and(&word(b, thread_id), ~bit) & bit) { atomicInc(_size, -1); @@ -146,7 +157,9 @@ void ThreadFilter::collect(std::vector &v) { // order here u64 word = __atomic_load_n(&b[j], __ATOMIC_ACQUIRE); while (word != 0) { - v.push_back(start_id + j * 64 + __builtin_ctzl(word)); + int thread_id = start_id + j * 64 + __builtin_ctzl(word); + thread_id = mapThreadId(thread_id); + v.push_back(thread_id); word &= (word - 1); } } diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index 55db1d28a..2e21fda62 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -45,6 +45,8 @@ class ThreadFilter { __ATOMIC_ACQUIRE); } + static int mapThreadId(int thread_id); + u64 &word(u64 *bitmap, int thread_id) { // todo: add thread safe APIs return bitmap[((u32)thread_id % BITMAP_CAPACITY) >> 6]; @@ -60,6 +62,7 @@ class ThreadFilter { bool enabled() { return _enabled; } int size() { return _size; } + const volatile int* addressOfSize() const { return &_size; } void init(const char *filter); void clear(); diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java index c17abf8c0..065df58f2 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java @@ -16,12 +16,18 @@ class ActiveBitmaps { UNSAFE = unsafe; } + private static long activeCountAddr = 0; + private static final ThreadLocal Address = new ThreadLocal() { @Override protected Long initialValue() { return -1L; } - }; - + }; + + public static void initialize() { + activeCountAddr = getActiveCountAddr0(); + } + // Set bitmap to native code static native long bitmapAddressFor0(int tid); @@ -31,7 +37,7 @@ static void setActive(int tid, boolean active) { addr = bitmapAddressFor0(tid); Address.set(addr); } - long bitmask = 1L << (tid & 0x3f); + long bitmask = 1L << ((tid >> 8) & 0x3f); long value = UNSAFE.getLong(addr); long newVal; if (active) { @@ -43,10 +49,14 @@ static void setActive(int tid, boolean active) { value = UNSAFE.getLong(addr); newVal = active ? (value | bitmask) : (value & ~bitmask); } - + int delta = active ? 1 : -1; + assert activeCountAddr != 0; + UNSAFE.getAndAddInt(null, activeCountAddr, delta); assert isActive(tid) == active; } // Verify static native boolean isActive(int tid); + + static native long getActiveCountAddr0(); } diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java index fc23921ab..8ca4bd59e 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/JavaProfiler.java @@ -108,6 +108,7 @@ public static synchronized JavaProfiler getInstance(String libLocation, String s throw new IOException("Failed to load Datadog Java profiler library", result.error); } init0(); + ActiveBitmaps.initialize(); profiler.initializeContextStorage(); instance = profiler; diff --git a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java index 88ce317b4..682176af7 100644 --- a/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java +++ b/ddprof-stresstest/src/jmh/java/com/datadoghq/profiler/stresstest/scenarios/ThreadFilterBenchmark.java @@ -106,7 +106,7 @@ public void tearDown() { // Stop the profiler if it's active try { - profiler.stop(); +// profiler.stop(); } catch (IllegalStateException e) { System.out.println("Profiler was not active at teardown."); } @@ -244,4 +244,4 @@ public long threadFilterStress() throws InterruptedException { stopLatch.await(); return operationCount.get(); } -} \ No newline at end of file +} From da080751bfeb1a880376589d2ff88e44af644f5a Mon Sep 17 00:00:00 2001 From: "zhengyu.gu" Date: Wed, 11 Jun 2025 20:04:34 -0400 Subject: [PATCH 10/11] v6 --- ddprof-lib/src/main/cpp/threadFilter.cpp | 3 +-- ddprof-lib/src/main/cpp/threadFilter.h | 4 ++++ .../src/main/java/com/datadoghq/profiler/ActiveBitmaps.java | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 52a0bd794..3fd83147f 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -115,8 +115,7 @@ u64* ThreadFilter::getBitmapFor(int thread_id) { u64* ThreadFilter::bitmapAddressFor(int thread_id) { u64* bitmap = getBitmapFor(thread_id); thread_id = mapThreadId(thread_id); - int index = (thread_id % BITMAP_CAPACITY) / 64; - return &bitmap[index]; + return wordAddress(bitmap, thread_id); } bool ThreadFilter::accept(int thread_id) { diff --git a/ddprof-lib/src/main/cpp/threadFilter.h b/ddprof-lib/src/main/cpp/threadFilter.h index 2e21fda62..a5654dbe4 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.h +++ b/ddprof-lib/src/main/cpp/threadFilter.h @@ -52,6 +52,10 @@ class ThreadFilter { return bitmap[((u32)thread_id % BITMAP_CAPACITY) >> 6]; } + u64* wordAddress(u64 *bitmap, int thread_id) { + return &bitmap[((u32)thread_id % BITMAP_CAPACITY) >> 6]; + } + u64* getBitmapFor(int thread_id); public: diff --git a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java index 065df58f2..6a4e5d8ee 100644 --- a/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java +++ b/ddprof-lib/src/main/java/com/datadoghq/profiler/ActiveBitmaps.java @@ -54,7 +54,8 @@ static void setActive(int tid, boolean active) { UNSAFE.getAndAddInt(null, activeCountAddr, delta); assert isActive(tid) == active; } - // Verify + + // For verification static native boolean isActive(int tid); static native long getActiveCountAddr0(); From a3d2477d439f2c865e29fe46a956a7ad4a283d43 Mon Sep 17 00:00:00 2001 From: Zhengyu Gu Date: Thu, 12 Jun 2025 15:52:26 -0400 Subject: [PATCH 11/11] Reverse lower 16 bits, instead of swapping lower 2 bytes --- ddprof-lib/src/main/cpp/reverse_bits.h | 23 +++++++++++++++++++++++ ddprof-lib/src/main/cpp/threadFilter.cpp | 3 ++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 ddprof-lib/src/main/cpp/reverse_bits.h diff --git a/ddprof-lib/src/main/cpp/reverse_bits.h b/ddprof-lib/src/main/cpp/reverse_bits.h new file mode 100644 index 000000000..81e46e19a --- /dev/null +++ b/ddprof-lib/src/main/cpp/reverse_bits.h @@ -0,0 +1,23 @@ +// +// Borrow the implementation from openjdk +// https://github.com/openjdk/jdk/blob/master/src/hotspot/share/utilities/reverse_bits.hpp +// + +#ifndef REVERSE_BITS_H +#define REVERSE_BITS_H +#include "arch_dd.h" +#include + +static constexpr u32 rep_5555 = static_cast(UINT64_C(0x5555555555555555)); +static constexpr u32 rep_3333 = static_cast(UINT64_C(0x3333333333333333)); +static constexpr u32 rep_0F0F = static_cast(UINT64_C(0x0F0F0F0F0F0F0F0F)); + +inline u16 reverse16(u16 v) { + u32 x = static_cast(v); + x = ((x & rep_5555) << 1) | ((x >> 1) & rep_5555); + x = ((x & rep_3333) << 2) | ((x >> 2) & rep_3333); + x = ((x & rep_0F0F) << 4) | ((x >> 4) & rep_0F0F); + return __builtin_bswap16(static_cast(x)); +} + +#endif //REVERSE_BITS_H diff --git a/ddprof-lib/src/main/cpp/threadFilter.cpp b/ddprof-lib/src/main/cpp/threadFilter.cpp index 11a97cb87..e533ac0d7 100644 --- a/ddprof-lib/src/main/cpp/threadFilter.cpp +++ b/ddprof-lib/src/main/cpp/threadFilter.cpp @@ -17,6 +17,7 @@ #include "threadFilter.h" #include "counters.h" #include "os.h" +#include "reverse_bits.h" #include #include @@ -89,7 +90,7 @@ int ThreadFilter::mapThreadId(int thread_id) { // We want to map the thread_id inside the same bitmap static_assert(BITMAP_SIZE >= (u16)0xffff, "Potential verflow"); u16 lower16 = (u16)(thread_id & 0xffff); - lower16 = ((lower16 & 0x00ff) << 8) | ((lower16 & 0xff00) >> 8); + lower16 = reverse16(lower16); int tid = (thread_id & ~0xffff) | lower16; return tid; }