From d4136b51c027cc7d36d8a9c44a72cacdfa3fdbe9 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Wed, 15 Jan 2025 21:19:31 +0100 Subject: [PATCH 01/48] Cache intermediate segments allocated during FFM stub invocations. --- .../jdk/internal/foreign/abi/SharedUtils.java | 86 +++++++++++++++++-- 1 file changed, 78 insertions(+), 8 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 83698398edaa0..7157834e16242 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -40,6 +40,7 @@ import jdk.internal.foreign.abi.s390.linux.LinuxS390Linker; import jdk.internal.foreign.abi.x64.sysv.SysVx64Linker; import jdk.internal.foreign.abi.x64.windows.Windowsx64Linker; +import jdk.internal.misc.TerminatingThreadLocal; import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.AddressLayout; @@ -382,24 +383,93 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { : chunkOffset; } + static class Segment { + private static final int CACHED_SIZE = 256; + + private final Arena arena = Arena.ofShared(); + private final MemorySegment segment; + + public Segment(long size) { + segment = arena.allocate(Math.max(size, CACHED_SIZE)); + } + + boolean supports(long size) { + return segment.byteSize() > size; + } + + private SegmentAllocator slicingAllocator() { + return SegmentAllocator.slicingAllocator(segment); + } + + private Scope scope() { + return segment.scope(); + } + + boolean canCache() { + return segment.byteSize() == CACHED_SIZE; + } + + void close() { + arena.close(); + } + } + + static class SegmentCache { + private Segment segment = null; + + Segment acquire(long size) { + if (segment == null || !segment.supports(size)) { + return new Segment(size); + } + Segment result = segment; + segment = null; + return result; + } + + private boolean canCache(Segment released) { + return this.segment == null && released.canCache(); + } + + void release(Segment released) { + if (canCache(released)) this.segment = released; + else released.close(); + } + + void close() { + if (this.segment != null) this.segment.close(); + } + } + + private static final TerminatingThreadLocal segmentCache = new TerminatingThreadLocal() { + @Override + protected SegmentCache initialValue() { + return new SegmentCache(); + } + + @Override + protected void threadTerminated(SegmentCache cache) { + cache.close(); + } + }; + public static Arena newBoundedArena(long size) { return new Arena() { - final Arena arena = Arena.ofConfined(); - final SegmentAllocator slicingAllocator = SegmentAllocator.slicingAllocator(arena.allocate(size)); + final Segment segment = SharedUtils.segmentCache.get().acquire(size); + final SegmentAllocator allocator = segment.slicingAllocator(); @Override - public Scope scope() { - return arena.scope(); + public MemorySegment allocate(long byteSize, long byteAlignment) { + return allocator.allocate(byteSize, byteAlignment); } @Override - public void close() { - arena.close(); + public Scope scope() { + return segment.scope(); } @Override - public MemorySegment allocate(long byteSize, long byteAlignment) { - return slicingAllocator.allocate(byteSize, byteAlignment); + public void close() { + SharedUtils.segmentCache.get().release(segment); } }; } From 93280dad2fcf0211c404dc47176524a0b6e933d8 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 16 Jan 2025 10:22:14 +0100 Subject: [PATCH 02/48] readability --- .../jdk/internal/foreign/abi/SharedUtils.java | 86 +++++++++---------- 1 file changed, 40 insertions(+), 46 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 7157834e16242..2c794a36120a7 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -43,16 +43,8 @@ import jdk.internal.misc.TerminatingThreadLocal; import jdk.internal.vm.annotation.ForceInline; -import java.lang.foreign.AddressLayout; -import java.lang.foreign.Arena; -import java.lang.foreign.Linker; -import java.lang.foreign.FunctionDescriptor; -import java.lang.foreign.GroupLayout; -import java.lang.foreign.MemoryLayout; -import java.lang.foreign.MemorySegment; +import java.lang.foreign.*; import java.lang.foreign.MemorySegment.Scope; -import java.lang.foreign.SegmentAllocator; -import java.lang.foreign.ValueLayout; import java.lang.invoke.MethodHandle; import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodType; @@ -383,13 +375,17 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { : chunkOffset; } - static class Segment { + // Intermediate buffer needed for a stub call handle. Small buffers may be reused across calls. + static class CallBuffer { + // Size for cached buffers. private static final int CACHED_SIZE = 256; + // Not confined: cached buffers may float between threads. private final Arena arena = Arena.ofShared(); private final MemorySegment segment; - public Segment(long size) { + public CallBuffer(long size) { + // Allocate at least CACHED_SIZE in case we want to reuse this buffer. segment = arena.allocate(Math.max(size, CACHED_SIZE)); } @@ -405,57 +401,53 @@ private Scope scope() { return segment.scope(); } - boolean canCache() { + boolean isCacheable() { + // Don't cache larger buffers. return segment.byteSize() == CACHED_SIZE; } void close() { arena.close(); } - } - static class SegmentCache { - private Segment segment = null; + // A one-element cache. + static class Holder { + private CallBuffer element = null; + } - Segment acquire(long size) { - if (segment == null || !segment.supports(size)) { - return new Segment(size); + private static final TerminatingThreadLocal tl = new TerminatingThreadLocal() { + @Override + protected SharedUtils.CallBuffer.Holder initialValue() { + return new SharedUtils.CallBuffer.Holder(); } - Segment result = segment; - segment = null; - return result; - } - private boolean canCache(Segment released) { - return this.segment == null && released.canCache(); - } + @Override + protected void threadTerminated(SharedUtils.CallBuffer.Holder holder) { + if (holder.element != null) holder.element.close(); + } + }; - void release(Segment released) { - if (canCache(released)) this.segment = released; - else released.close(); + static CallBuffer acquire(long size) { + SharedUtils.CallBuffer.Holder cache = tl.get(); + if (cache.element == null || !cache.element.supports(size)) { + return new CallBuffer(size); + } + CallBuffer result = cache.element; + cache.element = null; + return result; } - void close() { - if (this.segment != null) this.segment.close(); + static void release(CallBuffer released) { + SharedUtils.CallBuffer.Holder cache = tl.get(); + if (cache.element == null && released.isCacheable()) cache.element = released; + else released.close(); } } - private static final TerminatingThreadLocal segmentCache = new TerminatingThreadLocal() { - @Override - protected SegmentCache initialValue() { - return new SegmentCache(); - } - - @Override - protected void threadTerminated(SegmentCache cache) { - cache.close(); - } - }; - public static Arena newBoundedArena(long size) { return new Arena() { - final Segment segment = SharedUtils.segmentCache.get().acquire(size); - final SegmentAllocator allocator = segment.slicingAllocator(); + final CallBuffer buffer = SharedUtils.CallBuffer.acquire(size); + final SegmentAllocator allocator = buffer.slicingAllocator(); @Override public MemorySegment allocate(long byteSize, long byteAlignment) { @@ -464,12 +456,14 @@ public MemorySegment allocate(long byteSize, long byteAlignment) { @Override public Scope scope() { - return segment.scope(); + return buffer.scope(); } @Override public void close() { - SharedUtils.segmentCache.get().release(segment); + // Caveat: this may be a carrier thread different from + // where the allocation happened. + SharedUtils.CallBuffer.release(buffer); } }; } From e27798de9667cd0c6cda3c35083b884ff0709fcd Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 16 Jan 2025 10:27:25 +0100 Subject: [PATCH 03/48] merge master --- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 39608f0d390f5..3b7fcf0350d4b 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -51,6 +51,8 @@ import java.lang.foreign.MemoryLayout; import java.lang.foreign.MemorySegment; import java.lang.foreign.MemorySegment.Scope; +import java.lang.foreign.SegmentAllocator; +import java.lang.foreign.ValueLayout; import java.lang.invoke.MethodHandle; import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodType; From f96d963d2b89b83a36661a9ba6149a1ea3f68df1 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 16 Jan 2025 10:32:23 +0100 Subject: [PATCH 04/48] readability --- .../jdk/internal/foreign/abi/SharedUtils.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 3b7fcf0350d4b..c6ece2bba637c 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -423,20 +423,20 @@ static class Holder { private CallBuffer element = null; } - private static final TerminatingThreadLocal tl = new TerminatingThreadLocal() { + private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { @Override - protected SharedUtils.CallBuffer.Holder initialValue() { - return new SharedUtils.CallBuffer.Holder(); + protected Holder initialValue() { + return new Holder(); } @Override - protected void threadTerminated(SharedUtils.CallBuffer.Holder holder) { + protected void threadTerminated(Holder holder) { if (holder.element != null) holder.element.close(); } }; static CallBuffer acquire(long size) { - SharedUtils.CallBuffer.Holder cache = tl.get(); + Holder cache = tl.get(); if (cache.element == null || !cache.element.supports(size)) { return new CallBuffer(size); } @@ -446,7 +446,7 @@ static CallBuffer acquire(long size) { } static void release(CallBuffer released) { - SharedUtils.CallBuffer.Holder cache = tl.get(); + Holder cache = tl.get(); if (cache.element == null && released.isCacheable()) cache.element = released; else released.close(); } @@ -454,7 +454,7 @@ static void release(CallBuffer released) { public static Arena newBoundedArena(long size) { return new Arena() { - final CallBuffer buffer = SharedUtils.CallBuffer.acquire(size); + final CallBuffer buffer = CallBuffer.acquire(size); final SegmentAllocator allocator = buffer.slicingAllocator(); @Override @@ -471,7 +471,7 @@ public Scope scope() { public void close() { // Caveat: this may be a carrier thread different from // where the allocation happened. - SharedUtils.CallBuffer.release(buffer); + CallBuffer.release(buffer); } }; } From 2bda29ae3be5d11469c82968fbbe7a406ae2ef6c Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 16 Jan 2025 11:49:55 +0100 Subject: [PATCH 05/48] avoid TL lookup if not necessary --- .../jdk/internal/foreign/abi/SharedUtils.java | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index c6ece2bba637c..42b83fe93b8af 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -397,10 +397,6 @@ public CallBuffer(long size) { segment = arena.allocate(Math.max(size, CACHED_SIZE)); } - boolean supports(long size) { - return segment.byteSize() > size; - } - private SegmentAllocator slicingAllocator() { return SegmentAllocator.slicingAllocator(segment); } @@ -409,6 +405,10 @@ private Scope scope() { return segment.scope(); } + static boolean isCacheable(long size) { + return size <= CACHED_SIZE; + } + boolean isCacheable() { // Don't cache larger buffers. return segment.byteSize() == CACHED_SIZE; @@ -431,13 +431,15 @@ protected Holder initialValue() { @Override protected void threadTerminated(Holder holder) { - if (holder.element != null) holder.element.close(); + if (holder.element != null) { + holder.element.close(); + } } }; - static CallBuffer acquire(long size) { - Holder cache = tl.get(); - if (cache.element == null || !cache.element.supports(size)) { + static CallBuffer acquireOrAllocate(long size) { + Holder cache; + if (!isCacheable(size) || (cache = tl.get()).element == null) { return new CallBuffer(size); } CallBuffer result = cache.element; @@ -445,16 +447,19 @@ static CallBuffer acquire(long size) { return result; } - static void release(CallBuffer released) { - Holder cache = tl.get(); - if (cache.element == null && released.isCacheable()) cache.element = released; - else released.close(); + static void cacheOrClose(CallBuffer released) { + Holder cache; + if (released.isCacheable() && (cache = tl.get()).element == null) { + cache.element = released; + } else { + released.close(); + } } } public static Arena newBoundedArena(long size) { return new Arena() { - final CallBuffer buffer = CallBuffer.acquire(size); + final CallBuffer buffer = CallBuffer.acquireOrAllocate(size); final SegmentAllocator allocator = buffer.slicingAllocator(); @Override @@ -471,7 +476,7 @@ public Scope scope() { public void close() { // Caveat: this may be a carrier thread different from // where the allocation happened. - CallBuffer.release(buffer); + CallBuffer.cacheOrClose(buffer); } }; } @@ -514,8 +519,8 @@ static void writeOverSized(MemorySegment ptr, Class type, Object o) { } else if (type == double.class) { ptr.set(JAVA_DOUBLE_UNALIGNED, 0, (double) o); } else if (type == boolean.class) { - boolean b = (boolean)o; - ptr.set(JAVA_LONG_UNALIGNED, 0, b ? (long)1 : (long)0); + boolean b = (boolean) o; + ptr.set(JAVA_LONG_UNALIGNED, 0, b ? (long) 1 : (long) 0); } else { throw new IllegalArgumentException("Unsupported carrier: " + type); } From 61f35c97ffb436bea37e9cebd6e06bb5d147f566 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 16 Jan 2025 11:52:37 +0100 Subject: [PATCH 06/48] !!@# format --- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 42b83fe93b8af..144e56d406222 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -519,8 +519,8 @@ static void writeOverSized(MemorySegment ptr, Class type, Object o) { } else if (type == double.class) { ptr.set(JAVA_DOUBLE_UNALIGNED, 0, (double) o); } else if (type == boolean.class) { - boolean b = (boolean) o; - ptr.set(JAVA_LONG_UNALIGNED, 0, b ? (long) 1 : (long) 0); + boolean b = (boolean)o; + ptr.set(JAVA_LONG_UNALIGNED, 0, b ? (long)1 : (long)0); } else { throw new IllegalArgumentException("Unsupported carrier: " + type); } From 9cf9837bbfcb07d7da2dec6b1c23718cb49933fc Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 16 Jan 2025 11:58:15 +0100 Subject: [PATCH 07/48] final --- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 144e56d406222..2b2cd4f5f1e30 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -438,7 +438,7 @@ protected void threadTerminated(Holder holder) { }; static CallBuffer acquireOrAllocate(long size) { - Holder cache; + final Holder cache; if (!isCacheable(size) || (cache = tl.get()).element == null) { return new CallBuffer(size); } @@ -448,7 +448,7 @@ static CallBuffer acquireOrAllocate(long size) { } static void cacheOrClose(CallBuffer released) { - Holder cache; + final Holder cache; if (released.isCacheable() && (cache = tl.get()).element == null) { cache.element = released; } else { From 2964f84b39b57c2440deeff074477f78237608d9 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 16 Jan 2025 11:58:55 +0100 Subject: [PATCH 08/48] final --- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 2b2cd4f5f1e30..cf33a4c08cbdc 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -442,7 +442,7 @@ static CallBuffer acquireOrAllocate(long size) { if (!isCacheable(size) || (cache = tl.get()).element == null) { return new CallBuffer(size); } - CallBuffer result = cache.element; + final CallBuffer result = cache.element; cache.element = null; return result; } From f2cd14497ba1d2002a201364cf98572e8443d99a Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 16 Jan 2025 16:54:42 +0100 Subject: [PATCH 09/48] add pinned sections around CTL manipulation --- .../jdk/internal/foreign/abi/SharedUtils.java | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index cf33a4c08cbdc..0288b2abe769c 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -41,6 +41,7 @@ import jdk.internal.foreign.abi.x64.sysv.SysVx64Linker; import jdk.internal.foreign.abi.x64.windows.Windowsx64Linker; import jdk.internal.misc.TerminatingThreadLocal; +import jdk.internal.vm.Continuation; import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.AddressLayout; @@ -439,20 +440,30 @@ protected void threadTerminated(Holder holder) { static CallBuffer acquireOrAllocate(long size) { final Holder cache; - if (!isCacheable(size) || (cache = tl.get()).element == null) { - return new CallBuffer(size); + Continuation.pin(); + try { + if (!isCacheable(size) || (cache = tl.get()).element == null) { + return new CallBuffer(size); + } + final CallBuffer result = cache.element; + cache.element = null; + return result; + } finally { + Continuation.unpin(); } - final CallBuffer result = cache.element; - cache.element = null; - return result; } static void cacheOrClose(CallBuffer released) { final Holder cache; - if (released.isCacheable() && (cache = tl.get()).element == null) { - cache.element = released; - } else { - released.close(); + Continuation.pin(); + try { + if (released.isCacheable() && (cache = tl.get()).element == null) { + cache.element = released; + } else { + released.close(); + } + } finally { + Continuation.unpin(); } } } From a0ac383c85eac1308b3e20632d60c11f02f108e3 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 00:53:15 +0100 Subject: [PATCH 10/48] feedback: - 2 element cache to support upcall - avoid shared session With this, escape analysis kicks in and the "BoundedArena" seems to get scalar-replaced. The call becomes allocation-free. Introducing a confined session per call kills this and costs ~50%. --- .../jdk/internal/foreign/abi/SharedUtils.java | 140 ++++++++++-------- 1 file changed, 80 insertions(+), 60 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 0288b2abe769c..68c018ddba030 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -41,6 +41,7 @@ import jdk.internal.foreign.abi.x64.sysv.SysVx64Linker; import jdk.internal.foreign.abi.x64.windows.Windowsx64Linker; import jdk.internal.misc.TerminatingThreadLocal; +import jdk.internal.misc.Unsafe; import jdk.internal.vm.Continuation; import jdk.internal.vm.annotation.ForceInline; @@ -385,82 +386,88 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { } // Intermediate buffer needed for a stub call handle. Small buffers may be reused across calls. - static class CallBuffer { + static final class BufferCache { + private static final Unsafe UNSAFE = Unsafe.getUnsafe(); + // Size for cached buffers. private static final int CACHED_SIZE = 256; - // Not confined: cached buffers may float between threads. - private final Arena arena = Arena.ofShared(); - private final MemorySegment segment; + // Two-element stack to support downcall + upcall (cached1 == null => cached2 == null). + // Elements are unscoped. + private MemorySegment cached1; + private MemorySegment cached2; - public CallBuffer(long size) { - // Allocate at least CACHED_SIZE in case we want to reuse this buffer. - segment = arena.allocate(Math.max(size, CACHED_SIZE)); + MemorySegment pop() { + MemorySegment result = cached1; + cached1 = cached2; + cached2 = null; + return result; } - private SegmentAllocator slicingAllocator() { - return SegmentAllocator.slicingAllocator(segment); + boolean push(MemorySegment segment) { + if (cached2 != null) { + return false; + } + cached2 = cached1; + cached1 = segment; + return true; } - private Scope scope() { - return segment.scope(); + void free() { + if (cached1 != null) free(cached1); + if (cached2 != null) free(cached2); } - static boolean isCacheable(long size) { - return size <= CACHED_SIZE; + @SuppressWarnings("restricted") + static MemorySegment allocate(long size) { + long allocatedSize = Math.max(size, CACHED_SIZE); + return MemorySegment + .ofAddress(UNSAFE.allocateMemory(allocatedSize)) + .reinterpret(allocatedSize); } - boolean isCacheable() { - // Don't cache larger buffers. - return segment.byteSize() == CACHED_SIZE; + static void free(MemorySegment segment) { + UNSAFE.freeMemory(segment.address()); } - void close() { - arena.close(); + static boolean couldBeSatisfiedFromCache(long size) { + return size <= CACHED_SIZE; } - // A one-element cache. - static class Holder { - private CallBuffer element = null; + static boolean couldCache(MemorySegment released) { + // Don't cache larger buffers. + return released.byteSize() == CACHED_SIZE; } - private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { + private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { @Override - protected Holder initialValue() { - return new Holder(); + protected BufferCache initialValue() { + return new BufferCache(); } @Override - protected void threadTerminated(Holder holder) { - if (holder.element != null) { - holder.element.close(); - } + protected void threadTerminated(BufferCache cache) { + cache.free(); } }; - static CallBuffer acquireOrAllocate(long size) { - final Holder cache; + static MemorySegment acquireOrAllocate(long size) { Continuation.pin(); try { - if (!isCacheable(size) || (cache = tl.get()).element == null) { - return new CallBuffer(size); - } - final CallBuffer result = cache.element; - cache.element = null; - return result; + final MemorySegment result; + return !couldBeSatisfiedFromCache(size) || (result = tl.get().pop()) == null + ? allocate(size) + : result; } finally { Continuation.unpin(); } } - static void cacheOrClose(CallBuffer released) { - final Holder cache; + static void cacheOrClose(MemorySegment released) { Continuation.pin(); try { - if (released.isCacheable() && (cache = tl.get()).element == null) { - cache.element = released; - } else { - released.close(); + if (!couldCache(released) || !tl.get().push(released)) { + free(released); } } finally { Continuation.unpin(); @@ -468,28 +475,41 @@ static void cacheOrClose(CallBuffer released) { } } + @ForceInline public static Arena newBoundedArena(long size) { - return new Arena() { - final CallBuffer buffer = CallBuffer.acquireOrAllocate(size); - final SegmentAllocator allocator = buffer.slicingAllocator(); + return new BoundedArena(size); + } + + private static class BoundedArena implements Arena { + final MemorySegment buffer; + // We'd ideally confine the segments returned from this arena with a + // final Arena callScope = Arena.ofConfined(); + // and close the scope after the call. + // However, the arena creates allocation pressure, without it the call is scalar-replaced. + @SuppressWarnings("restricted") + final SegmentAllocator allocator; + + @ForceInline + public BoundedArena(long size) { + buffer = BufferCache.acquireOrAllocate(size); + allocator = SegmentAllocator.slicingAllocator(buffer/*.reinterpret(callScope, null)*/); + } - @Override - public MemorySegment allocate(long byteSize, long byteAlignment) { - return allocator.allocate(byteSize, byteAlignment); - } + @Override + public MemorySegment allocate(long byteSize, long byteAlignment) { + return allocator.allocate(byteSize, byteAlignment); + } - @Override - public Scope scope() { - return buffer.scope(); - } + @Override + public Scope scope() { + return Arena.global().scope(); // callScope; + } - @Override - public void close() { - // Caveat: this may be a carrier thread different from - // where the allocation happened. - CallBuffer.cacheOrClose(buffer); - } - }; + @Override + public void close() { + // callScope.close(); + BufferCache.cacheOrClose(buffer); + } } public static Arena newEmptyArena() { From 0a41dce3ec4c90a732c385498c1572558b7cf781 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 01:45:24 +0100 Subject: [PATCH 11/48] minimal continuation pinning Should really just protect the CTL handling. alloc/free should happen outside (even if it practically doesn't matter) --- .../jdk/internal/foreign/abi/SharedUtils.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 68c018ddba030..3a8b4378eaec4 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -452,26 +452,25 @@ protected void threadTerminated(BufferCache cache) { }; static MemorySegment acquireOrAllocate(long size) { + final MemorySegment result; Continuation.pin(); try { - final MemorySegment result; - return !couldBeSatisfiedFromCache(size) || (result = tl.get().pop()) == null - ? allocate(size) - : result; + result = couldBeSatisfiedFromCache(size) ? tl.get().pop() : null; } finally { Continuation.unpin(); } + return result == null ? allocate(size) : result; } static void cacheOrClose(MemorySegment released) { + final boolean cached; Continuation.pin(); try { - if (!couldCache(released) || !tl.get().push(released)) { - free(released); - } + cached = couldCache(released) && tl.get().push(released); } finally { Continuation.unpin(); } + if (!cached) free(released); } } From 68d4bcc43cf61c3939c014d64094315c571e83dd Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 11:11:51 +0100 Subject: [PATCH 12/48] cache the bounded area/slicing allocator no need to recreate these simplifies code and no longer needs @ForceInlining --- .../internal/foreign/SlicingAllocator.java | 8 ++ .../jdk/internal/foreign/abi/SharedUtils.java | 96 ++++++++----------- 2 files changed, 48 insertions(+), 56 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java b/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java index db7d476053e54..71d4e179daefb 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java +++ b/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java @@ -38,6 +38,10 @@ public SlicingAllocator(MemorySegment segment) { this.segment = segment; } + public MemorySegment segment() { + return segment; + } + MemorySegment trySlice(long byteSize, long byteAlignment) { long min = segment.address(); long start = Utils.alignUp(min + sp, byteAlignment) - min; @@ -52,4 +56,8 @@ public MemorySegment allocate(long byteSize, long byteAlignment) { // try to slice from current segment first... return trySlice(byteSize, byteAlignment); } + + public void reset() { + sp = 0L; + } } diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 3a8b4378eaec4..3ee4cd4a25315 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -28,6 +28,7 @@ import jdk.internal.access.JavaLangInvokeAccess; import jdk.internal.access.SharedSecrets; import jdk.internal.foreign.CABI; +import jdk.internal.foreign.SlicingAllocator; import jdk.internal.foreign.abi.AbstractLinker.UpcallStubFactory; import jdk.internal.foreign.abi.aarch64.linux.LinuxAArch64Linker; import jdk.internal.foreign.abi.aarch64.macos.MacOsAArch64Linker; @@ -387,24 +388,19 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { // Intermediate buffer needed for a stub call handle. Small buffers may be reused across calls. static final class BufferCache { - private static final Unsafe UNSAFE = Unsafe.getUnsafe(); - - // Size for cached buffers. - private static final int CACHED_SIZE = 256; - // Two-element stack to support downcall + upcall (cached1 == null => cached2 == null). // Elements are unscoped. - private MemorySegment cached1; - private MemorySegment cached2; + private BoundedArena cached1; + private BoundedArena cached2; - MemorySegment pop() { - MemorySegment result = cached1; + BoundedArena pop() { + BoundedArena result = cached1; cached1 = cached2; cached2 = null; return result; } - boolean push(MemorySegment segment) { + boolean push(BoundedArena segment) { if (cached2 != null) { return false; } @@ -414,29 +410,8 @@ boolean push(MemorySegment segment) { } void free() { - if (cached1 != null) free(cached1); - if (cached2 != null) free(cached2); - } - - @SuppressWarnings("restricted") - static MemorySegment allocate(long size) { - long allocatedSize = Math.max(size, CACHED_SIZE); - return MemorySegment - .ofAddress(UNSAFE.allocateMemory(allocatedSize)) - .reinterpret(allocatedSize); - } - - static void free(MemorySegment segment) { - UNSAFE.freeMemory(segment.address()); - } - - static boolean couldBeSatisfiedFromCache(long size) { - return size <= CACHED_SIZE; - } - - static boolean couldCache(MemorySegment released) { - // Don't cache larger buffers. - return released.byteSize() == CACHED_SIZE; + if (cached1 != null) cached1.free(); + if (cached2 != null) cached2.free(); } private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { @@ -451,49 +426,58 @@ protected void threadTerminated(BufferCache cache) { } }; - static MemorySegment acquireOrAllocate(long size) { - final MemorySegment result; + static BoundedArena acquire() { Continuation.pin(); try { - result = couldBeSatisfiedFromCache(size) ? tl.get().pop() : null; + return tl.get().pop(); } finally { Continuation.unpin(); } - return result == null ? allocate(size) : result; } - static void cacheOrClose(MemorySegment released) { - final boolean cached; + static boolean release(BoundedArena arena) { Continuation.pin(); try { - cached = couldCache(released) && tl.get().push(released); + return tl.get().push(arena); } finally { Continuation.unpin(); } - if (!cached) free(released); } } - @ForceInline public static Arena newBoundedArena(long size) { - return new BoundedArena(size); + Arena result = BoundedArena.couldBeSatisfiedFromCache(size) ? BufferCache.acquire() : null; + return result != null ? result : new BoundedArena(size); } private static class BoundedArena implements Arena { - final MemorySegment buffer; - // We'd ideally confine the segments returned from this arena with a - // final Arena callScope = Arena.ofConfined(); - // and close the scope after the call. - // However, the arena creates allocation pressure, without it the call is scalar-replaced. - @SuppressWarnings("restricted") - final SegmentAllocator allocator; + private static final int CACHED_BUFFER_SIZE = 256; + private static final Unsafe UNSAFE = Unsafe.getUnsafe(); + + final boolean cacheable; + final SlicingAllocator allocator; - @ForceInline + @SuppressWarnings("restricted") public BoundedArena(long size) { - buffer = BufferCache.acquireOrAllocate(size); - allocator = SegmentAllocator.slicingAllocator(buffer/*.reinterpret(callScope, null)*/); + long allocatedSize = Math.max(CACHED_BUFFER_SIZE, size); + MemorySegment buffer = + MemorySegment.ofAddress(UNSAFE.allocateMemory(allocatedSize)) + .reinterpret(allocatedSize); + // Don't cache larger buffers. + cacheable = allocatedSize == CACHED_BUFFER_SIZE; + allocator = new SlicingAllocator(buffer); } + void free() { + UNSAFE.freeMemory(allocator.segment().address()); + } + + static boolean couldBeSatisfiedFromCache(long size) { + return size <= BoundedArena.CACHED_BUFFER_SIZE; + } + + // Arena: + @Override public MemorySegment allocate(long byteSize, long byteAlignment) { return allocator.allocate(byteSize, byteAlignment); @@ -501,13 +485,13 @@ public MemorySegment allocate(long byteSize, long byteAlignment) { @Override public Scope scope() { - return Arena.global().scope(); // callScope; + return Arena.global().scope(); } @Override public void close() { - // callScope.close(); - BufferCache.cacheOrClose(buffer); + allocator.reset(); + if (!cacheable || !BufferCache.release(this)) free(); } } From b35cc8691cea693c7688caa1d7a7600af79a4553 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 11:11:51 +0100 Subject: [PATCH 13/48] confine buffers. Careful massaging to get scalar replacement. Triggers fastdebug assertion, though. --- .../jdk/internal/foreign/abi/SharedUtils.java | 83 +++++++++---------- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 3a8b4378eaec4..d84ce58e02094 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -28,6 +28,7 @@ import jdk.internal.access.JavaLangInvokeAccess; import jdk.internal.access.SharedSecrets; import jdk.internal.foreign.CABI; +import jdk.internal.foreign.SlicingAllocator; import jdk.internal.foreign.abi.AbstractLinker.UpcallStubFactory; import jdk.internal.foreign.abi.aarch64.linux.LinuxAArch64Linker; import jdk.internal.foreign.abi.aarch64.macos.MacOsAArch64Linker; @@ -140,12 +141,12 @@ public static long remainsToAlignment(long addr, long alignment) { * Takes a MethodHandle that takes an input buffer as a first argument (a MemorySegment), and returns nothing, * and adapts it to return a MemorySegment, by allocating a MemorySegment for the input * buffer, calling the target MethodHandle, and then returning the allocated MemorySegment. - * + *

* This allows viewing a MethodHandle that makes use of in memory return (IMR) as a MethodHandle that just returns * a MemorySegment without requiring a pre-allocated buffer as an explicit input. * * @param handle the target handle to adapt - * @param cDesc the function descriptor of the native function (with actual return layout) + * @param cDesc the function descriptor of the native function (with actual return layout) * @return the adapted handle */ public static MethodHandle adaptDowncallForIMR(MethodHandle handle, FunctionDescriptor cDesc, CallingSequence sequence) { @@ -261,8 +262,8 @@ public static Linker getSystemLinker() { static Map indexMap(Binding.Move[] moves) { return IntStream.range(0, moves.length) - .boxed() - .collect(Collectors.toMap(i -> moves[i].storage(), i -> i)); + .boxed() + .collect(Collectors.toMap(i -> moves[i].storage(), i -> i)); } static MethodHandle mergeArguments(MethodHandle mh, int sourceIndex, int destIndex) { @@ -293,7 +294,7 @@ public static MethodHandle swapArguments(MethodHandle mh, int firstArg, int seco MethodType mtype = mh.type(); int[] perms = new int[mtype.parameterCount()]; MethodType swappedType = MethodType.methodType(mtype.returnType()); - for (int i = 0 ; i < perms.length ; i++) { + for (int i = 0; i < perms.length; i++) { int dst = i; if (i == firstArg) dst = secondArg; if (i == secondArg) dst = firstArg; @@ -387,11 +388,9 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { // Intermediate buffer needed for a stub call handle. Small buffers may be reused across calls. static final class BufferCache { + private static final int CACHED_BUFFER_SIZE = 256; private static final Unsafe UNSAFE = Unsafe.getUnsafe(); - // Size for cached buffers. - private static final int CACHED_SIZE = 256; - // Two-element stack to support downcall + upcall (cached1 == null => cached2 == null). // Elements are unscoped. private MemorySegment cached1; @@ -420,9 +419,8 @@ void free() { @SuppressWarnings("restricted") static MemorySegment allocate(long size) { - long allocatedSize = Math.max(size, CACHED_SIZE); - return MemorySegment - .ofAddress(UNSAFE.allocateMemory(allocatedSize)) + long allocatedSize = Math.max(CACHED_BUFFER_SIZE, size); + return MemorySegment.ofAddress(UNSAFE.allocateMemory(allocatedSize)) .reinterpret(allocatedSize); } @@ -430,15 +428,6 @@ static void free(MemorySegment segment) { UNSAFE.freeMemory(segment.address()); } - static boolean couldBeSatisfiedFromCache(long size) { - return size <= CACHED_SIZE; - } - - static boolean couldCache(MemorySegment released) { - // Don't cache larger buffers. - return released.byteSize() == CACHED_SIZE; - } - private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { @Override protected BufferCache initialValue() { @@ -451,26 +440,22 @@ protected void threadTerminated(BufferCache cache) { } }; - static MemorySegment acquireOrAllocate(long size) { - final MemorySegment result; + static MemorySegment acquire() { Continuation.pin(); try { - result = couldBeSatisfiedFromCache(size) ? tl.get().pop() : null; + return tl.get().pop(); } finally { Continuation.unpin(); } - return result == null ? allocate(size) : result; } - static void cacheOrClose(MemorySegment released) { - final boolean cached; + static boolean release(MemorySegment segment) { Continuation.pin(); try { - cached = couldCache(released) && tl.get().push(released); + return tl.get().push(segment); } finally { Continuation.unpin(); } - if (!cached) free(released); } } @@ -479,35 +464,45 @@ public static Arena newBoundedArena(long size) { return new BoundedArena(size); } - private static class BoundedArena implements Arena { - final MemorySegment buffer; - // We'd ideally confine the segments returned from this arena with a - // final Arena callScope = Arena.ofConfined(); - // and close the scope after the call. - // However, the arena creates allocation pressure, without it the call is scalar-replaced. - @SuppressWarnings("restricted") - final SegmentAllocator allocator; + static final class BoundedArena implements Arena { + private final MemorySegment source; + private final SlicingAllocator allocator; + private final Arena scope; @ForceInline public BoundedArena(long size) { - buffer = BufferCache.acquireOrAllocate(size); - allocator = SegmentAllocator.slicingAllocator(buffer/*.reinterpret(callScope, null)*/); + // When here, works in fastdebug, but not scalar-replaced: + // scope = Arena.ofConfined(); + + MemorySegment cached = size <= BufferCache.CACHED_BUFFER_SIZE ? BufferCache.acquire() : null; + + // When here, works in release build, but fastdebug crashes: + // # Internal Error (/Users/mernst/IdeaProjects/jdk/src/hotspot/share/opto/escape.cpp:4767), pid=85070, tid=26115 + // # assert(false) failed: EA: missing memory path + scope = Arena.ofConfined(); + + source = cached != null ? cached : BufferCache.allocate(size); + allocator = new SlicingAllocator(source); } + @SuppressWarnings("restricted") @Override + @ForceInline public MemorySegment allocate(long byteSize, long byteAlignment) { - return allocator.allocate(byteSize, byteAlignment); + return allocator.allocate(byteSize, byteAlignment).reinterpret(scope, null); } @Override public Scope scope() { - return Arena.global().scope(); // callScope; + return scope.scope(); } @Override + @ForceInline public void close() { - // callScope.close(); - BufferCache.cacheOrClose(buffer); + scope.close(); + if (source.byteSize() != BufferCache.CACHED_BUFFER_SIZE || !BufferCache.release(source)) + BufferCache.free(source); } } @@ -549,8 +544,8 @@ static void writeOverSized(MemorySegment ptr, Class type, Object o) { } else if (type == double.class) { ptr.set(JAVA_DOUBLE_UNALIGNED, 0, (double) o); } else if (type == boolean.class) { - boolean b = (boolean)o; - ptr.set(JAVA_LONG_UNALIGNED, 0, b ? (long)1 : (long)0); + boolean b = (boolean) o; + ptr.set(JAVA_LONG_UNALIGNED, 0, b ? (long) 1 : (long) 0); } else { throw new IllegalArgumentException("Unsupported carrier: " + type); } From c0b2beb12888f110e7b06c01bc9e53a345a76adb Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 12:50:08 +0100 Subject: [PATCH 14/48] no need to use SlicingAllocator directly --- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index d84ce58e02094..4fdd1a809b57f 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -28,7 +28,6 @@ import jdk.internal.access.JavaLangInvokeAccess; import jdk.internal.access.SharedSecrets; import jdk.internal.foreign.CABI; -import jdk.internal.foreign.SlicingAllocator; import jdk.internal.foreign.abi.AbstractLinker.UpcallStubFactory; import jdk.internal.foreign.abi.aarch64.linux.LinuxAArch64Linker; import jdk.internal.foreign.abi.aarch64.macos.MacOsAArch64Linker; @@ -466,7 +465,7 @@ public static Arena newBoundedArena(long size) { static final class BoundedArena implements Arena { private final MemorySegment source; - private final SlicingAllocator allocator; + private final SegmentAllocator allocator; private final Arena scope; @ForceInline @@ -482,7 +481,7 @@ public BoundedArena(long size) { scope = Arena.ofConfined(); source = cached != null ? cached : BufferCache.allocate(size); - allocator = new SlicingAllocator(source); + allocator = SegmentAllocator.slicingAllocator(source); } @SuppressWarnings("restricted") From 021d037bf746cc678d7f240f16a68edb4bbc4760 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 12:56:47 +0100 Subject: [PATCH 15/48] revert SlicingAllocator --- .../classes/jdk/internal/foreign/SlicingAllocator.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java b/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java index 71d4e179daefb..db7d476053e54 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java +++ b/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java @@ -38,10 +38,6 @@ public SlicingAllocator(MemorySegment segment) { this.segment = segment; } - public MemorySegment segment() { - return segment; - } - MemorySegment trySlice(long byteSize, long byteAlignment) { long min = segment.address(); long start = Utils.alignUp(min + sp, byteAlignment) - min; @@ -56,8 +52,4 @@ public MemorySegment allocate(long byteSize, long byteAlignment) { // try to slice from current segment first... return trySlice(byteSize, byteAlignment); } - - public void reset() { - sp = 0L; - } } From 634b909b0e076fbe517e01f0a7dcbce6bc636ae4 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 13:00:11 +0100 Subject: [PATCH 16/48] reorder --- .../jdk/internal/foreign/abi/SharedUtils.java | 94 +++++++++---------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 4fdd1a809b57f..c5696b14c1133 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -385,6 +385,53 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { : chunkOffset; } + @ForceInline + public static Arena newBoundedArena(long size) { + return new BoundedArena(size); + } + + static final class BoundedArena implements Arena { + private final MemorySegment source; + private final SegmentAllocator allocator; + private final Arena scope; + + @ForceInline + public BoundedArena(long size) { + // When here, works in fastdebug, but not scalar-replaced: + // scope = Arena.ofConfined(); + + MemorySegment cached = size <= BufferCache.CACHED_BUFFER_SIZE ? BufferCache.acquire() : null; + + // When here, works in release build, but fastdebug crashes: + // # Internal Error (/Users/mernst/IdeaProjects/jdk/src/hotspot/share/opto/escape.cpp:4767), pid=85070, tid=26115 + // # assert(false) failed: EA: missing memory path + scope = Arena.ofConfined(); + + source = cached != null ? cached : BufferCache.allocate(size); + allocator = SegmentAllocator.slicingAllocator(source); + } + + @SuppressWarnings("restricted") + @Override + @ForceInline + public MemorySegment allocate(long byteSize, long byteAlignment) { + return allocator.allocate(byteSize, byteAlignment).reinterpret(scope, null); + } + + @Override + public Scope scope() { + return scope.scope(); + } + + @Override + @ForceInline + public void close() { + scope.close(); + if (source.byteSize() != BufferCache.CACHED_BUFFER_SIZE || !BufferCache.release(source)) + BufferCache.free(source); + } + } + // Intermediate buffer needed for a stub call handle. Small buffers may be reused across calls. static final class BufferCache { private static final int CACHED_BUFFER_SIZE = 256; @@ -458,53 +505,6 @@ static boolean release(MemorySegment segment) { } } - @ForceInline - public static Arena newBoundedArena(long size) { - return new BoundedArena(size); - } - - static final class BoundedArena implements Arena { - private final MemorySegment source; - private final SegmentAllocator allocator; - private final Arena scope; - - @ForceInline - public BoundedArena(long size) { - // When here, works in fastdebug, but not scalar-replaced: - // scope = Arena.ofConfined(); - - MemorySegment cached = size <= BufferCache.CACHED_BUFFER_SIZE ? BufferCache.acquire() : null; - - // When here, works in release build, but fastdebug crashes: - // # Internal Error (/Users/mernst/IdeaProjects/jdk/src/hotspot/share/opto/escape.cpp:4767), pid=85070, tid=26115 - // # assert(false) failed: EA: missing memory path - scope = Arena.ofConfined(); - - source = cached != null ? cached : BufferCache.allocate(size); - allocator = SegmentAllocator.slicingAllocator(source); - } - - @SuppressWarnings("restricted") - @Override - @ForceInline - public MemorySegment allocate(long byteSize, long byteAlignment) { - return allocator.allocate(byteSize, byteAlignment).reinterpret(scope, null); - } - - @Override - public Scope scope() { - return scope.scope(); - } - - @Override - @ForceInline - public void close() { - scope.close(); - if (source.byteSize() != BufferCache.CACHED_BUFFER_SIZE || !BufferCache.release(source)) - BufferCache.free(source); - } - } - public static Arena newEmptyArena() { return new Arena() { final Arena arena = Arena.ofConfined(); From 195f68aabf8e08062d19671c67fcffd55c68ce58 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 13:14:42 +0100 Subject: [PATCH 17/48] move scoping --- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index c5696b14c1133..19140534e8a24 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -396,6 +396,7 @@ static final class BoundedArena implements Arena { private final Arena scope; @ForceInline + @SuppressWarnings("restricted") public BoundedArena(long size) { // When here, works in fastdebug, but not scalar-replaced: // scope = Arena.ofConfined(); @@ -408,14 +409,13 @@ public BoundedArena(long size) { scope = Arena.ofConfined(); source = cached != null ? cached : BufferCache.allocate(size); - allocator = SegmentAllocator.slicingAllocator(source); + allocator = SegmentAllocator.slicingAllocator(source.reinterpret(scope, null)); } - @SuppressWarnings("restricted") @Override @ForceInline public MemorySegment allocate(long byteSize, long byteAlignment) { - return allocator.allocate(byteSize, byteAlignment).reinterpret(scope, null); + return allocator.allocate(byteSize, byteAlignment); } @Override From 1f2110a55a2d1c0a9fa85c8d9f630b743f36b632 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Fri, 17 Jan 2025 14:57:09 +0100 Subject: [PATCH 18/48] move pinned cache lookup out of constructor. See https://github.com/mernst-github/repro/tree/main/escape-analysis. --- .../jdk/internal/foreign/abi/SharedUtils.java | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 19140534e8a24..44403169c1894 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -387,7 +387,9 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { @ForceInline public static Arena newBoundedArena(long size) { - return new BoundedArena(size); + MemorySegment cached = size <= BufferCache.CACHED_BUFFER_SIZE ? BufferCache.acquire() : null; + MemorySegment source = cached != null ? cached : BufferCache.allocate(size); + return new BoundedArena(source); } static final class BoundedArena implements Arena { @@ -397,19 +399,10 @@ static final class BoundedArena implements Arena { @ForceInline @SuppressWarnings("restricted") - public BoundedArena(long size) { - // When here, works in fastdebug, but not scalar-replaced: - // scope = Arena.ofConfined(); - - MemorySegment cached = size <= BufferCache.CACHED_BUFFER_SIZE ? BufferCache.acquire() : null; - - // When here, works in release build, but fastdebug crashes: - // # Internal Error (/Users/mernst/IdeaProjects/jdk/src/hotspot/share/opto/escape.cpp:4767), pid=85070, tid=26115 - // # assert(false) failed: EA: missing memory path + public BoundedArena(MemorySegment segment) { + source = segment; scope = Arena.ofConfined(); - - source = cached != null ? cached : BufferCache.allocate(size); - allocator = SegmentAllocator.slicingAllocator(source.reinterpret(scope, null)); + allocator = SegmentAllocator.slicingAllocator(segment.reinterpret(scope, null)); } @Override From 09e9c9ddfd75d5fcfad3a87a57b7ab44d21f298e Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sat, 18 Jan 2025 07:42:06 +0100 Subject: [PATCH 19/48] Benchmark: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercise intermediate buffer in downcall handle returning an Arm64 "HFA". # VM version: JDK 25-ea, OpenJDK 64-Bit Server VM, 25-ea+3-283 PointsAlloc.panama_by_value avgt 5 50.479 ± 3.653 ns/op PointsAlloc.panama_by_value:·gc.alloc.rate avgt 5 16490.820 ± 1193.776 MB/sec PointsAlloc.panama_by_value:·gc.alloc.rate.norm avgt 5 218.240 ± 0.001 B/op PointsAlloc.panama_by_value:·gc.count avgt 5 136.000 counts PointsAlloc.panama_by_value:·gc.time avgt 5 56.000 ms # VM version: JDK 25-internal, OpenJDK 64-Bit Server VM, 25-internal-adhoc.mernst.jdk PointsAlloc.panama_by_value avgt 5 17.549 ± 1.046 ns/op PointsAlloc.panama_by_value:·gc.alloc.rate avgt 5 486.807 ± 28.467 MB/sec PointsAlloc.panama_by_value:·gc.alloc.rate.norm avgt 5 2.240 ± 0.001 B/op PointsAlloc.panama_by_value:·gc.count avgt 5 11.000 counts PointsAlloc.panama_by_value:·gc.time avgt 5 19.000 ms --- .../java/lang/foreign/points/PointsAlloc.java | 12 +++++ .../lang/foreign/points/support/Circle.java | 48 +++++++++++++++++++ .../lang/foreign/points/support/libPoint.c | 9 +++- .../java/lang/foreign/points/support/points.h | 7 ++- 4 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java b/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java index 8fac59fa9e7ab..e55d680c80ec0 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java @@ -27,13 +27,16 @@ import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.bench.java.lang.foreign.points.support.BBPoint; +import org.openjdk.bench.java.lang.foreign.points.support.Circle; import org.openjdk.bench.java.lang.foreign.points.support.JNIPoint; import org.openjdk.bench.java.lang.foreign.points.support.PanamaPoint; +import java.lang.foreign.Arena; import java.util.concurrent.TimeUnit; @BenchmarkMode(Mode.AverageTime) @@ -59,4 +62,13 @@ public Object panama_alloc() throws Throwable { return new PanamaPoint(0, 0); } + static final int NUM_CIRCLE_POINTS = 100; + + @Benchmark + @OperationsPerInvocation(NUM_CIRCLE_POINTS) + public Object panama_by_value() { + try (Arena arena = Arena.ofConfined()) { + return new Circle(arena, NUM_CIRCLE_POINTS); + } + } } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java new file mode 100644 index 0000000000000..d3648b1baf7fd --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java @@ -0,0 +1,48 @@ +package org.openjdk.bench.java.lang.foreign.points.support; + +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; +import java.lang.foreign.SymbolLookup; +import java.lang.invoke.MethodHandle; + +import static org.openjdk.bench.java.lang.foreign.CLayouts.C_DOUBLE; + +public class Circle { + public static final MemoryLayout POINT_LAYOUT = MemoryLayout.structLayout( + C_DOUBLE.withName("x"), + C_DOUBLE.withName("y") + ); + private static final MethodHandle MH_UNIT_ROTATED; + + static { + Linker abi = Linker.nativeLinker(); + System.loadLibrary("Point"); + SymbolLookup loaderLibs = SymbolLookup.loaderLookup(); + MH_UNIT_ROTATED = abi.downcallHandle( + loaderLibs.findOrThrow("unit_rotated"), + FunctionDescriptor.of(POINT_LAYOUT, C_DOUBLE) + ); + } + + private final MemorySegment points; + + public Circle(SegmentAllocator allocator, int numPoints) { + try { + points = allocator.allocate(POINT_LAYOUT, numPoints); + for (int i = 0; i < numPoints; i++) { + double phi = 2 * Math.PI * i / numPoints; + // points[i] = unit_rotated(phi); + MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); + MemorySegment unused = + (MemorySegment) MH_UNIT_ROTATED.invokeExact( + (SegmentAllocator) (_, _) -> dest, + phi); + } + } catch (Throwable e) { + throw new RuntimeException(e); + } + } +} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c index 5e1913e2aa7c4..07df58fa67de5 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,3 +36,10 @@ EXPORT double distance(Point p1, Point p2) { EXPORT double distance_ptrs(Point* p1, Point* p2) { return distance(*p1, *p2); } + +EXPORT DoublePoint unit_rotated(double phi) { + DoublePoint result; + result.x = cos(phi); + result.y = sin(phi); + return result; +} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/points.h b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/points.h index 1d173ecbc8577..a6193c3753bbc 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/points.h +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/points.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,4 +28,9 @@ typedef struct { int y; } Point; +typedef struct { + double x; + double y; +} DoublePoint; + #endif From 46bf3425f9243583b25fd46dea0770f056301479 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sat, 18 Jan 2025 07:50:03 +0100 Subject: [PATCH 20/48] copyright header --- .../lang/foreign/points/support/Circle.java | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java index d3648b1baf7fd..31052b740f541 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ package org.openjdk.bench.java.lang.foreign.points.support; import java.lang.foreign.FunctionDescriptor; From 4940f39bf0fc6693ad244cf953deba11b9a292a9 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sat, 18 Jan 2025 09:15:34 +0100 Subject: [PATCH 21/48] Add comparison benchmark for out-parameter. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By value is now almost competitive: ``` Benchmark Mode Cnt Score Error Units PointsAlloc.circle_by_ptr avgt 5 9.151 ± 0.196 ns/op PointsAlloc.circle_by_ptr:·gc.alloc.rate avgt 5 233.423 ± 5.019 MB/sec PointsAlloc.circle_by_ptr:·gc.alloc.rate.norm avgt 5 2.240 ± 0.001 B/op PointsAlloc.circle_by_ptr:·gc.count avgt 5 6.000 counts PointsAlloc.circle_by_ptr:·gc.time avgt 5 10.000 ms PointsAlloc.circle_by_value avgt 5 15.445 ± 0.433 ns/op PointsAlloc.circle_by_value:·gc.alloc.rate avgt 5 138.301 ± 3.855 MB/sec PointsAlloc.circle_by_value:·gc.alloc.rate.norm avgt 5 2.240 ± 0.001 B/op PointsAlloc.circle_by_value:·gc.count avgt 5 3.000 counts PointsAlloc.circle_by_value:·gc.time avgt 5 6.000 ms ``` --- .../java/lang/foreign/points/PointsAlloc.java | 12 +++++-- .../lang/foreign/points/support/Circle.java | 36 ++++++++++++++++--- .../lang/foreign/points/support/libPoint.c | 8 +++-- 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java b/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java index e55d680c80ec0..01d306ec72c0f 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java @@ -66,9 +66,17 @@ public Object panama_alloc() throws Throwable { @Benchmark @OperationsPerInvocation(NUM_CIRCLE_POINTS) - public Object panama_by_value() { + public Object circle_by_value() { try (Arena arena = Arena.ofConfined()) { - return new Circle(arena, NUM_CIRCLE_POINTS); + return Circle.byValue(arena, NUM_CIRCLE_POINTS); + } + } + + @Benchmark + @OperationsPerInvocation(NUM_CIRCLE_POINTS) + public Object circle_by_ptr() { + try (Arena arena = Arena.ofConfined()) { + return Circle.byPtr(arena, NUM_CIRCLE_POINTS); } } } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java index 31052b740f541..ebf5f53917d27 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java @@ -28,6 +28,7 @@ import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; import java.lang.invoke.MethodHandle; import static org.openjdk.bench.java.lang.foreign.CLayouts.C_DOUBLE; @@ -37,32 +38,57 @@ public class Circle { C_DOUBLE.withName("x"), C_DOUBLE.withName("y") ); - private static final MethodHandle MH_UNIT_ROTATED; + private static final MethodHandle MH_UNIT_ROTATED_BY_VALUE; + private static final MethodHandle MH_UNIT_ROTATED_BY_PTR; static { Linker abi = Linker.nativeLinker(); System.loadLibrary("Point"); SymbolLookup loaderLibs = SymbolLookup.loaderLookup(); - MH_UNIT_ROTATED = abi.downcallHandle( + MH_UNIT_ROTATED_BY_VALUE = abi.downcallHandle( loaderLibs.findOrThrow("unit_rotated"), FunctionDescriptor.of(POINT_LAYOUT, C_DOUBLE) ); + MH_UNIT_ROTATED_BY_PTR = abi.downcallHandle( + loaderLibs.findOrThrow("unit_rotated_ptr"), + FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, C_DOUBLE) + ); } private final MemorySegment points; - public Circle(SegmentAllocator allocator, int numPoints) { + private Circle(MemorySegment points) { + this.points = points; + } + + public static Circle byValue(SegmentAllocator allocator, int numPoints) { try { - points = allocator.allocate(POINT_LAYOUT, numPoints); + MemorySegment points = allocator.allocate(POINT_LAYOUT, numPoints); for (int i = 0; i < numPoints; i++) { double phi = 2 * Math.PI * i / numPoints; // points[i] = unit_rotated(phi); MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); MemorySegment unused = - (MemorySegment) MH_UNIT_ROTATED.invokeExact( + (MemorySegment) MH_UNIT_ROTATED_BY_VALUE.invokeExact( (SegmentAllocator) (_, _) -> dest, phi); } + return new Circle(points); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + public static Circle byPtr(SegmentAllocator allocator, int numPoints) { + try { + MemorySegment points = allocator.allocate(POINT_LAYOUT, numPoints); + for (int i = 0; i < numPoints; i++) { + double phi = 2 * Math.PI * i / numPoints; + // unit_rotated_ptr(&points[i], phi); + MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); + MH_UNIT_ROTATED_BY_PTR.invokeExact(dest, phi); + } + return new Circle(points); } catch (Throwable e) { throw new RuntimeException(e); } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c index 07df58fa67de5..258229fb3e960 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c @@ -38,8 +38,10 @@ EXPORT double distance_ptrs(Point* p1, Point* p2) { } EXPORT DoublePoint unit_rotated(double phi) { - DoublePoint result; - result.x = cos(phi); - result.y = sin(phi); + DoublePoint result = { cos(phi), sin(phi) }; return result; } + +EXPORT void unit_rotated_ptr(DoublePoint* out, double phi) { + *out = unit_rotated(phi); +} From 5b750a365c740f9ded4c84561afde67e8575658b Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sun, 19 Jan 2025 18:13:29 +0100 Subject: [PATCH 22/48] shave off a couple more nanos --- .../jdk/internal/foreign/abi/SharedUtils.java | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 44403169c1894..b1a0f4bead909 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -388,8 +388,7 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { @ForceInline public static Arena newBoundedArena(long size) { MemorySegment cached = size <= BufferCache.CACHED_BUFFER_SIZE ? BufferCache.acquire() : null; - MemorySegment source = cached != null ? cached : BufferCache.allocate(size); - return new BoundedArena(source); + return new BoundedArena(cached != null ? cached : BufferCache.allocate(size)); } static final class BoundedArena implements Arena { @@ -430,25 +429,34 @@ static final class BufferCache { private static final int CACHED_BUFFER_SIZE = 256; private static final Unsafe UNSAFE = Unsafe.getUnsafe(); - // Two-element stack to support downcall + upcall (cached1 == null => cached2 == null). - // Elements are unscoped. + // Two-elements to support downcall + upcall. Elements are unscoped. private MemorySegment cached1; private MemorySegment cached2; MemorySegment pop() { - MemorySegment result = cached1; - cached1 = cached2; - cached2 = null; - return result; + if (cached1 != null) { + MemorySegment result = cached1; + cached1 = null; + return result; + } + if (cached2 != null) { + MemorySegment result = cached2; + cached2 = null; + return result; + } + return null; } boolean push(MemorySegment segment) { - if (cached2 != null) { - return false; + if (cached1 == null) { + cached1 = segment; + return true; + } + if (cached2 == null) { + cached2 = segment; + return true; } - cached2 = cached1; - cached1 = segment; - return true; + return false; } void free() { From 001c785ce04e055e8d1ecceb45c542cbf74871e0 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sun, 19 Jan 2025 18:52:00 +0100 Subject: [PATCH 23/48] move CallBufferCache out --- .../internal/foreign/abi/CallBufferCache.java | 96 +++++++++++++++++++ .../jdk/internal/foreign/abi/SharedUtils.java | 93 +----------------- 2 files changed, 100 insertions(+), 89 deletions(-) create mode 100644 src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java new file mode 100644 index 0000000000000..9178e07d2ee47 --- /dev/null +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java @@ -0,0 +1,96 @@ +package jdk.internal.foreign.abi; + +import jdk.internal.misc.TerminatingThreadLocal; +import jdk.internal.misc.Unsafe; +import jdk.internal.vm.Continuation; + +import java.lang.foreign.MemorySegment; + +/** + * Allocates intermediate buffer space needed within call handles. + * Small buffers may be cached across calls. + */ +final class CallBufferCache { + /** Minimum allocation size = maximum cached size */ + public static final int CACHED_BUFFER_SIZE = 256; + + private static final Unsafe UNSAFE = Unsafe.getUnsafe(); + + static class PerThread { + // Two-elements to support downcall + upcall. Elements are unscoped. + private MemorySegment cached1; + private MemorySegment cached2; + + MemorySegment pop() { + if (cached1 != null) { + MemorySegment result = cached1; + cached1 = null; + return result; + } + if (cached2 != null) { + MemorySegment result = cached2; + cached2 = null; + return result; + } + return null; + } + + boolean push(MemorySegment segment) { + if (cached1 == null) { + cached1 = segment; + return true; + } + if (cached2 == null) { + cached2 = segment; + return true; + } + return false; + } + + void free() { + if (cached1 != null) CallBufferCache.free(cached1); + if (cached2 != null) CallBufferCache.free(cached2); + } + } + + @SuppressWarnings("restricted") + static MemorySegment allocate(long size) { + long allocatedSize = Math.max(CACHED_BUFFER_SIZE, size); + return MemorySegment.ofAddress(UNSAFE.allocateMemory(allocatedSize)) + .reinterpret(allocatedSize); + } + + static void free(MemorySegment segment) { + UNSAFE.freeMemory(segment.address()); + } + + private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { + @Override + protected PerThread initialValue() { + return new PerThread(); + } + + @Override + protected void threadTerminated(PerThread cache) { + cache.free(); + } + }; + + static MemorySegment acquire() { + Continuation.pin(); + try { + return tl.get().pop(); + } finally { + Continuation.unpin(); + } + } + + static boolean release(MemorySegment segment) { + Continuation.pin(); + try { + return tl.get().push(segment); + } finally { + Continuation.unpin(); + } + } +} diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index b1a0f4bead909..9e7f6dc642639 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -40,9 +40,6 @@ import jdk.internal.foreign.abi.s390.linux.LinuxS390Linker; import jdk.internal.foreign.abi.x64.sysv.SysVx64Linker; import jdk.internal.foreign.abi.x64.windows.Windowsx64Linker; -import jdk.internal.misc.TerminatingThreadLocal; -import jdk.internal.misc.Unsafe; -import jdk.internal.vm.Continuation; import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.AddressLayout; @@ -387,8 +384,8 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { @ForceInline public static Arena newBoundedArena(long size) { - MemorySegment cached = size <= BufferCache.CACHED_BUFFER_SIZE ? BufferCache.acquire() : null; - return new BoundedArena(cached != null ? cached : BufferCache.allocate(size)); + MemorySegment cached = size <= CallBufferCache.CACHED_BUFFER_SIZE ? CallBufferCache.acquire() : null; + return new BoundedArena(cached != null ? cached : CallBufferCache.allocate(size)); } static final class BoundedArena implements Arena { @@ -419,90 +416,8 @@ public Scope scope() { @ForceInline public void close() { scope.close(); - if (source.byteSize() != BufferCache.CACHED_BUFFER_SIZE || !BufferCache.release(source)) - BufferCache.free(source); - } - } - - // Intermediate buffer needed for a stub call handle. Small buffers may be reused across calls. - static final class BufferCache { - private static final int CACHED_BUFFER_SIZE = 256; - private static final Unsafe UNSAFE = Unsafe.getUnsafe(); - - // Two-elements to support downcall + upcall. Elements are unscoped. - private MemorySegment cached1; - private MemorySegment cached2; - - MemorySegment pop() { - if (cached1 != null) { - MemorySegment result = cached1; - cached1 = null; - return result; - } - if (cached2 != null) { - MemorySegment result = cached2; - cached2 = null; - return result; - } - return null; - } - - boolean push(MemorySegment segment) { - if (cached1 == null) { - cached1 = segment; - return true; - } - if (cached2 == null) { - cached2 = segment; - return true; - } - return false; - } - - void free() { - if (cached1 != null) free(cached1); - if (cached2 != null) free(cached2); - } - - @SuppressWarnings("restricted") - static MemorySegment allocate(long size) { - long allocatedSize = Math.max(CACHED_BUFFER_SIZE, size); - return MemorySegment.ofAddress(UNSAFE.allocateMemory(allocatedSize)) - .reinterpret(allocatedSize); - } - - static void free(MemorySegment segment) { - UNSAFE.freeMemory(segment.address()); - } - - private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { - @Override - protected BufferCache initialValue() { - return new BufferCache(); - } - - @Override - protected void threadTerminated(BufferCache cache) { - cache.free(); - } - }; - - static MemorySegment acquire() { - Continuation.pin(); - try { - return tl.get().pop(); - } finally { - Continuation.unpin(); - } - } - - static boolean release(MemorySegment segment) { - Continuation.pin(); - try { - return tl.get().push(segment); - } finally { - Continuation.unpin(); - } + if (source.byteSize() != CallBufferCache.CACHED_BUFFER_SIZE || !CallBufferCache.release(source)) + CallBufferCache.free(source); } } From d9a49c6761c5c14dd2e3fb3727b9140e895b303e Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sun, 19 Jan 2025 20:10:26 +0100 Subject: [PATCH 24/48] unit test --- .../internal/foreign/abi/CallBufferCache.java | 18 ++- .../jdk/java/foreign/CallBufferCacheTest.java | 117 ++++++++++++++++++ 2 files changed, 125 insertions(+), 10 deletions(-) create mode 100644 test/jdk/java/foreign/CallBufferCacheTest.java diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java index 9178e07d2ee47..c8dedd1d79dd3 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java @@ -8,12 +8,11 @@ /** * Allocates intermediate buffer space needed within call handles. - * Small buffers may be cached across calls. + * Small buffers may be cached in thread-local storage. */ -final class CallBufferCache { - /** Minimum allocation size = maximum cached size */ +public final class CallBufferCache { + // Minimum allocation size = maximum cached size public static final int CACHED_BUFFER_SIZE = 256; - private static final Unsafe UNSAFE = Unsafe.getUnsafe(); static class PerThread { @@ -54,13 +53,12 @@ void free() { } @SuppressWarnings("restricted") - static MemorySegment allocate(long size) { + public static MemorySegment allocate(long size) { long allocatedSize = Math.max(CACHED_BUFFER_SIZE, size); - return MemorySegment.ofAddress(UNSAFE.allocateMemory(allocatedSize)) - .reinterpret(allocatedSize); + return MemorySegment.ofAddress(UNSAFE.allocateMemory(allocatedSize)).reinterpret(allocatedSize); } - static void free(MemorySegment segment) { + public static void free(MemorySegment segment) { UNSAFE.freeMemory(segment.address()); } @@ -76,7 +74,7 @@ protected void threadTerminated(PerThread cache) { } }; - static MemorySegment acquire() { + public static MemorySegment acquire() { Continuation.pin(); try { return tl.get().pop(); @@ -85,7 +83,7 @@ static MemorySegment acquire() { } } - static boolean release(MemorySegment segment) { + public static boolean release(MemorySegment segment) { Continuation.pin(); try { return tl.get().push(segment); diff --git a/test/jdk/java/foreign/CallBufferCacheTest.java b/test/jdk/java/foreign/CallBufferCacheTest.java new file mode 100644 index 0000000000000..e79e9e0f815af --- /dev/null +++ b/test/jdk/java/foreign/CallBufferCacheTest.java @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @modules java.base/jdk.internal.foreign.abi + * @run testng/othervm --enable-native-access=ALL-UNNAMED CallBufferCacheTest + */ + +import jdk.internal.foreign.abi.CallBufferCache; +import org.testng.annotations.Test; + +import java.lang.foreign.MemorySegment; + +import static jdk.internal.foreign.abi.CallBufferCache.CACHED_BUFFER_SIZE; +import static org.testng.Assert.*; + +public class CallBufferCacheTest { + + @Test + public void testEmpty() { + assertNull(CallBufferCache.acquire()); + } + + private void testAllocate(long size, long expectedSize) { + MemorySegment segment1 = CallBufferCache.allocate(size); + MemorySegment segment2 = CallBufferCache.allocate(size); + assertEquals(segment1.byteSize(), expectedSize); + assertEquals(segment2.byteSize(), expectedSize); + assertNotSame(segment1, segment2); + assertNotSame(segment1.address(), segment2.address()); + assertTrue(segment1.asOverlappingSlice(segment2).isEmpty()); + CallBufferCache.free(segment1); + CallBufferCache.free(segment2); + } + + @Test + public void testAllocateSmall() { + testAllocate(1, CACHED_BUFFER_SIZE); + } + + @Test + public void testAllocateLarge() { + testAllocate(CACHED_BUFFER_SIZE + 123, CACHED_BUFFER_SIZE + 123); + } + + @Test + public void testCacheSize() { + assertNull(CallBufferCache.acquire()); + + MemorySegment segment1 = CallBufferCache.allocate(128); + MemorySegment segment2 = CallBufferCache.allocate(128); + MemorySegment segment3 = CallBufferCache.allocate(128); + + assertTrue(CallBufferCache.release(segment3)); + assertTrue(CallBufferCache.release(segment2)); + assertFalse(CallBufferCache.release(segment1)); + + MemorySegment first = CallBufferCache.acquire(); + assertTrue(first == segment3 || first == segment2); + assertTrue(CallBufferCache.release(first)); + + first = CallBufferCache.acquire(); + MemorySegment second = CallBufferCache.acquire(); + assertNotSame(first, second); + assertTrue(first == segment2 || first == segment3); + assertTrue(second == segment2 || second == segment3); + + assertNull(CallBufferCache.acquire()); + + CallBufferCache.free(segment1); + CallBufferCache.free(segment2); + CallBufferCache.free(segment3); + } + + @Test + public void testThreadLocal() throws InterruptedException { + MemorySegment segment = CallBufferCache.allocate(128); + assertTrue(CallBufferCache.release(segment)); + Thread.ofPlatform().start(() -> assertNull(CallBufferCache.acquire())).join(); + assertSame(segment, CallBufferCache.acquire()); + CallBufferCache.free(segment); + } + + @Test + public void testMigrateThread() throws InterruptedException { + MemorySegment segment = CallBufferCache.allocate(128); + assertTrue(CallBufferCache.release(segment)); + assertSame(segment, CallBufferCache.acquire()); + Thread.ofPlatform().start(() -> { + CallBufferCache.release(segment); + assertSame(segment, CallBufferCache.acquire()); + CallBufferCache.release(segment); + }).join(); + assertNull(CallBufferCache.acquire()); + } +} From 873ffa65dc2c21e2138155d94f714d6bdff3df9d Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sun, 19 Jan 2025 20:14:29 +0100 Subject: [PATCH 25/48] (c) --- .../internal/foreign/abi/CallBufferCache.java | 22 +++++++++++++++++++ .../jdk/java/foreign/CallBufferCacheTest.java | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java index c8dedd1d79dd3..67c40eece0ce1 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java @@ -1,3 +1,25 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ package jdk.internal.foreign.abi; import jdk.internal.misc.TerminatingThreadLocal; diff --git a/test/jdk/java/foreign/CallBufferCacheTest.java b/test/jdk/java/foreign/CallBufferCacheTest.java index e79e9e0f815af..79319ea8f881a 100644 --- a/test/jdk/java/foreign/CallBufferCacheTest.java +++ b/test/jdk/java/foreign/CallBufferCacheTest.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it From 343909b1da04e0fd39f1254219cb19b7fe5f5248 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sun, 19 Jan 2025 21:40:58 +0100 Subject: [PATCH 26/48] Storing segment addresses instead of objects in the cache appears to be slightly faster. Write barrier? --- .../internal/foreign/abi/CallBufferCache.java | 60 +++++------ .../jdk/internal/foreign/abi/SharedUtils.java | 25 +++-- .../jdk/java/foreign/CallBufferCacheTest.java | 102 +++++++++--------- 3 files changed, 91 insertions(+), 96 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java index 67c40eece0ce1..251e86a5a96d7 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java @@ -26,62 +26,58 @@ import jdk.internal.misc.Unsafe; import jdk.internal.vm.Continuation; -import java.lang.foreign.MemorySegment; - /** - * Allocates intermediate buffer space needed within call handles. - * Small buffers may be cached in thread-local storage. + * Provides thread-local storage for up to two buffer addresses. + * It is caller's responsibility to store homogeneous segment sizes. + * Storing addresses, not MemorySegments turns out to be slightly faster (write barrier?). */ public final class CallBufferCache { - // Minimum allocation size = maximum cached size - public static final int CACHED_BUFFER_SIZE = 256; private static final Unsafe UNSAFE = Unsafe.getUnsafe(); static class PerThread { - // Two-elements to support downcall + upcall. Elements are unscoped. - private MemorySegment cached1; - private MemorySegment cached2; + // Two-elements to support downcall + upcall. + private long address1; + private long address2; - MemorySegment pop() { - if (cached1 != null) { - MemorySegment result = cached1; - cached1 = null; + long pop() { + if (address1 != 0) { + long result = address1; + address1 = 0; return result; } - if (cached2 != null) { - MemorySegment result = cached2; - cached2 = null; + if (address2 != 0) { + long result = address2; + address2 = 0; return result; } - return null; + return 0; } - boolean push(MemorySegment segment) { - if (cached1 == null) { - cached1 = segment; + boolean push(long address) { + if (address1 == 0) { + address1 = address; return true; } - if (cached2 == null) { - cached2 = segment; + if (address2 == 0) { + address2 = address; return true; } return false; } void free() { - if (cached1 != null) CallBufferCache.free(cached1); - if (cached2 != null) CallBufferCache.free(cached2); + if (address1 != 0) CallBufferCache.free(address1); + if (address2 != 0) CallBufferCache.free(address2); } } @SuppressWarnings("restricted") - public static MemorySegment allocate(long size) { - long allocatedSize = Math.max(CACHED_BUFFER_SIZE, size); - return MemorySegment.ofAddress(UNSAFE.allocateMemory(allocatedSize)).reinterpret(allocatedSize); + public static long allocate(long size) { + return UNSAFE.allocateMemory(size); } - public static void free(MemorySegment segment) { - UNSAFE.freeMemory(segment.address()); + public static void free(long address) { + UNSAFE.freeMemory(address); } private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { @@ -96,7 +92,7 @@ protected void threadTerminated(PerThread cache) { } }; - public static MemorySegment acquire() { + public static long acquire() { Continuation.pin(); try { return tl.get().pop(); @@ -105,10 +101,10 @@ public static MemorySegment acquire() { } } - public static boolean release(MemorySegment segment) { + public static boolean release(long address) { Continuation.pin(); try { - return tl.get().push(segment); + return tl.get().push(address); } finally { Continuation.unpin(); } diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 9e7f6dc642639..a7995b2faca51 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -382,23 +382,28 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { : chunkOffset; } + // Minimum allocation size = maximum cached size + private static final int CACHED_BUFFER_SIZE = 256; + @ForceInline + @SuppressWarnings("restricted") public static Arena newBoundedArena(long size) { - MemorySegment cached = size <= CallBufferCache.CACHED_BUFFER_SIZE ? CallBufferCache.acquire() : null; - return new BoundedArena(cached != null ? cached : CallBufferCache.allocate(size)); + long allocatedSize = Math.max(size, CACHED_BUFFER_SIZE); + long fromCache = allocatedSize == CACHED_BUFFER_SIZE ? CallBufferCache.acquire() : 0; + long address = fromCache != 0 ? fromCache : CallBufferCache.allocate(allocatedSize); + return new BoundedArena(size, MemorySegment.ofAddress(address).reinterpret(allocatedSize)); } static final class BoundedArena implements Arena { - private final MemorySegment source; + private final Arena scope = Arena.ofConfined(); + private final MemorySegment scoped; private final SegmentAllocator allocator; - private final Arena scope; @ForceInline @SuppressWarnings("restricted") - public BoundedArena(MemorySegment segment) { - source = segment; - scope = Arena.ofConfined(); - allocator = SegmentAllocator.slicingAllocator(segment.reinterpret(scope, null)); + public BoundedArena(long size, MemorySegment source) { + scoped = source.reinterpret(size, scope, null); + allocator = SegmentAllocator.slicingAllocator(scoped); } @Override @@ -416,8 +421,8 @@ public Scope scope() { @ForceInline public void close() { scope.close(); - if (source.byteSize() != CallBufferCache.CACHED_BUFFER_SIZE || !CallBufferCache.release(source)) - CallBufferCache.free(source); + if (scoped.byteSize() > CACHED_BUFFER_SIZE || !CallBufferCache.release(scoped.address())) + CallBufferCache.free(scoped.address()); } } diff --git a/test/jdk/java/foreign/CallBufferCacheTest.java b/test/jdk/java/foreign/CallBufferCacheTest.java index 79319ea8f881a..fa75eecdc140f 100644 --- a/test/jdk/java/foreign/CallBufferCacheTest.java +++ b/test/jdk/java/foreign/CallBufferCacheTest.java @@ -30,88 +30,82 @@ import jdk.internal.foreign.abi.CallBufferCache; import org.testng.annotations.Test; -import java.lang.foreign.MemorySegment; - -import static jdk.internal.foreign.abi.CallBufferCache.CACHED_BUFFER_SIZE; import static org.testng.Assert.*; public class CallBufferCacheTest { @Test public void testEmpty() { - assertNull(CallBufferCache.acquire()); - } - - private void testAllocate(long size, long expectedSize) { - MemorySegment segment1 = CallBufferCache.allocate(size); - MemorySegment segment2 = CallBufferCache.allocate(size); - assertEquals(segment1.byteSize(), expectedSize); - assertEquals(segment2.byteSize(), expectedSize); - assertNotSame(segment1, segment2); - assertNotSame(segment1.address(), segment2.address()); - assertTrue(segment1.asOverlappingSlice(segment2).isEmpty()); - CallBufferCache.free(segment1); - CallBufferCache.free(segment2); - } - - @Test - public void testAllocateSmall() { - testAllocate(1, CACHED_BUFFER_SIZE); + assertEquals(CallBufferCache.acquire(), 0); } @Test - public void testAllocateLarge() { - testAllocate(CACHED_BUFFER_SIZE + 123, CACHED_BUFFER_SIZE + 123); + public void testAllocate() { + long address1 = CallBufferCache.allocate(123); + long address2 = CallBufferCache.allocate(123); + assertNotEquals(address1, address2); + CallBufferCache.free(address1); + CallBufferCache.free(address2); } @Test public void testCacheSize() { - assertNull(CallBufferCache.acquire()); + assertEquals(CallBufferCache.acquire(), 0); - MemorySegment segment1 = CallBufferCache.allocate(128); - MemorySegment segment2 = CallBufferCache.allocate(128); - MemorySegment segment3 = CallBufferCache.allocate(128); + // Three nested calls. + long address1 = CallBufferCache.allocate(128); + long address2 = CallBufferCache.allocate(128); + long address3 = CallBufferCache.allocate(128); - assertTrue(CallBufferCache.release(segment3)); - assertTrue(CallBufferCache.release(segment2)); - assertFalse(CallBufferCache.release(segment1)); + // Two buffers go into the cache. + assertTrue(CallBufferCache.release(address3)); + assertTrue(CallBufferCache.release(address2)); + assertFalse(CallBufferCache.release(address1)); - MemorySegment first = CallBufferCache.acquire(); - assertTrue(first == segment3 || first == segment2); + // Next acquisition is either of them. + long first = CallBufferCache.acquire(); + assertTrue(first == address3 || first == address2); assertTrue(CallBufferCache.release(first)); + // Can re-acquire both. first = CallBufferCache.acquire(); - MemorySegment second = CallBufferCache.acquire(); - assertNotSame(first, second); - assertTrue(first == segment2 || first == segment3); - assertTrue(second == segment2 || second == segment3); - - assertNull(CallBufferCache.acquire()); - - CallBufferCache.free(segment1); - CallBufferCache.free(segment2); - CallBufferCache.free(segment3); + long second = CallBufferCache.acquire(); + assertNotEquals(first, second); + assertTrue(first == address2 || first == address3); + assertTrue(second == address2 || second == address3); + // Now the cache is empty again. + assertEquals(CallBufferCache.acquire(), 0); + + CallBufferCache.free(address1); + CallBufferCache.free(address2); + CallBufferCache.free(address3); } @Test public void testThreadLocal() throws InterruptedException { - MemorySegment segment = CallBufferCache.allocate(128); - assertTrue(CallBufferCache.release(segment)); - Thread.ofPlatform().start(() -> assertNull(CallBufferCache.acquire())).join(); - assertSame(segment, CallBufferCache.acquire()); - CallBufferCache.free(segment); + long address = CallBufferCache.allocate(128); + assertTrue(CallBufferCache.release(address)); + Thread.ofPlatform().start(() -> { + // Not visible in other thread. + assertEquals(CallBufferCache.acquire(), 0); + }).join(); + // Only here. + assertEquals(address, CallBufferCache.acquire()); + CallBufferCache.free(address); } @Test public void testMigrateThread() throws InterruptedException { - MemorySegment segment = CallBufferCache.allocate(128); - assertTrue(CallBufferCache.release(segment)); - assertSame(segment, CallBufferCache.acquire()); + long address = CallBufferCache.allocate(128); + assertTrue(CallBufferCache.release(address)); + assertEquals(address, CallBufferCache.acquire()); Thread.ofPlatform().start(() -> { - CallBufferCache.release(segment); - assertSame(segment, CallBufferCache.acquire()); - CallBufferCache.release(segment); + // A buffer can migrate to another thread due to VThread scheduling. + CallBufferCache.release(address); + assertEquals(address, CallBufferCache.acquire()); + CallBufferCache.release(address); + // freed by TL. }).join(); - assertNull(CallBufferCache.acquire()); + assertEquals(CallBufferCache.acquire(), 0); } } From 4a2210dfbb21ad9a2d1d9f4d098379017b0cb76b Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Sun, 19 Jan 2025 21:53:22 +0100 Subject: [PATCH 27/48] tiny stylistic changes --- .../jdk/internal/foreign/abi/SharedUtils.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index a7995b2faca51..2322839dec14c 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -388,12 +388,13 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { @ForceInline @SuppressWarnings("restricted") public static Arena newBoundedArena(long size) { - long allocatedSize = Math.max(size, CACHED_BUFFER_SIZE); - long fromCache = allocatedSize == CACHED_BUFFER_SIZE ? CallBufferCache.acquire() : 0; - long address = fromCache != 0 ? fromCache : CallBufferCache.allocate(allocatedSize); - return new BoundedArena(size, MemorySegment.ofAddress(address).reinterpret(allocatedSize)); + long bufferSize = Math.max(size, CACHED_BUFFER_SIZE); + long fromCache = bufferSize == CACHED_BUFFER_SIZE ? CallBufferCache.acquire() : 0; + long address = fromCache != 0 ? fromCache : CallBufferCache.allocate(bufferSize); + return new BoundedArena(MemorySegment.ofAddress(address).reinterpret(size)); } + /** A confined arena slicing off an (unscoped) source segment. */ static final class BoundedArena implements Arena { private final Arena scope = Arena.ofConfined(); private final MemorySegment scoped; @@ -401,8 +402,8 @@ static final class BoundedArena implements Arena { @ForceInline @SuppressWarnings("restricted") - public BoundedArena(long size, MemorySegment source) { - scoped = source.reinterpret(size, scope, null); + public BoundedArena(MemorySegment source) { + scoped = source.reinterpret(scope, null); allocator = SegmentAllocator.slicingAllocator(scoped); } @@ -421,6 +422,7 @@ public Scope scope() { @ForceInline public void close() { scope.close(); + // All segments we handed out are now invalid, we can release source to the cache or free it. if (scoped.byteSize() > CACHED_BUFFER_SIZE || !CallBufferCache.release(scoped.address())) CallBufferCache.free(scoped.address()); } From 35a3a156fd5fdfb65fbbf5eebfe170f0eeb61d49 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 20 Jan 2025 08:25:56 +0100 Subject: [PATCH 28/48] Implementation notes. --- .../classes/jdk/internal/foreign/abi/CallBufferCache.java | 4 +++- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java index 251e86a5a96d7..4d53201d8af9d 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java @@ -27,7 +27,7 @@ import jdk.internal.vm.Continuation; /** - * Provides thread-local storage for up to two buffer addresses. + * Provides carrier-thread-local storage for up to two buffer addresses. * It is caller's responsibility to store homogeneous segment sizes. * Storing addresses, not MemorySegments turns out to be slightly faster (write barrier?). */ @@ -93,6 +93,7 @@ protected void threadTerminated(PerThread cache) { }; public static long acquire() { + // Protect against vthread unmount. Continuation.pin(); try { return tl.get().pop(); @@ -102,6 +103,7 @@ public static long acquire() { } public static boolean release(long address) { + // Protect against vthread unmount. Continuation.pin(); try { return tl.get().push(address); diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 2322839dec14c..11f1e5d503ca0 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -388,6 +388,7 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { @ForceInline @SuppressWarnings("restricted") public static Arena newBoundedArena(long size) { + // JDK-8347997: buffer cache pinned section needs to happen outside of constructor. long bufferSize = Math.max(size, CACHED_BUFFER_SIZE); long fromCache = bufferSize == CACHED_BUFFER_SIZE ? CallBufferCache.acquire() : 0; long address = fromCache != 0 ? fromCache : CallBufferCache.allocate(bufferSize); @@ -423,6 +424,7 @@ public Scope scope() { public void close() { scope.close(); // All segments we handed out are now invalid, we can release source to the cache or free it. + // Due to VThread scheduling we may be returning ownership to a different platform thread. if (scoped.byteSize() > CACHED_BUFFER_SIZE || !CallBufferCache.release(scoped.address())) CallBufferCache.free(scoped.address()); } From b7be3a615a1641f55cfca4405eb51440d8969f4a Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 20 Jan 2025 15:25:38 +0100 Subject: [PATCH 29/48] revert formatting --- .../jdk/internal/foreign/abi/SharedUtils.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 11f1e5d503ca0..fd65be5fefe63 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -137,12 +137,12 @@ public static long remainsToAlignment(long addr, long alignment) { * Takes a MethodHandle that takes an input buffer as a first argument (a MemorySegment), and returns nothing, * and adapts it to return a MemorySegment, by allocating a MemorySegment for the input * buffer, calling the target MethodHandle, and then returning the allocated MemorySegment. - *

+ * * This allows viewing a MethodHandle that makes use of in memory return (IMR) as a MethodHandle that just returns * a MemorySegment without requiring a pre-allocated buffer as an explicit input. * * @param handle the target handle to adapt - * @param cDesc the function descriptor of the native function (with actual return layout) + * @param cDesc the function descriptor of the native function (with actual return layout) * @return the adapted handle */ public static MethodHandle adaptDowncallForIMR(MethodHandle handle, FunctionDescriptor cDesc, CallingSequence sequence) { @@ -258,8 +258,8 @@ public static Linker getSystemLinker() { static Map indexMap(Binding.Move[] moves) { return IntStream.range(0, moves.length) - .boxed() - .collect(Collectors.toMap(i -> moves[i].storage(), i -> i)); + .boxed() + .collect(Collectors.toMap(i -> moves[i].storage(), i -> i)); } static MethodHandle mergeArguments(MethodHandle mh, int sourceIndex, int destIndex) { @@ -290,7 +290,7 @@ public static MethodHandle swapArguments(MethodHandle mh, int firstArg, int seco MethodType mtype = mh.type(); int[] perms = new int[mtype.parameterCount()]; MethodType swappedType = MethodType.methodType(mtype.returnType()); - for (int i = 0; i < perms.length; i++) { + for (int i = 0 ; i < perms.length ; i++) { int dst = i; if (i == firstArg) dst = secondArg; if (i == secondArg) dst = firstArg; @@ -468,8 +468,8 @@ static void writeOverSized(MemorySegment ptr, Class type, Object o) { } else if (type == double.class) { ptr.set(JAVA_DOUBLE_UNALIGNED, 0, (double) o); } else if (type == boolean.class) { - boolean b = (boolean) o; - ptr.set(JAVA_LONG_UNALIGNED, 0, b ? (long) 1 : (long) 0); + boolean b = (boolean)o; + ptr.set(JAVA_LONG_UNALIGNED, 0, b ? (long)1 : (long)0); } else { throw new IllegalArgumentException("Unsupported carrier: " + type); } From 643efd7a35bbe9f21cd0fd413e5a86d595dbb4e6 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 20 Jan 2025 17:10:14 +0100 Subject: [PATCH 30/48] move bench --- .../lang/foreign/CallOverheadByValue.java | 113 ++++++++++++++++++ .../lang/foreign/libCallOverheadByValue.c | 39 ++++++ .../java/lang/foreign/points/PointsAlloc.java | 20 ---- .../lang/foreign/points/support/Circle.java | 96 --------------- .../lang/foreign/points/support/libPoint.c | 11 +- .../java/lang/foreign/points/support/points.h | 7 +- 6 files changed, 154 insertions(+), 132 deletions(-) create mode 100644 test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java create mode 100644 test/micro/org/openjdk/bench/java/lang/foreign/libCallOverheadByValue.c delete mode 100644 test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java b/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java new file mode 100644 index 0000000000000..eeb08389f1267 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang.foreign; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; + +import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.Linker; +import java.lang.foreign.MemoryLayout; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; +import java.lang.foreign.SymbolLookup; +import java.lang.foreign.ValueLayout; +import java.lang.invoke.MethodHandle; +import java.util.concurrent.TimeUnit; + +import static org.openjdk.bench.java.lang.foreign.CLayouts.C_DOUBLE; + +@BenchmarkMode(Mode.AverageTime) +@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@State(org.openjdk.jmh.annotations.Scope.Thread) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(value = 1, jvmArgs = {"-Xlog:gc", "--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native"}) +public class CallOverheadByValue { + + public static final MemoryLayout POINT_LAYOUT = MemoryLayout.structLayout( + C_DOUBLE, C_DOUBLE + ); + private static final MethodHandle MH_UNIT_ROTATED_BY_VALUE; + private static final MethodHandle MH_UNIT_ROTATED_BY_PTR; + + static { + Linker abi = Linker.nativeLinker(); + System.loadLibrary("CallOverheadByValue"); + SymbolLookup loaderLibs = SymbolLookup.loaderLookup(); + MH_UNIT_ROTATED_BY_VALUE = abi.downcallHandle( + loaderLibs.findOrThrow("unit_rotated"), + FunctionDescriptor.of(POINT_LAYOUT, C_DOUBLE) + ); + MH_UNIT_ROTATED_BY_PTR = abi.downcallHandle( + loaderLibs.findOrThrow("unit_rotated_ptr"), + FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, C_DOUBLE) + ); + } + + static final int NUM_CIRCLE_POINTS = 100; + + Arena arena = Arena.ofConfined(); + MemorySegment points = arena.allocate(POINT_LAYOUT, NUM_CIRCLE_POINTS); + + @TearDown + public void tearDown() { + arena.close(); + } + + @Benchmark + @OperationsPerInvocation(NUM_CIRCLE_POINTS) + public MemorySegment byValue() throws Throwable { + for (int i = 0; i < NUM_CIRCLE_POINTS; i++) { + double phi = 2 * Math.PI * i / NUM_CIRCLE_POINTS; + // points[i] = unit_rotated(phi); + MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); + MemorySegment unused = + (MemorySegment) MH_UNIT_ROTATED_BY_VALUE.invokeExact( + (SegmentAllocator) (_, _) -> dest, + phi); + } + return points; + } + + @Benchmark + @OperationsPerInvocation(NUM_CIRCLE_POINTS) + public MemorySegment byPtr() throws Throwable { + for (int i = 0; i < NUM_CIRCLE_POINTS; i++) { + double phi = 2 * Math.PI * i / NUM_CIRCLE_POINTS; + // unit_rotated_ptr(&points[i], phi); + MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); + MH_UNIT_ROTATED_BY_PTR.invokeExact(dest, phi); + } + return points; + } +} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/libCallOverheadByValue.c b/test/micro/org/openjdk/bench/java/lang/foreign/libCallOverheadByValue.c new file mode 100644 index 0000000000000..18686290d1ae5 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/foreign/libCallOverheadByValue.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "export.h" +#include "math.h" + +typedef struct { + double x; + double y; +} DoublePoint; + +EXPORT DoublePoint unit_rotated(double phi) { + DoublePoint result = { cos(phi), sin(phi) }; + return result; +} + +EXPORT void unit_rotated_ptr(DoublePoint* out, double phi) { + *out = unit_rotated(phi); +} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java b/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java index 01d306ec72c0f..8fac59fa9e7ab 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/PointsAlloc.java @@ -27,16 +27,13 @@ import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OperationsPerInvocation; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.Warmup; import org.openjdk.bench.java.lang.foreign.points.support.BBPoint; -import org.openjdk.bench.java.lang.foreign.points.support.Circle; import org.openjdk.bench.java.lang.foreign.points.support.JNIPoint; import org.openjdk.bench.java.lang.foreign.points.support.PanamaPoint; -import java.lang.foreign.Arena; import java.util.concurrent.TimeUnit; @BenchmarkMode(Mode.AverageTime) @@ -62,21 +59,4 @@ public Object panama_alloc() throws Throwable { return new PanamaPoint(0, 0); } - static final int NUM_CIRCLE_POINTS = 100; - - @Benchmark - @OperationsPerInvocation(NUM_CIRCLE_POINTS) - public Object circle_by_value() { - try (Arena arena = Arena.ofConfined()) { - return Circle.byValue(arena, NUM_CIRCLE_POINTS); - } - } - - @Benchmark - @OperationsPerInvocation(NUM_CIRCLE_POINTS) - public Object circle_by_ptr() { - try (Arena arena = Arena.ofConfined()) { - return Circle.byPtr(arena, NUM_CIRCLE_POINTS); - } - } } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java deleted file mode 100644 index ebf5f53917d27..0000000000000 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/Circle.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -package org.openjdk.bench.java.lang.foreign.points.support; - -import java.lang.foreign.FunctionDescriptor; -import java.lang.foreign.Linker; -import java.lang.foreign.MemoryLayout; -import java.lang.foreign.MemorySegment; -import java.lang.foreign.SegmentAllocator; -import java.lang.foreign.SymbolLookup; -import java.lang.foreign.ValueLayout; -import java.lang.invoke.MethodHandle; - -import static org.openjdk.bench.java.lang.foreign.CLayouts.C_DOUBLE; - -public class Circle { - public static final MemoryLayout POINT_LAYOUT = MemoryLayout.structLayout( - C_DOUBLE.withName("x"), - C_DOUBLE.withName("y") - ); - private static final MethodHandle MH_UNIT_ROTATED_BY_VALUE; - private static final MethodHandle MH_UNIT_ROTATED_BY_PTR; - - static { - Linker abi = Linker.nativeLinker(); - System.loadLibrary("Point"); - SymbolLookup loaderLibs = SymbolLookup.loaderLookup(); - MH_UNIT_ROTATED_BY_VALUE = abi.downcallHandle( - loaderLibs.findOrThrow("unit_rotated"), - FunctionDescriptor.of(POINT_LAYOUT, C_DOUBLE) - ); - MH_UNIT_ROTATED_BY_PTR = abi.downcallHandle( - loaderLibs.findOrThrow("unit_rotated_ptr"), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, C_DOUBLE) - ); - } - - private final MemorySegment points; - - private Circle(MemorySegment points) { - this.points = points; - } - - public static Circle byValue(SegmentAllocator allocator, int numPoints) { - try { - MemorySegment points = allocator.allocate(POINT_LAYOUT, numPoints); - for (int i = 0; i < numPoints; i++) { - double phi = 2 * Math.PI * i / numPoints; - // points[i] = unit_rotated(phi); - MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); - MemorySegment unused = - (MemorySegment) MH_UNIT_ROTATED_BY_VALUE.invokeExact( - (SegmentAllocator) (_, _) -> dest, - phi); - } - return new Circle(points); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } - - public static Circle byPtr(SegmentAllocator allocator, int numPoints) { - try { - MemorySegment points = allocator.allocate(POINT_LAYOUT, numPoints); - for (int i = 0; i < numPoints; i++) { - double phi = 2 * Math.PI * i / numPoints; - // unit_rotated_ptr(&points[i], phi); - MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); - MH_UNIT_ROTATED_BY_PTR.invokeExact(dest, phi); - } - return new Circle(points); - } catch (Throwable e) { - throw new RuntimeException(e); - } - } -} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c index 258229fb3e960..5e1913e2aa7c4 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/libPoint.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,12 +36,3 @@ EXPORT double distance(Point p1, Point p2) { EXPORT double distance_ptrs(Point* p1, Point* p2) { return distance(*p1, *p2); } - -EXPORT DoublePoint unit_rotated(double phi) { - DoublePoint result = { cos(phi), sin(phi) }; - return result; -} - -EXPORT void unit_rotated_ptr(DoublePoint* out, double phi) { - *out = unit_rotated(phi); -} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/points.h b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/points.h index a6193c3753bbc..1d173ecbc8577 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/points/support/points.h +++ b/test/micro/org/openjdk/bench/java/lang/foreign/points/support/points.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,9 +28,4 @@ typedef struct { int y; } Point; -typedef struct { - double x; - double y; -} DoublePoint; - #endif From 4f8a9a978a98af841f8e0f5d5f7ba01775896912 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 20 Jan 2025 17:10:25 +0100 Subject: [PATCH 31/48] shift api boundary --- .../internal/foreign/abi/CallBufferCache.java | 29 +++++++++++++-- .../jdk/internal/foreign/abi/SharedUtils.java | 36 +++++++++---------- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java index 4d53201d8af9d..fc8933e67bac3 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java @@ -26,16 +26,17 @@ import jdk.internal.misc.Unsafe; import jdk.internal.vm.Continuation; +import java.lang.foreign.MemorySegment; + /** - * Provides carrier-thread-local storage for up to two buffer addresses. - * It is caller's responsibility to store homogeneous segment sizes. - * Storing addresses, not MemorySegments turns out to be slightly faster (write barrier?). + * Provides carrier-thread-local storage for up to two small buffers. */ public final class CallBufferCache { private static final Unsafe UNSAFE = Unsafe.getUnsafe(); static class PerThread { // Two-elements to support downcall + upcall. + // Storing addresses, not MemorySegments turns out to be slightly faster (write barrier?). private long address1; private long address2; @@ -92,6 +93,8 @@ protected void threadTerminated(PerThread cache) { } }; + // visible only for tests + public static long acquire() { // Protect against vthread unmount. Continuation.pin(); @@ -111,4 +114,24 @@ public static boolean release(long address) { Continuation.unpin(); } } + + private static final long CACHED_BUFFER_SIZE = 256; + + @SuppressWarnings("restricted") + public static MemorySegment acquireOrAllocate(long requestedSize) { + final long bufferSize = Math.max(requestedSize, CACHED_BUFFER_SIZE); + long address = (bufferSize == CACHED_BUFFER_SIZE) ? acquire() : 0; + if (address == 0) { + // Either size was too large or cache empty. + address = allocate(bufferSize); + } + return MemorySegment.ofAddress(address).reinterpret(requestedSize); + } + + public static void releaseOrFree(MemorySegment segment) { + if (segment.byteSize() > CACHED_BUFFER_SIZE || !release(segment.address())) { + // Either size was too large or cache full. + free(segment.address()); + } + } } diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index fd65be5fefe63..8b29693150194 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -60,6 +60,7 @@ import java.util.Arrays; import java.util.Map; import java.util.Objects; +import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -382,30 +383,32 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { : chunkOffset; } - // Minimum allocation size = maximum cached size - private static final int CACHED_BUFFER_SIZE = 256; - @ForceInline @SuppressWarnings("restricted") public static Arena newBoundedArena(long size) { - // JDK-8347997: buffer cache pinned section needs to happen outside of constructor. - long bufferSize = Math.max(size, CACHED_BUFFER_SIZE); - long fromCache = bufferSize == CACHED_BUFFER_SIZE ? CallBufferCache.acquire() : 0; - long address = fromCache != 0 ? fromCache : CallBufferCache.allocate(bufferSize); - return new BoundedArena(MemorySegment.ofAddress(address).reinterpret(size)); + // JDK-8347997: buffer cache pinned section needs to happen outside of constructor and before + // confined session, otherwise scalar replacement breaks. + MemorySegment unscoped = CallBufferCache.acquireOrAllocate(size); + Arena scope = Arena.ofConfined(); + MemorySegment source = unscoped.reinterpret(scope, null); + // Preferable we'd like to register this cleanup in the line above + // but it breaks scalar replacement. + return new BoundedArena(scope, source, CallBufferCache::releaseOrFree); } /** A confined arena slicing off an (unscoped) source segment. */ static final class BoundedArena implements Arena { - private final Arena scope = Arena.ofConfined(); - private final MemorySegment scoped; + private final Arena scope; + private final MemorySegment source; private final SegmentAllocator allocator; + private final Consumer cleanup; @ForceInline - @SuppressWarnings("restricted") - public BoundedArena(MemorySegment source) { - scoped = source.reinterpret(scope, null); - allocator = SegmentAllocator.slicingAllocator(scoped); + public BoundedArena(Arena scope, MemorySegment source, Consumer cleanup) { + this.scope = scope; + this.source = source; + this.allocator = SegmentAllocator.slicingAllocator(source); + this.cleanup = cleanup; } @Override @@ -423,10 +426,7 @@ public Scope scope() { @ForceInline public void close() { scope.close(); - // All segments we handed out are now invalid, we can release source to the cache or free it. - // Due to VThread scheduling we may be returning ownership to a different platform thread. - if (scoped.byteSize() > CACHED_BUFFER_SIZE || !CallBufferCache.release(scoped.address())) - CallBufferCache.free(scoped.address()); + cleanup.accept(source); } } From 0023eb456344f29ea1a67c84e7d3569ca00a6b37 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 20 Jan 2025 17:30:41 +0100 Subject: [PATCH 32/48] reduce visibility --- .../internal/foreign/abi/CallBufferCache.java | 19 +++-------- .../jdk/java/foreign/CallBufferCacheTest.java | 33 ++++++++----------- 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java index fc8933e67bac3..4009a705b25ce 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java @@ -67,20 +67,11 @@ boolean push(long address) { } void free() { - if (address1 != 0) CallBufferCache.free(address1); - if (address2 != 0) CallBufferCache.free(address2); + if (address1 != 0) UNSAFE.freeMemory(address1); + if (address2 != 0) UNSAFE.freeMemory(address2); } } - @SuppressWarnings("restricted") - public static long allocate(long size) { - return UNSAFE.allocateMemory(size); - } - - public static void free(long address) { - UNSAFE.freeMemory(address); - } - private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { @Override protected PerThread initialValue() { @@ -93,7 +84,7 @@ protected void threadTerminated(PerThread cache) { } }; - // visible only for tests + // acquire/release visible only for tests public static long acquire() { // Protect against vthread unmount. @@ -123,7 +114,7 @@ public static MemorySegment acquireOrAllocate(long requestedSize) { long address = (bufferSize == CACHED_BUFFER_SIZE) ? acquire() : 0; if (address == 0) { // Either size was too large or cache empty. - address = allocate(bufferSize); + address = UNSAFE.allocateMemory(bufferSize); } return MemorySegment.ofAddress(address).reinterpret(requestedSize); } @@ -131,7 +122,7 @@ public static MemorySegment acquireOrAllocate(long requestedSize) { public static void releaseOrFree(MemorySegment segment) { if (segment.byteSize() > CACHED_BUFFER_SIZE || !release(segment.address())) { // Either size was too large or cache full. - free(segment.address()); + UNSAFE.freeMemory(segment.address()); } } } diff --git a/test/jdk/java/foreign/CallBufferCacheTest.java b/test/jdk/java/foreign/CallBufferCacheTest.java index fa75eecdc140f..fc350d6dcb254 100644 --- a/test/jdk/java/foreign/CallBufferCacheTest.java +++ b/test/jdk/java/foreign/CallBufferCacheTest.java @@ -23,39 +23,32 @@ /* * @test - * @modules java.base/jdk.internal.foreign.abi + * @modules java.base/jdk.internal.foreign.abi java.base/jdk.internal.misc * @run testng/othervm --enable-native-access=ALL-UNNAMED CallBufferCacheTest */ import jdk.internal.foreign.abi.CallBufferCache; +import jdk.internal.misc.Unsafe; import org.testng.annotations.Test; import static org.testng.Assert.*; public class CallBufferCacheTest { - + Unsafe UNSAFE = Unsafe.getUnsafe(); + @Test public void testEmpty() { assertEquals(CallBufferCache.acquire(), 0); } - @Test - public void testAllocate() { - long address1 = CallBufferCache.allocate(123); - long address2 = CallBufferCache.allocate(123); - assertNotEquals(address1, address2); - CallBufferCache.free(address1); - CallBufferCache.free(address2); - } - @Test public void testCacheSize() { assertEquals(CallBufferCache.acquire(), 0); // Three nested calls. - long address1 = CallBufferCache.allocate(128); - long address2 = CallBufferCache.allocate(128); - long address3 = CallBufferCache.allocate(128); + long address1 = UNSAFE.allocateMemory(128); + long address2 = UNSAFE.allocateMemory(128); + long address3 = UNSAFE.allocateMemory(128); // Two buffers go into the cache. assertTrue(CallBufferCache.release(address3)); @@ -76,14 +69,14 @@ public void testCacheSize() { // Now the cache is empty again. assertEquals(CallBufferCache.acquire(), 0); - CallBufferCache.free(address1); - CallBufferCache.free(address2); - CallBufferCache.free(address3); + UNSAFE.freeMemory(address1); + UNSAFE.freeMemory(address2); + UNSAFE.freeMemory(address3); } @Test public void testThreadLocal() throws InterruptedException { - long address = CallBufferCache.allocate(128); + long address = UNSAFE.allocateMemory(128); assertTrue(CallBufferCache.release(address)); Thread.ofPlatform().start(() -> { // Not visible in other thread. @@ -91,12 +84,12 @@ public void testThreadLocal() throws InterruptedException { }).join(); // Only here. assertEquals(address, CallBufferCache.acquire()); - CallBufferCache.free(address); + UNSAFE.freeMemory(address); } @Test public void testMigrateThread() throws InterruptedException { - long address = CallBufferCache.allocate(128); + long address = UNSAFE.allocateMemory(128); assertTrue(CallBufferCache.release(address)); assertEquals(address, CallBufferCache.acquire()); Thread.ofPlatform().start(() -> { From f68a930bb715e6b6dcb07bf6f1cbf7a39586389e Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 20 Jan 2025 18:53:43 +0100 Subject: [PATCH 33/48] remove stray -Xlog:gc test single call overhead --- .../lang/foreign/CallOverheadByValue.java | 50 +++++++------------ .../lang/foreign/libCallOverheadByValue.c | 9 ++-- 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java b/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java index eeb08389f1267..8f89d50cee332 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java @@ -51,33 +51,31 @@ @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @State(org.openjdk.jmh.annotations.Scope.Thread) @OutputTimeUnit(TimeUnit.NANOSECONDS) -@Fork(value = 1, jvmArgs = {"-Xlog:gc", "--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native"}) +@Fork(value = 1, jvmArgs = {"--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native"}) public class CallOverheadByValue { public static final MemoryLayout POINT_LAYOUT = MemoryLayout.structLayout( C_DOUBLE, C_DOUBLE ); - private static final MethodHandle MH_UNIT_ROTATED_BY_VALUE; - private static final MethodHandle MH_UNIT_ROTATED_BY_PTR; + private static final MethodHandle MH_UNIT_BY_VALUE; + private static final MethodHandle MH_UNIT_BY_PTR; static { Linker abi = Linker.nativeLinker(); System.loadLibrary("CallOverheadByValue"); SymbolLookup loaderLibs = SymbolLookup.loaderLookup(); - MH_UNIT_ROTATED_BY_VALUE = abi.downcallHandle( - loaderLibs.findOrThrow("unit_rotated"), - FunctionDescriptor.of(POINT_LAYOUT, C_DOUBLE) + MH_UNIT_BY_VALUE = abi.downcallHandle( + loaderLibs.findOrThrow("unit"), + FunctionDescriptor.of(POINT_LAYOUT) ); - MH_UNIT_ROTATED_BY_PTR = abi.downcallHandle( - loaderLibs.findOrThrow("unit_rotated_ptr"), - FunctionDescriptor.ofVoid(ValueLayout.ADDRESS, C_DOUBLE) + MH_UNIT_BY_PTR = abi.downcallHandle( + loaderLibs.findOrThrow("unit_ptr"), + FunctionDescriptor.ofVoid(ValueLayout.ADDRESS) ); } - static final int NUM_CIRCLE_POINTS = 100; - Arena arena = Arena.ofConfined(); - MemorySegment points = arena.allocate(POINT_LAYOUT, NUM_CIRCLE_POINTS); + MemorySegment point = arena.allocate(POINT_LAYOUT); @TearDown public void tearDown() { @@ -85,29 +83,15 @@ public void tearDown() { } @Benchmark - @OperationsPerInvocation(NUM_CIRCLE_POINTS) - public MemorySegment byValue() throws Throwable { - for (int i = 0; i < NUM_CIRCLE_POINTS; i++) { - double phi = 2 * Math.PI * i / NUM_CIRCLE_POINTS; - // points[i] = unit_rotated(phi); - MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); - MemorySegment unused = - (MemorySegment) MH_UNIT_ROTATED_BY_VALUE.invokeExact( - (SegmentAllocator) (_, _) -> dest, - phi); - } - return points; + public void byValue() throws Throwable { + // point = unit(); + MemorySegment unused = (MemorySegment) MH_UNIT_BY_VALUE.invokeExact( + (SegmentAllocator) (_, _) -> point); } @Benchmark - @OperationsPerInvocation(NUM_CIRCLE_POINTS) - public MemorySegment byPtr() throws Throwable { - for (int i = 0; i < NUM_CIRCLE_POINTS; i++) { - double phi = 2 * Math.PI * i / NUM_CIRCLE_POINTS; - // unit_rotated_ptr(&points[i], phi); - MemorySegment dest = points.asSlice(i * POINT_LAYOUT.byteSize(), POINT_LAYOUT.byteSize()); - MH_UNIT_ROTATED_BY_PTR.invokeExact(dest, phi); - } - return points; + public void byPtr() throws Throwable { + // unit_ptr(&point); + MH_UNIT_BY_PTR.invokeExact(point); } } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/libCallOverheadByValue.c b/test/micro/org/openjdk/bench/java/lang/foreign/libCallOverheadByValue.c index 18686290d1ae5..2eb80f537d8c8 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/libCallOverheadByValue.c +++ b/test/micro/org/openjdk/bench/java/lang/foreign/libCallOverheadByValue.c @@ -22,18 +22,17 @@ */ #include "export.h" -#include "math.h" typedef struct { double x; double y; } DoublePoint; -EXPORT DoublePoint unit_rotated(double phi) { - DoublePoint result = { cos(phi), sin(phi) }; +EXPORT DoublePoint unit() { + DoublePoint result = { 1, 0 }; return result; } -EXPORT void unit_rotated_ptr(DoublePoint* out, double phi) { - *out = unit_rotated(phi); +EXPORT void unit_ptr(DoublePoint* out) { + *out = unit(); } From a523278acea3c2a69e1d4b18531d769f33e65910 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 20 Jan 2025 19:10:50 +0100 Subject: [PATCH 34/48] whitespace :scream: --- test/jdk/java/foreign/CallBufferCacheTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/jdk/java/foreign/CallBufferCacheTest.java b/test/jdk/java/foreign/CallBufferCacheTest.java index fc350d6dcb254..2e30728849ceb 100644 --- a/test/jdk/java/foreign/CallBufferCacheTest.java +++ b/test/jdk/java/foreign/CallBufferCacheTest.java @@ -35,7 +35,7 @@ public class CallBufferCacheTest { Unsafe UNSAFE = Unsafe.getUnsafe(); - + @Test public void testEmpty() { assertEquals(CallBufferCache.acquire(), 0); From 5a8491f125d1687885b68b8a263483520fdc3aa9 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Mon, 20 Jan 2025 19:41:17 +0100 Subject: [PATCH 35/48] restore 3 forks --- .../openjdk/bench/java/lang/foreign/CallOverheadByValue.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java b/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java index 8f89d50cee332..b8d364f64b23c 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java @@ -27,7 +27,6 @@ import org.openjdk.jmh.annotations.Fork; import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OperationsPerInvocation; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.State; import org.openjdk.jmh.annotations.TearDown; @@ -51,7 +50,7 @@ @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @State(org.openjdk.jmh.annotations.Scope.Thread) @OutputTimeUnit(TimeUnit.NANOSECONDS) -@Fork(value = 1, jvmArgs = {"--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native"}) +@Fork(value = 3, jvmArgs = { "--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native" }) public class CallOverheadByValue { public static final MemoryLayout POINT_LAYOUT = MemoryLayout.structLayout( From d408852d167f4940aafd8be9268038fe78d10ac5 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Wed, 22 Jan 2025 10:53:32 +0100 Subject: [PATCH 36/48] Back buffer allocation with a single carrier-local segment. --- .../jdk/internal/foreign/abi/BufferStack.java | 55 ++++++++ .../internal/foreign/abi/CallBufferCache.java | 128 ------------------ .../jdk/internal/foreign/abi/SharedUtils.java | 52 ++----- .../jdk/java/foreign/CallBufferCacheTest.java | 104 -------------- 4 files changed, 64 insertions(+), 275 deletions(-) create mode 100644 src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java delete mode 100644 src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java delete mode 100644 test/jdk/java/foreign/CallBufferCacheTest.java diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java b/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java new file mode 100644 index 0000000000000..e488e7e6d1cd6 --- /dev/null +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java @@ -0,0 +1,55 @@ +package jdk.internal.foreign.abi; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.util.concurrent.locks.ReentrantLock; + +public class BufferStack { + private final MemorySegment backingSegment; + private final ReentrantLock lock = new ReentrantLock(); + private long offset = 0; + + public BufferStack(MemorySegment backingSegment) { + this.backingSegment = backingSegment; + } + + public Arena reserve(long size) { + if (!lock.tryLock()) { + // Rare: another virtual thread on the same carrier was preparing or just + // finished an FFM call, but got unscheduled while holding this stack. + return Arena.ofConfined(); + } + if (offset + size > backingSegment.byteSize()) { + // Rare: we've running out of stack space due to recursion or unusually large buffers. + lock.unlock(); + return Arena.ofConfined(); + } + + return new Frame(); + } + + private class Frame implements Arena { + final long parentOffset = offset; + final Arena scope = Arena.ofConfined(); + + @Override + @SuppressWarnings("restricted") + public MemorySegment allocate(long byteSize, long byteAlignment) { + MemorySegment slice = backingSegment.asSlice(offset, byteSize, byteAlignment); + offset += byteSize; + return slice.reinterpret(scope, null); + } + + @Override + public MemorySegment.Scope scope() { + return scope.scope(); + } + + @Override + public void close() { + scope.close(); + offset = parentOffset; + lock.unlock(); + } + } +} diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java b/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java deleted file mode 100644 index 4009a705b25ce..0000000000000 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/CallBufferCache.java +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -package jdk.internal.foreign.abi; - -import jdk.internal.misc.TerminatingThreadLocal; -import jdk.internal.misc.Unsafe; -import jdk.internal.vm.Continuation; - -import java.lang.foreign.MemorySegment; - -/** - * Provides carrier-thread-local storage for up to two small buffers. - */ -public final class CallBufferCache { - private static final Unsafe UNSAFE = Unsafe.getUnsafe(); - - static class PerThread { - // Two-elements to support downcall + upcall. - // Storing addresses, not MemorySegments turns out to be slightly faster (write barrier?). - private long address1; - private long address2; - - long pop() { - if (address1 != 0) { - long result = address1; - address1 = 0; - return result; - } - if (address2 != 0) { - long result = address2; - address2 = 0; - return result; - } - return 0; - } - - boolean push(long address) { - if (address1 == 0) { - address1 = address; - return true; - } - if (address2 == 0) { - address2 = address; - return true; - } - return false; - } - - void free() { - if (address1 != 0) UNSAFE.freeMemory(address1); - if (address2 != 0) UNSAFE.freeMemory(address2); - } - } - - private static final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { - @Override - protected PerThread initialValue() { - return new PerThread(); - } - - @Override - protected void threadTerminated(PerThread cache) { - cache.free(); - } - }; - - // acquire/release visible only for tests - - public static long acquire() { - // Protect against vthread unmount. - Continuation.pin(); - try { - return tl.get().pop(); - } finally { - Continuation.unpin(); - } - } - - public static boolean release(long address) { - // Protect against vthread unmount. - Continuation.pin(); - try { - return tl.get().push(address); - } finally { - Continuation.unpin(); - } - } - - private static final long CACHED_BUFFER_SIZE = 256; - - @SuppressWarnings("restricted") - public static MemorySegment acquireOrAllocate(long requestedSize) { - final long bufferSize = Math.max(requestedSize, CACHED_BUFFER_SIZE); - long address = (bufferSize == CACHED_BUFFER_SIZE) ? acquire() : 0; - if (address == 0) { - // Either size was too large or cache empty. - address = UNSAFE.allocateMemory(bufferSize); - } - return MemorySegment.ofAddress(address).reinterpret(requestedSize); - } - - public static void releaseOrFree(MemorySegment segment) { - if (segment.byteSize() > CACHED_BUFFER_SIZE || !release(segment.address())) { - // Either size was too large or cache full. - UNSAFE.freeMemory(segment.address()); - } - } -} diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 8b29693150194..b9f8fadd9ba7e 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -40,6 +40,7 @@ import jdk.internal.foreign.abi.s390.linux.LinuxS390Linker; import jdk.internal.foreign.abi.x64.sysv.SysVx64Linker; import jdk.internal.foreign.abi.x64.windows.Windowsx64Linker; +import jdk.internal.misc.CarrierThreadLocal; import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.AddressLayout; @@ -60,7 +61,6 @@ import java.util.Arrays; import java.util.Map; import java.util.Objects; -import java.util.function.Consumer; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -383,51 +383,17 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { : chunkOffset; } - @ForceInline - @SuppressWarnings("restricted") - public static Arena newBoundedArena(long size) { - // JDK-8347997: buffer cache pinned section needs to happen outside of constructor and before - // confined session, otherwise scalar replacement breaks. - MemorySegment unscoped = CallBufferCache.acquireOrAllocate(size); - Arena scope = Arena.ofConfined(); - MemorySegment source = unscoped.reinterpret(scope, null); - // Preferable we'd like to register this cleanup in the line above - // but it breaks scalar replacement. - return new BoundedArena(scope, source, CallBufferCache::releaseOrFree); - } - - /** A confined arena slicing off an (unscoped) source segment. */ - static final class BoundedArena implements Arena { - private final Arena scope; - private final MemorySegment source; - private final SegmentAllocator allocator; - private final Consumer cleanup; - - @ForceInline - public BoundedArena(Arena scope, MemorySegment source, Consumer cleanup) { - this.scope = scope; - this.source = source; - this.allocator = SegmentAllocator.slicingAllocator(source); - this.cleanup = cleanup; - } - + private static final CarrierThreadLocal BUFFER_STACK = new CarrierThreadLocal<>() { @Override - @ForceInline - public MemorySegment allocate(long byteSize, long byteAlignment) { - return allocator.allocate(byteSize, byteAlignment); - } - - @Override - public Scope scope() { - return scope.scope(); + protected BufferStack initialValue() { + return new BufferStack(Arena.ofAuto().allocate(256)); } + }; - @Override - @ForceInline - public void close() { - scope.close(); - cleanup.accept(source); - } + @ForceInline + @SuppressWarnings("restricted") + public static Arena newBoundedArena(long size) { + return BUFFER_STACK.get().reserve(size); } public static Arena newEmptyArena() { diff --git a/test/jdk/java/foreign/CallBufferCacheTest.java b/test/jdk/java/foreign/CallBufferCacheTest.java deleted file mode 100644 index 2e30728849ceb..0000000000000 --- a/test/jdk/java/foreign/CallBufferCacheTest.java +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -/* - * @test - * @modules java.base/jdk.internal.foreign.abi java.base/jdk.internal.misc - * @run testng/othervm --enable-native-access=ALL-UNNAMED CallBufferCacheTest - */ - -import jdk.internal.foreign.abi.CallBufferCache; -import jdk.internal.misc.Unsafe; -import org.testng.annotations.Test; - -import static org.testng.Assert.*; - -public class CallBufferCacheTest { - Unsafe UNSAFE = Unsafe.getUnsafe(); - - @Test - public void testEmpty() { - assertEquals(CallBufferCache.acquire(), 0); - } - - @Test - public void testCacheSize() { - assertEquals(CallBufferCache.acquire(), 0); - - // Three nested calls. - long address1 = UNSAFE.allocateMemory(128); - long address2 = UNSAFE.allocateMemory(128); - long address3 = UNSAFE.allocateMemory(128); - - // Two buffers go into the cache. - assertTrue(CallBufferCache.release(address3)); - assertTrue(CallBufferCache.release(address2)); - assertFalse(CallBufferCache.release(address1)); - - // Next acquisition is either of them. - long first = CallBufferCache.acquire(); - assertTrue(first == address3 || first == address2); - assertTrue(CallBufferCache.release(first)); - - // Can re-acquire both. - first = CallBufferCache.acquire(); - long second = CallBufferCache.acquire(); - assertNotEquals(first, second); - assertTrue(first == address2 || first == address3); - assertTrue(second == address2 || second == address3); - // Now the cache is empty again. - assertEquals(CallBufferCache.acquire(), 0); - - UNSAFE.freeMemory(address1); - UNSAFE.freeMemory(address2); - UNSAFE.freeMemory(address3); - } - - @Test - public void testThreadLocal() throws InterruptedException { - long address = UNSAFE.allocateMemory(128); - assertTrue(CallBufferCache.release(address)); - Thread.ofPlatform().start(() -> { - // Not visible in other thread. - assertEquals(CallBufferCache.acquire(), 0); - }).join(); - // Only here. - assertEquals(address, CallBufferCache.acquire()); - UNSAFE.freeMemory(address); - } - - @Test - public void testMigrateThread() throws InterruptedException { - long address = UNSAFE.allocateMemory(128); - assertTrue(CallBufferCache.release(address)); - assertEquals(address, CallBufferCache.acquire()); - Thread.ofPlatform().start(() -> { - // A buffer can migrate to another thread due to VThread scheduling. - CallBufferCache.release(address); - assertEquals(address, CallBufferCache.acquire()); - CallBufferCache.release(address); - // freed by TL. - }).join(); - assertEquals(CallBufferCache.acquire(), 0); - } -} From ad0b92822006b76bfd2a7db35534d362c7700c74 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Wed, 22 Jan 2025 11:08:29 +0100 Subject: [PATCH 37/48] --unnecessary annotations --- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index b9f8fadd9ba7e..98bbda6ec220c 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -390,8 +390,6 @@ protected BufferStack initialValue() { } }; - @ForceInline - @SuppressWarnings("restricted") public static Arena newBoundedArena(long size) { return BUFFER_STACK.get().reserve(size); } From d347a87b1f1f32795a0c8fcb02abb98ebf7867b1 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Wed, 22 Jan 2025 13:21:08 +0100 Subject: [PATCH 38/48] * use slicing allocator for alignment guarantees * also provide alignment for frame allocation * verify stack order * only lock if VT @ root --- .../internal/foreign/SlicingAllocator.java | 16 +++ .../jdk/internal/foreign/abi/BufferStack.java | 120 +++++++++++++----- .../jdk/internal/foreign/abi/SharedUtils.java | 10 +- 3 files changed, 106 insertions(+), 40 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java b/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java index db7d476053e54..8ea830a217d5b 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java +++ b/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java @@ -38,6 +38,22 @@ public SlicingAllocator(MemorySegment segment) { this.segment = segment; } + public long currentOffset() { + return sp; + } + + public void resetTo(long offset) { + if (offset < 0 || offset > sp) + throw new IllegalArgumentException(String.format("offset %d should be in [0, %d] ", offset, sp)); + this.sp = offset; + } + + public boolean canAllocate(long byteSize, long byteAlignment) { + long min = segment.address(); + long start = Utils.alignUp(min + sp, byteAlignment) - min; + return start + byteSize <= segment.byteSize(); + } + MemorySegment trySlice(long byteSize, long byteAlignment) { long min = segment.address(); long start = Utils.alignUp(min + sp, byteAlignment) - min; diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java b/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java index e488e7e6d1cd6..07f788a1e11da 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java @@ -1,55 +1,109 @@ package jdk.internal.foreign.abi; +import jdk.internal.foreign.SlicingAllocator; +import jdk.internal.misc.TerminatingThreadLocal; +import jdk.internal.vm.annotation.ForceInline; + import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; import java.util.concurrent.locks.ReentrantLock; public class BufferStack { - private final MemorySegment backingSegment; - private final ReentrantLock lock = new ReentrantLock(); - private long offset = 0; + private final long size; - public BufferStack(MemorySegment backingSegment) { - this.backingSegment = backingSegment; + public BufferStack(long size) { + this.size = size; } - public Arena reserve(long size) { - if (!lock.tryLock()) { - // Rare: another virtual thread on the same carrier was preparing or just - // finished an FFM call, but got unscheduled while holding this stack. - return Arena.ofConfined(); + private final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { + @Override + protected PerThread initialValue() { + return new PerThread(size); } - if (offset + size > backingSegment.byteSize()) { - // Rare: we've running out of stack space due to recursion or unusually large buffers. - lock.unlock(); - return Arena.ofConfined(); + + @Override + protected void threadTerminated(PerThread value) { + value.close(); } + }; - return new Frame(); + @ForceInline + public Arena pushFrame(long size, long byteAlignment) { + return tl.get().pushFrame(size, byteAlignment); } - private class Frame implements Arena { - final long parentOffset = offset; - final Arena scope = Arena.ofConfined(); + private static final class PerThread { + private final ReentrantLock lock = new ReentrantLock(); + private final Arena owner = Arena.ofConfined(); + private final SlicingAllocator stack; - @Override - @SuppressWarnings("restricted") - public MemorySegment allocate(long byteSize, long byteAlignment) { - MemorySegment slice = backingSegment.asSlice(offset, byteSize, byteAlignment); - offset += byteSize; - return slice.reinterpret(scope, null); + public PerThread(long size) { + this.stack = new SlicingAllocator(owner.allocate(size)); } - @Override - public MemorySegment.Scope scope() { - return scope.scope(); + void close() { + owner.close(); } - @Override - public void close() { - scope.close(); - offset = parentOffset; - lock.unlock(); + @ForceInline + public Arena pushFrame(long size, long byteAlignment) { + boolean needsLock = Thread.currentThread().isVirtual() && !lock.isHeldByCurrentThread(); + if (needsLock && !lock.tryLock()) { + // Rare: another virtual thread on the same carrier competed for acquisition. + return Arena.ofConfined(); + } + if (!stack.canAllocate(size, byteAlignment)) { + if (needsLock) lock.unlock(); + return Arena.ofConfined(); + } + + return new Frame(needsLock, size, byteAlignment); + } + + private class Frame implements Arena { + private final boolean locked; + private final long parentOffset; + private final long tos; + private final Arena scope = Arena.ofConfined(); + private final SegmentAllocator frame; + + @SuppressWarnings("restricted") + public Frame(boolean locked, long byteSize, long byteAlignment) { + this.locked = locked; + + parentOffset = stack.currentOffset(); + MemorySegment frameSegment = stack.allocate(byteSize, byteAlignment); + tos = stack.currentOffset(); + frame = new SlicingAllocator(frameSegment.reinterpret(scope, null)); + } + + private void assertOrder() { + if (tos != stack.currentOffset()) + throw new IllegalStateException("Out of order access: frame not TOS"); + } + + @Override + @SuppressWarnings("restricted") + public MemorySegment allocate(long byteSize, long byteAlignment) { + assertOrder(); + return frame.allocate(byteSize, byteAlignment); + } + + @Override + public MemorySegment.Scope scope() { + return scope.scope(); + } + + @Override + public void close() { + assertOrder(); + scope.close(); + stack.resetTo(parentOffset); + if (locked) { + lock.unlock(); + } + } } } -} +} \ No newline at end of file diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 98bbda6ec220c..6d66e8f95faca 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -383,15 +383,11 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { : chunkOffset; } - private static final CarrierThreadLocal BUFFER_STACK = new CarrierThreadLocal<>() { - @Override - protected BufferStack initialValue() { - return new BufferStack(Arena.ofAuto().allocate(256)); - } - }; + private static final BufferStack LINKER_STACK = new BufferStack(256); + @ForceInline public static Arena newBoundedArena(long size) { - return BUFFER_STACK.get().reserve(size); + return LINKER_STACK.pushFrame(size, 8); } public static Arena newEmptyArena() { From b0c2af1b99f724b29e2747822519148a57b3ef83 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Wed, 22 Jan 2025 14:21:37 +0100 Subject: [PATCH 39/48] (c) start a test --- .../jdk/internal/foreign/abi/BufferStack.java | 41 ++++++--- test/jdk/java/foreign/TestBufferStack.java | 88 +++++++++++++++++++ 2 files changed, 115 insertions(+), 14 deletions(-) create mode 100644 test/jdk/java/foreign/TestBufferStack.java diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java b/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java index 07f788a1e11da..3ac920eadc60f 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java @@ -1,7 +1,31 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ package jdk.internal.foreign.abi; import jdk.internal.foreign.SlicingAllocator; -import jdk.internal.misc.TerminatingThreadLocal; +import jdk.internal.misc.CarrierThreadLocal; import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.Arena; @@ -16,16 +40,11 @@ public BufferStack(long size) { this.size = size; } - private final TerminatingThreadLocal tl = new TerminatingThreadLocal<>() { + private final ThreadLocal tl = new CarrierThreadLocal<>() { @Override protected PerThread initialValue() { return new PerThread(size); } - - @Override - protected void threadTerminated(PerThread value) { - value.close(); - } }; @ForceInline @@ -35,15 +54,10 @@ public Arena pushFrame(long size, long byteAlignment) { private static final class PerThread { private final ReentrantLock lock = new ReentrantLock(); - private final Arena owner = Arena.ofConfined(); private final SlicingAllocator stack; public PerThread(long size) { - this.stack = new SlicingAllocator(owner.allocate(size)); - } - - void close() { - owner.close(); + this.stack = new SlicingAllocator(Arena.ofAuto().allocate(size)); } @ForceInline @@ -86,7 +100,6 @@ private void assertOrder() { @Override @SuppressWarnings("restricted") public MemorySegment allocate(long byteSize, long byteAlignment) { - assertOrder(); return frame.allocate(byteSize, byteAlignment); } diff --git a/test/jdk/java/foreign/TestBufferStack.java b/test/jdk/java/foreign/TestBufferStack.java new file mode 100644 index 0000000000000..5a1c0547476f4 --- /dev/null +++ b/test/jdk/java/foreign/TestBufferStack.java @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @modules java.base/jdk.internal.foreign.abi + * @run testng TestBufferStack + */ + +import jdk.internal.foreign.abi.BufferStack; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; + +import static java.lang.foreign.ValueLayout.JAVA_INT; + +public class TestBufferStack { + @Test + public void testScopedAllocation() { + BufferStack stack = new BufferStack(256); + try (Arena frame1 = stack.pushFrame(2 * JAVA_INT.byteSize(), JAVA_INT.byteAlignment())) { + // Segments have expected sizes and are accessible and allocated consecutively in the same scope. + MemorySegment segment11 = frame1.allocate(JAVA_INT); + Assert.assertEquals(segment11.byteSize(), 4); + segment11.set(JAVA_INT, 0, 1); + + MemorySegment segment12 = frame1.allocate(JAVA_INT); + Assert.assertEquals(segment12.address(), segment11.address() + 4); + Assert.assertEquals(segment12.byteSize(), 4); + Assert.assertEquals(segment12.scope(), segment11.scope()); + segment12.set(JAVA_INT, 0, 1); + + MemorySegment segment21; + try (Arena frame2 = stack.pushFrame(2 * JAVA_INT.byteSize(), JAVA_INT.byteAlignment())) { + // same here, but a new scope. + segment21 = frame2.allocate(JAVA_INT); + Assert.assertEquals(segment21.address(), segment12.address() + 4); + Assert.assertEquals(segment21.byteSize(), 4); + Assert.assertNotEquals(segment21.scope(), segment12.scope()); + segment21.set(JAVA_INT, 0, 1); + + MemorySegment segment22 = frame2.allocate(JAVA_INT); + Assert.assertEquals(segment22.address(), segment21.address() + 4); + Assert.assertEquals(segment22.byteSize(), 4); + Assert.assertEquals(segment22.scope(), segment21.scope()); + segment22.set(JAVA_INT, 0, 1); + + // Frames must be closed in stack order. + Assert.assertThrows(IllegalStateException.class, frame1::close); + } + // Scope is closed here, inner segments throw. + Assert.assertThrows(IllegalStateException.class, () -> segment21.get(JAVA_INT, 0)); + // A new stack frame allocates at the same location the previous did. + try (Arena frame3 = stack.pushFrame(2 * JAVA_INT.byteSize(), JAVA_INT.byteAlignment())) { + MemorySegment segment31 = frame3.allocate(JAVA_INT); + Assert.assertEquals(segment21.address(), segment12.address() + 4); + } + + // Outer segments are still accessible. + segment11.get(JAVA_INT, 0); + segment12.get(JAVA_INT, 0); + } + } +} From 954a685913254de0f63ee252babf39bd8786764c Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Wed, 22 Jan 2025 14:46:23 +0100 Subject: [PATCH 40/48] more test --- test/jdk/java/foreign/TestBufferStack.java | 56 +++++++++++++--------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/test/jdk/java/foreign/TestBufferStack.java b/test/jdk/java/foreign/TestBufferStack.java index 5a1c0547476f4..d21ac5f0d2f09 100644 --- a/test/jdk/java/foreign/TestBufferStack.java +++ b/test/jdk/java/foreign/TestBufferStack.java @@ -37,48 +37,58 @@ import java.lang.foreign.MemorySegment; import static java.lang.foreign.ValueLayout.JAVA_INT; +import static java.lang.foreign.ValueLayout.JAVA_LONG; public class TestBufferStack { @Test public void testScopedAllocation() { - BufferStack stack = new BufferStack(256); - try (Arena frame1 = stack.pushFrame(2 * JAVA_INT.byteSize(), JAVA_INT.byteAlignment())) { + int stackSize = 128; + BufferStack stack = new BufferStack(stackSize); + MemorySegment stackSegment; + try (Arena frame1 = stack.pushFrame(3 * JAVA_INT.byteSize(), JAVA_INT.byteAlignment())) { // Segments have expected sizes and are accessible and allocated consecutively in the same scope. MemorySegment segment11 = frame1.allocate(JAVA_INT); - Assert.assertEquals(segment11.byteSize(), 4); + Assert.assertEquals(segment11.scope(), frame1.scope()); + Assert.assertEquals(segment11.byteSize(), JAVA_INT.byteSize()); segment11.set(JAVA_INT, 0, 1); + stackSegment = segment11.reinterpret(stackSize); MemorySegment segment12 = frame1.allocate(JAVA_INT); - Assert.assertEquals(segment12.address(), segment11.address() + 4); - Assert.assertEquals(segment12.byteSize(), 4); - Assert.assertEquals(segment12.scope(), segment11.scope()); + Assert.assertEquals(segment12.address(), segment11.address() + JAVA_INT.byteSize()); + Assert.assertEquals(segment12.byteSize(), JAVA_INT.byteSize()); + Assert.assertEquals(segment12.scope(), frame1.scope()); segment12.set(JAVA_INT, 0, 1); - MemorySegment segment21; - try (Arena frame2 = stack.pushFrame(2 * JAVA_INT.byteSize(), JAVA_INT.byteAlignment())) { + MemorySegment segment2; + try (Arena frame2 = stack.pushFrame(JAVA_LONG.byteSize(), JAVA_LONG.byteAlignment())) { + Assert.assertNotEquals(frame2.scope(), frame1.scope()); // same here, but a new scope. - segment21 = frame2.allocate(JAVA_INT); - Assert.assertEquals(segment21.address(), segment12.address() + 4); - Assert.assertEquals(segment21.byteSize(), 4); - Assert.assertNotEquals(segment21.scope(), segment12.scope()); - segment21.set(JAVA_INT, 0, 1); - - MemorySegment segment22 = frame2.allocate(JAVA_INT); - Assert.assertEquals(segment22.address(), segment21.address() + 4); - Assert.assertEquals(segment22.byteSize(), 4); - Assert.assertEquals(segment22.scope(), segment21.scope()); - segment22.set(JAVA_INT, 0, 1); + segment2 = frame2.allocate(JAVA_LONG); + Assert.assertEquals(segment2.address(), segment12.address() + /*segment12 size + frame 1 spare + alignment constraint*/ 3 * JAVA_INT.byteSize()); + Assert.assertEquals(segment2.byteSize(), JAVA_LONG.byteSize()); + Assert.assertEquals(segment2.scope(), frame2.scope()); + segment2.set(JAVA_LONG, 0, 1); // Frames must be closed in stack order. Assert.assertThrows(IllegalStateException.class, frame1::close); } // Scope is closed here, inner segments throw. - Assert.assertThrows(IllegalStateException.class, () -> segment21.get(JAVA_INT, 0)); - // A new stack frame allocates at the same location the previous did. + Assert.assertThrows(IllegalStateException.class, () -> segment2.get(JAVA_INT, 0)); + // A new stack frame allocates at the same location (but different scope) as the previous did. try (Arena frame3 = stack.pushFrame(2 * JAVA_INT.byteSize(), JAVA_INT.byteAlignment())) { - MemorySegment segment31 = frame3.allocate(JAVA_INT); - Assert.assertEquals(segment21.address(), segment12.address() + 4); + MemorySegment segment3 = frame3.allocate(JAVA_INT); + Assert.assertEquals(segment3.scope(), frame3.scope()); + Assert.assertEquals(segment3.address(), segment12.address() + 2 * JAVA_INT.byteSize()); + } + + // Fallback arena behaves like regular stack frame. + MemorySegment outOfStack; + try (Arena hugeFrame = stack.pushFrame(1024, 4)) { + outOfStack = hugeFrame.allocate(4); + Assert.assertEquals(outOfStack.scope(), hugeFrame.scope()); + Assert.assertTrue(outOfStack.asOverlappingSlice(stackSegment).isEmpty()); } + Assert.assertThrows(IllegalStateException.class, () -> outOfStack.get(JAVA_INT, 0)); // Outer segments are still accessible. segment11.get(JAVA_INT, 0); From 686132b18f314b6e370fe1f1a8ed4d33df15b918 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Wed, 22 Jan 2025 15:56:30 +0100 Subject: [PATCH 41/48] an attempt at a stress test --- test/jdk/java/foreign/TestBufferStack.java | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test/jdk/java/foreign/TestBufferStack.java b/test/jdk/java/foreign/TestBufferStack.java index d21ac5f0d2f09..0b29a755be879 100644 --- a/test/jdk/java/foreign/TestBufferStack.java +++ b/test/jdk/java/foreign/TestBufferStack.java @@ -35,9 +35,13 @@ import java.lang.foreign.Arena; import java.lang.foreign.MemorySegment; +import java.time.Duration; +import java.util.Arrays; +import java.util.stream.IntStream; import static java.lang.foreign.ValueLayout.JAVA_INT; import static java.lang.foreign.ValueLayout.JAVA_LONG; +import static java.time.temporal.ChronoUnit.SECONDS; public class TestBufferStack { @Test @@ -95,4 +99,24 @@ public void testScopedAllocation() { segment12.get(JAVA_INT, 0); } } + + @Test + public void stress() throws InterruptedException { + BufferStack stack = new BufferStack(256); + Thread[] vThreads = IntStream.range(0, 1024).mapToObj(_ -> + Thread.ofVirtual().start(() -> { + long threadId = Thread.currentThread().threadId(); + while (true) { + try (Arena arena = stack.pushFrame(JAVA_LONG.byteSize(), JAVA_LONG.byteAlignment())) { + // Try to assert no two vThreads get allocated the same stack space. + MemorySegment segment = arena.allocate(JAVA_LONG); + JAVA_LONG.varHandle().setVolatile(segment, 0L, threadId); + Assert.assertEquals(threadId, (long) JAVA_LONG.varHandle().getVolatile(segment, 0L)); + } + } + })).toArray(Thread[]::new); + Thread.sleep(Duration.of(10, SECONDS)); + Arrays.stream(vThreads).forEach( + thread -> Assert.assertTrue(thread.isAlive())); + } } From 13dfec94d0db9d15bb3eb8fa3b90d49ad29f8264 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Wed, 22 Jan 2025 19:09:36 +0100 Subject: [PATCH 42/48] (c) --- .../share/classes/jdk/internal/foreign/SlicingAllocator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java b/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java index 8ea830a217d5b..6b1a071c2af07 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java +++ b/src/java.base/share/classes/jdk/internal/foreign/SlicingAllocator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it From 93beb680622c956b9cfb8f70cb41cceba7d57868 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 23 Jan 2025 08:28:06 +0100 Subject: [PATCH 43/48] (c) --- test/jdk/java/foreign/TestBufferStack.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/jdk/java/foreign/TestBufferStack.java b/test/jdk/java/foreign/TestBufferStack.java index 0b29a755be879..b7d712849a0cf 100644 --- a/test/jdk/java/foreign/TestBufferStack.java +++ b/test/jdk/java/foreign/TestBufferStack.java @@ -4,9 +4,7 @@ * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. Oracle designates this - * particular file as subject to the "Classpath" exception as provided - * by Oracle in the LICENSE file that accompanied this code. + * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or From f09a29de625f5713984b8ae0c1d2915f56d80260 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 23 Jan 2025 08:33:19 +0100 Subject: [PATCH 44/48] Apply suggestions from code review Co-authored-by: Jorn Vernee --- .../share/classes/jdk/internal/foreign/abi/SharedUtils.java | 4 ++-- .../openjdk/bench/java/lang/foreign/CallOverheadByValue.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java index 6d66e8f95faca..feaa9fdb436e7 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/SharedUtils.java @@ -40,7 +40,6 @@ import jdk.internal.foreign.abi.s390.linux.LinuxS390Linker; import jdk.internal.foreign.abi.x64.sysv.SysVx64Linker; import jdk.internal.foreign.abi.x64.windows.Windowsx64Linker; -import jdk.internal.misc.CarrierThreadLocal; import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.AddressLayout; @@ -383,7 +382,8 @@ static long pickChunkOffset(long chunkOffset, long byteWidth, int chunkWidth) { : chunkOffset; } - private static final BufferStack LINKER_STACK = new BufferStack(256); + private static final int LINKER_STACK_SIZE = Integer.getInteger("jdk.internal.foreign.LINKER_STACK_SIZE", 256); + private static final BufferStack LINKER_STACK = new BufferStack(LINKER_STACK_SIZE); @ForceInline public static Arena newBoundedArena(long size) { diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java b/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java index b8d364f64b23c..8fae1905472ec 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/CallOverheadByValue.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it From 6dbda1cdecd2331f3ccb698371be44db6bad63ce Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 23 Jan 2025 08:33:37 +0100 Subject: [PATCH 45/48] topOfStack --- .../classes/jdk/internal/foreign/abi/BufferStack.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java b/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java index 3ac920eadc60f..150d54856026a 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java +++ b/src/java.base/share/classes/jdk/internal/foreign/abi/BufferStack.java @@ -78,7 +78,7 @@ public Arena pushFrame(long size, long byteAlignment) { private class Frame implements Arena { private final boolean locked; private final long parentOffset; - private final long tos; + private final long topOfStack; private final Arena scope = Arena.ofConfined(); private final SegmentAllocator frame; @@ -88,13 +88,13 @@ public Frame(boolean locked, long byteSize, long byteAlignment) { parentOffset = stack.currentOffset(); MemorySegment frameSegment = stack.allocate(byteSize, byteAlignment); - tos = stack.currentOffset(); + topOfStack = stack.currentOffset(); frame = new SlicingAllocator(frameSegment.reinterpret(scope, null)); } private void assertOrder() { - if (tos != stack.currentOffset()) - throw new IllegalStateException("Out of order access: frame not TOS"); + if (topOfStack != stack.currentOffset()) + throw new IllegalStateException("Out of order access: frame not top-of-stack"); } @Override From 0e6d53201d3ddd47b6b4027eeac137e0086846a2 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 23 Jan 2025 09:29:43 +0100 Subject: [PATCH 46/48] test deep linker stack --- test/jdk/java/foreign/TestBufferStack.java | 37 ++++++++++++++++++-- test/jdk/java/foreign/libTestBufferStack.c | 39 ++++++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 test/jdk/java/foreign/libTestBufferStack.c diff --git a/test/jdk/java/foreign/TestBufferStack.java b/test/jdk/java/foreign/TestBufferStack.java index b7d712849a0cf..10bb51d05da8a 100644 --- a/test/jdk/java/foreign/TestBufferStack.java +++ b/test/jdk/java/foreign/TestBufferStack.java @@ -24,6 +24,7 @@ /* * @test * @modules java.base/jdk.internal.foreign.abi + * @build NativeTestHelper TestBufferStack * @run testng TestBufferStack */ @@ -32,16 +33,20 @@ import org.testng.annotations.Test; import java.lang.foreign.Arena; +import java.lang.foreign.FunctionDescriptor; +import java.lang.foreign.MemoryLayout; import java.lang.foreign.MemorySegment; +import java.lang.foreign.SegmentAllocator; +import java.lang.invoke.MethodHandle; import java.time.Duration; import java.util.Arrays; import java.util.stream.IntStream; -import static java.lang.foreign.ValueLayout.JAVA_INT; -import static java.lang.foreign.ValueLayout.JAVA_LONG; +import static java.lang.foreign.MemoryLayout.structLayout; +import static java.lang.foreign.ValueLayout.*; import static java.time.temporal.ChronoUnit.SECONDS; -public class TestBufferStack { +public class TestBufferStack extends NativeTestHelper { @Test public void testScopedAllocation() { int stackSize = 128; @@ -117,4 +122,30 @@ public void stress() throws InterruptedException { Arrays.stream(vThreads).forEach( thread -> Assert.assertTrue(thread.isAlive())); } + + static { + System.loadLibrary("TestBufferStack"); + } + + private static final MemoryLayout HVAPoint3D = structLayout(NativeTestHelper.C_DOUBLE, C_DOUBLE, C_DOUBLE); + private static final MemorySegment UPCALL_MH = upcallStub(TestBufferStack.class, "recurse", FunctionDescriptor.of(HVAPoint3D, C_INT)); + private static final MethodHandle DOWNCALL_MH = downcallHandle("recurse", FunctionDescriptor.of(HVAPoint3D, C_INT, ADDRESS)); + + public static MemorySegment recurse(int depth) { + try { + return (MemorySegment) DOWNCALL_MH.invokeExact((SegmentAllocator) Arena.ofAuto(), depth, UPCALL_MH); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + @Test + public void testDeepStack() throws Throwable { + // Each downcall and upcall require 48 bytes of stack. + // After five allocations we start falling back. + MemorySegment point = recurse(10); + Assert.assertEquals(point.getAtIndex(C_DOUBLE, 0), 12.0); + Assert.assertEquals(point.getAtIndex(C_DOUBLE, 1), 11.0); + Assert.assertEquals(point.getAtIndex(C_DOUBLE, 2), 10.0); + } } diff --git a/test/jdk/java/foreign/libTestBufferStack.c b/test/jdk/java/foreign/libTestBufferStack.c new file mode 100644 index 0000000000000..79eb32bf9334c --- /dev/null +++ b/test/jdk/java/foreign/libTestBufferStack.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#include "export.h" + +typedef struct { double x, y, z; } HVAPoint3D; + +EXPORT HVAPoint3D recurse(int depth, HVAPoint3D (*cb)(int)) { + if (depth == 0) { + HVAPoint3D result = { 2, 1, 0}; + return result; + } + + HVAPoint3D result = cb(depth - 1); + result.x += 1; + result.y += 1; + result.z += 1; + return result; +} From 8947964844374471a8dd6ca4824093ae423ab375 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 23 Jan 2025 18:09:45 +0100 Subject: [PATCH 47/48] /othervm --enable-native-access=ALL-UNNAMED Co-authored-by: Jorn Vernee --- test/jdk/java/foreign/TestBufferStack.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/jdk/java/foreign/TestBufferStack.java b/test/jdk/java/foreign/TestBufferStack.java index 10bb51d05da8a..d69daba35451c 100644 --- a/test/jdk/java/foreign/TestBufferStack.java +++ b/test/jdk/java/foreign/TestBufferStack.java @@ -25,7 +25,7 @@ * @test * @modules java.base/jdk.internal.foreign.abi * @build NativeTestHelper TestBufferStack - * @run testng TestBufferStack + * @run testng/othervm --enable-native-access=ALL-UNNAMED TestBufferStack */ import jdk.internal.foreign.abi.BufferStack; From c314d6a17c2d0b257ed4025c3a59cec84de38e80 Mon Sep 17 00:00:00 2001 From: Matthias Ernst Date: Thu, 23 Jan 2025 18:32:48 +0100 Subject: [PATCH 48/48] fix test under VThread factory --- test/jdk/java/foreign/TestBufferStack.java | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/test/jdk/java/foreign/TestBufferStack.java b/test/jdk/java/foreign/TestBufferStack.java index d69daba35451c..bf1ada8854c5b 100644 --- a/test/jdk/java/foreign/TestBufferStack.java +++ b/test/jdk/java/foreign/TestBufferStack.java @@ -109,18 +109,24 @@ public void stress() throws InterruptedException { Thread[] vThreads = IntStream.range(0, 1024).mapToObj(_ -> Thread.ofVirtual().start(() -> { long threadId = Thread.currentThread().threadId(); - while (true) { - try (Arena arena = stack.pushFrame(JAVA_LONG.byteSize(), JAVA_LONG.byteAlignment())) { - // Try to assert no two vThreads get allocated the same stack space. - MemorySegment segment = arena.allocate(JAVA_LONG); - JAVA_LONG.varHandle().setVolatile(segment, 0L, threadId); - Assert.assertEquals(threadId, (long) JAVA_LONG.varHandle().getVolatile(segment, 0L)); + while (!Thread.interrupted()) { + for (int i = 0; i < 1_000_000; i++) { + try (Arena arena = stack.pushFrame(JAVA_LONG.byteSize(), JAVA_LONG.byteAlignment())) { + // Try to assert no two vThreads get allocated the same stack space. + MemorySegment segment = arena.allocate(JAVA_LONG); + JAVA_LONG.varHandle().setVolatile(segment, 0L, threadId); + Assert.assertEquals(threadId, (long) JAVA_LONG.varHandle().getVolatile(segment, 0L)); + } } + Thread.yield(); // make sure the driver thread gets a chance. } })).toArray(Thread[]::new); Thread.sleep(Duration.of(10, SECONDS)); Arrays.stream(vThreads).forEach( - thread -> Assert.assertTrue(thread.isAlive())); + thread -> { + Assert.assertTrue(thread.isAlive()); + thread.interrupt(); + }); } static {