diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 97417efaa8ca3..a52881b047b18 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -889,23 +889,27 @@ public void setAtIndex(AddressLayout layout, long index, MemorySegment value) { layout.varHandle().set((MemorySegment)this, index * layout.byteSize(), value); } + @ForceInline @Override public String getString(long offset) { return getString(offset, sun.nio.cs.UTF_8.INSTANCE); } + @ForceInline @Override public String getString(long offset, Charset charset) { Objects.requireNonNull(charset); return StringSupport.read(this, offset, charset); } + @ForceInline @Override public void setString(long offset, String str) { Objects.requireNonNull(str); setString(offset, str, sun.nio.cs.UTF_8.INSTANCE); } + @ForceInline @Override public void setString(long offset, String str, Charset charset) { Objects.requireNonNull(charset); diff --git a/src/java.base/share/classes/jdk/internal/foreign/SegmentBulkOperations.java b/src/java.base/share/classes/jdk/internal/foreign/SegmentBulkOperations.java index d928f8fc425fe..068db1bf593c6 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/SegmentBulkOperations.java +++ b/src/java.base/share/classes/jdk/internal/foreign/SegmentBulkOperations.java @@ -28,7 +28,6 @@ import jdk.internal.misc.ScopedMemoryAccess; import jdk.internal.util.Architecture; import jdk.internal.util.ArraysSupport; -import jdk.internal.util.ByteArrayLittleEndian; import jdk.internal.vm.annotation.ForceInline; import jdk.internal.vm.annotation.Stable; @@ -50,6 +49,7 @@ public final class SegmentBulkOperations { private SegmentBulkOperations() {} private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess(); + private static final long LONG_MASK = ~7L; // The last three bits are zero // All the threshold values below MUST be a power of two and should preferably be // greater or equal to 2^3. @@ -75,21 +75,21 @@ public static MemorySegment fill(AbstractMemorySegmentImpl dst, byte value) { int offset = 0; // 0...0X...X000 final int limit = (int) (dst.length & (NATIVE_THRESHOLD_FILL - 8)); - for (; offset < limit; offset += 8) { + for (; offset < limit; offset += Long.BYTES) { SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, longValue, !Architecture.isLittleEndian()); } int remaining = (int) dst.length - limit; // 0...0X00 - if (remaining >= 4) { + if (remaining >= Integer.BYTES) { SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, (int) longValue, !Architecture.isLittleEndian()); - offset += 4; - remaining -= 4; + offset += Integer.BYTES; + remaining -= Integer.BYTES; } // 0...00X0 - if (remaining >= 2) { + if (remaining >= Short.BYTES) { SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, (short) longValue, !Architecture.isLittleEndian()); - offset += 2; - remaining -= 2; + offset += Short.BYTES; + remaining -= Short.BYTES; } // 0...000X if (remaining == 1) { @@ -123,26 +123,26 @@ public static void copy(AbstractMemorySegmentImpl src, long srcOffset, // is an overlap, we could tolerate one particular direction of overlap (but not the other). // 0...0X...X000 - final int limit = (int) (size & (NATIVE_THRESHOLD_COPY - 8)); + final int limit = (int) (size & (NATIVE_THRESHOLD_COPY - Long.BYTES)); int offset = 0; - for (; offset < limit; offset += 8) { + for (; offset < limit; offset += Long.BYTES) { final long v = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian()); SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian()); } int remaining = (int) size - offset; // 0...0X00 - if (remaining >= 4) { + if (remaining >= Integer.BYTES) { final int v = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian()); SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian()); - offset += 4; - remaining -= 4; + offset += Integer.BYTES; + remaining -= Integer.BYTES; } // 0...00X0 - if (remaining >= 2) { + if (remaining >= Short.BYTES) { final short v = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian()); SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian()); - offset += 2; - remaining -=2; + offset += Short.BYTES; + remaining -= Short.BYTES; } // 0...000X if (remaining == 1) { @@ -202,9 +202,9 @@ public static int contentHash(AbstractMemorySegmentImpl segment, long fromOffset return 1; } int result = 1; - final long longBytes = length & ((1L << 62) - 8); + final long longBytes = length & LONG_MASK; final long limit = fromOffset + longBytes; - for (; fromOffset < limit; fromOffset += 8) { + for (; fromOffset < limit; fromOffset += Long.BYTES) { long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + fromOffset, !Architecture.isLittleEndian()); result = result * POWERS_OF_31[7] + ((byte) (val >>> 56)) * POWERS_OF_31[6] @@ -218,24 +218,24 @@ public static int contentHash(AbstractMemorySegmentImpl segment, long fromOffset } int remaining = (int) (length - longBytes); // 0...0X00 - if (remaining >= 4) { + if (remaining >= Integer.BYTES) { int val = SCOPED_MEMORY_ACCESS.getIntUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + fromOffset, !Architecture.isLittleEndian()); result = result * POWERS_OF_31[3] + ((byte) (val >>> 24)) * POWERS_OF_31[2] + ((byte) (val >>> 16)) * POWERS_OF_31[1] + ((byte) (val >>> 8)) * POWERS_OF_31[0] + ((byte) val); - fromOffset += 4; - remaining -= 4; + fromOffset += Integer.BYTES; + remaining -= Integer.BYTES; } // 0...00X0 - if (remaining >= 2) { + if (remaining >= Short.BYTES) { short val = SCOPED_MEMORY_ACCESS.getShortUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + fromOffset, !Architecture.isLittleEndian()); result = result * POWERS_OF_31[1] + ((byte) (val >>> 8)) * POWERS_OF_31[0] + ((byte) val); - fromOffset += 2; - remaining -= 2; + fromOffset += Short.BYTES; + remaining -= Short.BYTES; } // 0...000X if (remaining == 1) { @@ -288,7 +288,7 @@ private static long mismatch(AbstractMemorySegmentImpl src, long srcFromOffset, long start, int length, boolean srcAndDstBytesDiffer) { int offset = 0; final int limit = length & (NATIVE_THRESHOLD_MISMATCH - 8); - for (; offset < limit; offset += 8) { + for (; offset < limit; offset += Long.BYTES) { final long s = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, false); final long d = SCOPED_MEMORY_ACCESS.getLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, false); if (s != d) { @@ -298,24 +298,24 @@ private static long mismatch(AbstractMemorySegmentImpl src, long srcFromOffset, int remaining = length - offset; // 0...0X00 - if (remaining >= 4) { + if (remaining >= Integer.BYTES) { final int s = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, false); final int d = SCOPED_MEMORY_ACCESS.getIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, false); if (s != d) { return start + offset + mismatch(s, d); } - offset += 4; - remaining -= 4; + offset += Integer.BYTES; + remaining -= Integer.BYTES; } // 0...00X0 - if (remaining >= 2) { + if (remaining >= Short.BYTES) { final short s = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, false); final short d = SCOPED_MEMORY_ACCESS.getShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, false); if (s != d) { return start + offset + mismatch(s, d); } - offset += 2; - remaining -= 2; + offset += Short.BYTES; + remaining -= Short.BYTES; } // 0...000X if (remaining == 1) { diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index 78550c56136d5..8f182f3b33845 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -27,8 +27,10 @@ import jdk.internal.access.JavaLangAccess; import jdk.internal.access.SharedSecrets; -import jdk.internal.foreign.abi.SharedUtils; +import jdk.internal.misc.ScopedMemoryAccess; +import jdk.internal.util.Architecture; import jdk.internal.util.ArraysSupport; +import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.MemorySegment; import java.nio.charset.Charset; @@ -40,11 +42,14 @@ */ public final class StringSupport { - static final JavaLangAccess JAVA_LANG_ACCESS = SharedSecrets.getJavaLangAccess(); + private static final JavaLangAccess JAVA_LANG_ACCESS = SharedSecrets.getJavaLangAccess(); + private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess(); + private static final long LONG_MASK = ~7L; // The last three bits are zero private StringSupport() {} - public static String read(MemorySegment segment, long offset, Charset charset) { + @ForceInline + public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset) { return switch (CharsetKind.of(charset)) { case SINGLE_BYTE -> readByte(segment, offset, charset); case DOUBLE_BYTE -> readShort(segment, offset, charset); @@ -52,7 +57,8 @@ public static String read(MemorySegment segment, long offset, Charset charset) { }; } - public static void write(MemorySegment segment, long offset, Charset charset, String string) { + @ForceInline + public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { switch (CharsetKind.of(charset)) { case SINGLE_BYTE -> writeByte(segment, offset, charset, string); case DOUBLE_BYTE -> writeShort(segment, offset, charset, string); @@ -60,111 +66,183 @@ public static void write(MemorySegment segment, long offset, Charset charset, St } } - private static String readByte(MemorySegment segment, long offset, Charset charset) { - long len = chunkedStrlenByte(segment, offset); - byte[] bytes = new byte[(int)len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len); + @ForceInline + private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) { + final int len = strlenByte(segment, offset, segment.byteSize()); + final byte[] bytes = new byte[len]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); return new String(bytes, charset); } - private static void writeByte(MemorySegment segment, long offset, Charset charset, String string) { + @ForceInline + private static void writeByte(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { int bytes = copyBytes(string, segment, charset, offset); segment.set(JAVA_BYTE, offset + bytes, (byte)0); } - private static String readShort(MemorySegment segment, long offset, Charset charset) { - long len = chunkedStrlenShort(segment, offset); - byte[] bytes = new byte[(int)len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len); + @ForceInline + private static String readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) { + int len = strlenShort(segment, offset, segment.byteSize()); + byte[] bytes = new byte[len]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); return new String(bytes, charset); } - private static void writeShort(MemorySegment segment, long offset, Charset charset, String string) { + @ForceInline + private static void writeShort(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { int bytes = copyBytes(string, segment, charset, offset); segment.set(JAVA_SHORT_UNALIGNED, offset + bytes, (short)0); } - private static String readInt(MemorySegment segment, long offset, Charset charset) { - long len = strlenInt(segment, offset); - byte[] bytes = new byte[(int)len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int)len); + @ForceInline + private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) { + int len = strlenInt(segment, offset, segment.byteSize()); + byte[] bytes = new byte[len]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); return new String(bytes, charset); } - private static void writeInt(MemorySegment segment, long offset, Charset charset, String string) { + @ForceInline + private static void writeInt(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { int bytes = copyBytes(string, segment, charset, offset); segment.set(JAVA_INT_UNALIGNED, offset + bytes, 0); } /** - * {@return the shortest distance beginning at the provided {@code start} - * to the encountering of a zero byte in the provided {@code segment}} + * {@return the index of the first zero byte beginning at the provided + * {@code fromOffset} to the encountering of a zero byte in the provided + * {@code segment} checking bytes before the {@code toOffset}} *

- * The method divides the region of interest into three distinct regions: - *

- *

- * The body is using a heuristic method to determine if a long word - * contains a zero byte. The method might have false positives but - * never false negatives. + * The method is using a heuristic method to determine if a long word contains a + * zero byte. The method might have false positives but never false negatives. *

* This method is inspired by the `glibc/string/strlen.c` implementation * - * @param segment to examine - * @param start from where examination shall begin + * @param segment to examine + * @param fromOffset from where examination shall begin (inclusive) + * @param toOffset to where examination shall end (exclusive) * @throws IllegalArgumentException if the examined region contains no zero bytes * within a length that can be accepted by a String */ - public static int chunkedStrlenByte(MemorySegment segment, long start) { - - // Handle the first unaligned "head" bytes separately - int headCount = (int)SharedUtils.remainsToAlignment(segment.address() + start, Long.BYTES); - - int offset = 0; - for (; offset < headCount; offset++) { - byte curr = segment.get(JAVA_BYTE, start + offset); - if (curr == 0) { - return offset; + @ForceInline + public static int strlenByte(final AbstractMemorySegmentImpl segment, + final long fromOffset, + final long toOffset) { + final long length = toOffset - fromOffset; + segment.checkBounds(fromOffset, length); + if (length == 0) { + // The state has to be checked explicitly for zero-length segments + segment.scope.checkValidState(); + throw nullNotFound(segment, fromOffset, toOffset); + } + final long longBytes = length & LONG_MASK; + final long longLimit = fromOffset + longBytes; + long offset = fromOffset; + for (; offset < longLimit; offset += Long.BYTES) { + long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian()); + if (mightContainZeroByte(val)) { + for (int j = 0; j < Long.BYTES; j++) { + if (SCOPED_MEMORY_ACCESS.getByte(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset + j) == 0) { + return requireWithinStringSize(offset + j - fromOffset, segment, fromOffset, toOffset); + } + } + } + } + // Handle the tail + for (; offset < toOffset; offset++) { + byte val = SCOPED_MEMORY_ACCESS.getByte(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset); + if (val == 0) { + return requireWithinStringSize(offset - fromOffset, segment, fromOffset, toOffset); } } + throw nullNotFound(segment, fromOffset, toOffset); + } - // We are now on a long-aligned boundary so this is the "body" - int bodyCount = bodyCount(segment.byteSize() - start - headCount); - - for (; offset < bodyCount; offset += Long.BYTES) { - // We know we are `long` aligned so, we can save on alignment checking here - long curr = segment.get(JAVA_LONG_UNALIGNED, start + offset); - // Is this a candidate? - if (mightContainZeroByte(curr)) { - for (int j = 0; j < 8; j++) { - if (segment.get(JAVA_BYTE, start + offset + j) == 0) { - return offset + j; + @ForceInline + public static int strlenShort(final AbstractMemorySegmentImpl segment, + final long fromOffset, + final long toOffset) { + final long length = toOffset - fromOffset; + segment.checkBounds(fromOffset, length); + if (length == 0) { + segment.scope.checkValidState(); + throw nullNotFound(segment, fromOffset, toOffset); + } + final long longBytes = length & LONG_MASK; + final long longLimit = fromOffset + longBytes; + long offset = fromOffset; + for (; offset < longLimit; offset += Long.BYTES) { + long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian()); + if (mightContainZeroShort(val)) { + for (int j = 0; j < Long.BYTES; j += Short.BYTES) { + if (SCOPED_MEMORY_ACCESS.getShortUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset + j, !Architecture.isLittleEndian()) == 0) { + return requireWithinStringSize(offset + j - fromOffset, segment, fromOffset, toOffset); } } } } + // Handle the tail + // Prevent over scanning as we step by 2 + final long endScan = toOffset & ~1; // The last bit is zero + for (; offset < endScan; offset += Short.BYTES) { + short val = SCOPED_MEMORY_ACCESS.getShortUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian()); + if (val == 0) { + return requireWithinStringSize(offset - fromOffset, segment, fromOffset, toOffset); + } + } + throw nullNotFound(segment, fromOffset, toOffset); + } - // Handle the "tail" - return requireWithinArraySize((long) offset + strlenByte(segment, start + offset)); + @ForceInline + public static int strlenInt(final AbstractMemorySegmentImpl segment, + final long fromOffset, + final long toOffset) { + final long length = toOffset - fromOffset; + segment.checkBounds(fromOffset, length); + if (length == 0) { + segment.scope.checkValidState(); + throw nullNotFound(segment, fromOffset, toOffset); + } + final long longBytes = length & LONG_MASK; + final long longLimit = fromOffset + longBytes; + long offset = fromOffset; + for (; offset < longLimit; offset += Long.BYTES) { + long val = SCOPED_MEMORY_ACCESS.getLongUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian()); + if (mightContainZeroInt(val)) { + for (int j = 0; j < Long.BYTES; j += Integer.BYTES) { + if (SCOPED_MEMORY_ACCESS.getIntUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset + j, !Architecture.isLittleEndian()) == 0) { + return requireWithinStringSize(offset + j - fromOffset, segment, fromOffset, toOffset); + } + } + } + } + // Handle the tail + // Prevent over scanning as we step by 4 + final long endScan = toOffset & ~3; // The last two bit are zero + for (; offset < endScan; offset += Integer.BYTES) { + int val = SCOPED_MEMORY_ACCESS.getIntUnaligned(segment.sessionImpl(), segment.unsafeGetBase(), segment.unsafeGetOffset() + offset, !Architecture.isLittleEndian()); + if (val == 0) { + return requireWithinStringSize(offset - fromOffset, segment, fromOffset, toOffset); + } + } + throw nullNotFound(segment, fromOffset, toOffset); } - /* Bits 63 and N * 8 (N = 1..7) of this number are zero. Call these bits - the "holes". Note that there is a hole just to the left of - each byte, with an extra at the end: + /* + Bits 63 and N * 8 (N = 1..7) of this number are zero. Call these bits + the "holes". Note that there is a hole just to the left of + each byte, with an extra at the end: - bits: 01111110 11111110 11111110 11111110 11111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD EEEEEEEE FFFFFFFF GGGGGGGG HHHHHHHH + bits: 01111110 11111110 11111110 11111110 11111110 11111110 11111110 11111111 + bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD EEEEEEEE FFFFFFFF GGGGGGGG HHHHHHHH - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. + The 1-bits make sure that carries propagate to the next 0-bit. + The 0-bits provide holes for carries to fall into. */ private static final long HIMAGIC_FOR_BYTES = 0x8080_8080_8080_8080L; private static final long LOMAGIC_FOR_BYTES = 0x0101_0101_0101_0101L; - static boolean mightContainZeroByte(long l) { + private static boolean mightContainZeroByte(long l) { return ((l - LOMAGIC_FOR_BYTES) & (~l) & HIMAGIC_FOR_BYTES) != 0; } @@ -175,99 +253,40 @@ static boolean mightContainZeroShort(long l) { return ((l - LOMAGIC_FOR_SHORTS) & (~l) & HIMAGIC_FOR_SHORTS) != 0; } - static int requireWithinArraySize(long size) { - if (size > ArraysSupport.SOFT_MAX_ARRAY_LENGTH) { - throw newIaeStringTooLarge(); - } - return (int) size; - } - - static int bodyCount(long remaining) { - return (int) Math.min( - // Make sure we do not wrap around - Integer.MAX_VALUE - Long.BYTES, - // Remaining bytes to consider - remaining) - & -Long.BYTES; // Mask 0xFFFFFFF8 - } + private static final long HIMAGIC_FOR_INTS = 0x8000_0000_8000_0000L; + private static final long LOMAGIC_FOR_INTS = 0x0000_0001_0000_0001L; - private static int strlenByte(MemorySegment segment, long start) { - for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += 1) { - byte curr = segment.get(JAVA_BYTE, start + offset); - if (curr == 0) { - return offset; - } - } - throw newIaeStringTooLarge(); + static boolean mightContainZeroInt(long l) { + return ((l - LOMAGIC_FOR_INTS) & (~l) & HIMAGIC_FOR_INTS) != 0; } - /** - * {@return the shortest distance beginning at the provided {@code start} - * to the encountering of a zero short in the provided {@code segment}} - *

- * Note: The inspected region must be short aligned. - * - * @see #chunkedStrlenByte(MemorySegment, long) for more information - * - * @param segment to examine - * @param start from where examination shall begin - * @throws IllegalArgumentException if the examined region contains no zero shorts - * within a length that can be accepted by a String - */ - public static int chunkedStrlenShort(MemorySegment segment, long start) { - - // Handle the first unaligned "head" bytes separately - int headCount = (int)SharedUtils.remainsToAlignment(segment.address() + start, Long.BYTES); - int offset = 0; - for (; offset < headCount; offset += Short.BYTES) { - short curr = segment.get(JAVA_SHORT_UNALIGNED, start + offset); - if (curr == 0) { - return offset; - } - } - - // We are now on a long-aligned boundary so this is the "body" - int bodyCount = bodyCount(segment.byteSize() - start - headCount); - - for (; offset < bodyCount; offset += Long.BYTES) { - // We know we are `long` aligned so, we can save on alignment checking here - long curr = segment.get(JAVA_LONG_UNALIGNED, start + offset); - // Is this a candidate? - if (mightContainZeroShort(curr)) { - for (int j = 0; j < Long.BYTES; j += Short.BYTES) { - if (segment.get(JAVA_SHORT_UNALIGNED, start + offset + j) == 0) { - return offset + j; - } - } - } + private static int requireWithinStringSize(long size, + AbstractMemorySegmentImpl segment, + long fromOffset, + long toOffset) { + if (size > ArraysSupport.SOFT_MAX_ARRAY_LENGTH) { + throw stringTooLarge(segment, fromOffset, toOffset); } + return (int) size; + } - // Handle the "tail" - return requireWithinArraySize((long) offset + strlenShort(segment, start + offset)); + private static IllegalArgumentException stringTooLarge(AbstractMemorySegmentImpl segment, + long fromOffset, + long toOffset) { + return new IllegalArgumentException("String too large: " + exceptionInfo(segment, fromOffset, toOffset)); } - private static int strlenShort(MemorySegment segment, long start) { - for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += Short.BYTES) { - short curr = segment.get(JAVA_SHORT_UNALIGNED, start + offset); - if (curr == (short)0) { - return offset; - } - } - throw newIaeStringTooLarge(); + private static IndexOutOfBoundsException nullNotFound(AbstractMemorySegmentImpl segment, + long fromOffset, + long toOffset) { + return new IndexOutOfBoundsException("No null terminator found: " + exceptionInfo(segment, fromOffset, toOffset)); } - // The gain of using `long` wide operations for `int` is lower than for the two other `byte` and `short` variants - // so, there is only one method for ints. - public static int strlenInt(MemorySegment segment, long start) { - for (int offset = 0; offset < ArraysSupport.SOFT_MAX_ARRAY_LENGTH; offset += Integer.BYTES) { - // We are guaranteed to be aligned here so, we can use unaligned access. - int curr = segment.get(JAVA_INT_UNALIGNED, start + offset); - if (curr == 0) { - return offset; - } - } - throw newIaeStringTooLarge(); + private static String exceptionInfo(AbstractMemorySegmentImpl segment, + long fromOffset, + long toOffset) { + return segment + " using region [" + fromOffset + ", " + toOffset + ")"; } public enum CharsetKind { @@ -323,9 +342,4 @@ public static int copyBytes(String string, MemorySegment segment, Charset charse public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) { JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset); } - - private static IllegalArgumentException newIaeStringTooLarge() { - return new IllegalArgumentException("String too large"); - } - } diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java index 4caef6fbd09b4..94732943b9d36 100644 --- a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -39,6 +39,7 @@ import java.util.Random; import java.util.function.UnaryOperator; +import jdk.internal.foreign.AbstractMemorySegmentImpl; import jdk.internal.foreign.StringSupport; import org.testng.annotations.*; @@ -53,6 +54,20 @@ public class TestStringEncoding { + @Test + public void emptySegment() { + for (Charset charset : standardCharsets()) { + for (Arena arena : arenas()) { + try (arena) { + var segment = arena.allocate(0); + var e = expectThrows(IndexOutOfBoundsException.class, () -> + segment.getString(0, charset)); + assertTrue(e.getMessage().contains("No null terminator found")); + } + } + } + } + @Test(dataProvider = "strings") public void testStrings(String testString) { for (Charset charset : Charset.availableCharsets().values()) { @@ -87,7 +102,6 @@ public void testStrings(String testString) { } } - @Test(dataProvider = "strings") public void testStringsHeap(String testString) { for (Charset charset : singleByteCharsets()) { @@ -198,8 +212,9 @@ public void testOffset(String testString) { try (arena) { MemorySegment inSegment = arena.allocateFrom(testString, charset); for (int i = 0; i < 3; i++) { + String expected = testString.substring(i); String actual = inSegment.getString(i, charset); - assertEquals(actual, testString.substring(i)); + assertEquals(actual, expected); } } } @@ -249,6 +264,32 @@ public void segmentationFault() { } } + // This test ensures that we do not address outside the segment even though there + // are odd bytes at the end. + @Test(dataProvider = "strings") + public void offBoundaryTrailingBytes(String testString) { + if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { + return; + } + for (var charset : standardCharsets()) { + for (var arena: arenas()) { + try (arena) { + MemorySegment strSegment = arena.allocateFrom(testString, charset); + // Add an odd byte at the end + MemorySegment inSegment = arena.allocate(strSegment.byteSize() + 1); + // Make sure there are no null-terminators so that we will try to scan + // the entire segment. + inSegment.fill((byte) 1); + for (int i = 0; i < 4; i++) { + final int offset = i; + var e = expectThrows(IndexOutOfBoundsException.class, () -> inSegment.getString(offset, charset)); + assertTrue(e.getMessage().contains("No null terminator found")); + } + } + } + } + } + private static final int TEST_LENGTH_MAX = 277; private Random deterministicRandom() { @@ -271,9 +312,15 @@ public void chunked_strlen_byte() { } segment.setAtIndex(JAVA_BYTE, len, (byte) 0); for (int j = 0; j < len; j++) { - int actual = StringSupport.chunkedStrlenByte(segment, j); + int actual = StringSupport.strlenByte((AbstractMemorySegmentImpl) segment, j, segment.byteSize()); assertEquals(actual, len - j); } + // Test end offset + for (int j = 0; j < len - 1; j++) { + final long toOffset = j; + expectThrows(IndexOutOfBoundsException.class, () -> + StringSupport.strlenByte((AbstractMemorySegmentImpl) segment, 0, toOffset)); + } } } } @@ -295,7 +342,7 @@ public void chunked_strlen_short() { } segment.setAtIndex(JAVA_SHORT, len, (short) 0); for (int j = 0; j < len; j++) { - int actual = StringSupport.chunkedStrlenShort(segment, j * Short.BYTES); + int actual = StringSupport.strlenShort((AbstractMemorySegmentImpl) segment, j * Short.BYTES, segment.byteSize()); assertEquals(actual, (len - j) * Short.BYTES); } } @@ -319,7 +366,7 @@ public void strlen_int() { } segment.setAtIndex(JAVA_INT, len, 0); for (int j = 0; j < len; j++) { - int actual = StringSupport.strlenInt(segment, j * Integer.BYTES); + int actual = StringSupport.strlenInt((AbstractMemorySegmentImpl) segment, j * Integer.BYTES, segment.byteSize()); assertEquals(actual, (len - j) * Integer.BYTES); } } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/InternalStrLen.java b/test/micro/org/openjdk/bench/java/lang/foreign/InternalStrLen.java index 2db15bfe2652d..b7867efd77109 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/InternalStrLen.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/InternalStrLen.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -22,6 +22,8 @@ */ package org.openjdk.bench.java.lang.foreign; +import jdk.internal.foreign.AbstractMemorySegmentImpl; +import jdk.internal.foreign.StringSupport; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -43,20 +45,20 @@ import java.util.stream.Stream; import static java.lang.foreign.ValueLayout.*; -import static jdk.internal.foreign.StringSupport.*; @BenchmarkMode(Mode.AverageTime) @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @State(Scope.Benchmark) @OutputTimeUnit(TimeUnit.NANOSECONDS) -@Fork(value = 3, jvmArgs = {"--add-exports=java.base/jdk.internal.foreign=ALL-UNNAMED", "--enable-native-access=ALL-UNNAMED", "--enable-preview"}) +@Fork(value = 3, jvmArgs = {"--add-exports=java.base/jdk.internal.foreign=ALL-UNNAMED", + "--enable-native-access=ALL-UNNAMED"}) public class InternalStrLen { - private MemorySegment singleByteSegment; - private MemorySegment singleByteSegmentMisaligned; - private MemorySegment doubleByteSegment; - private MemorySegment quadByteSegment; + private AbstractMemorySegmentImpl singleByteSegment; + private AbstractMemorySegmentImpl singleByteSegmentMisaligned; + private AbstractMemorySegmentImpl doubleByteSegment; + private AbstractMemorySegmentImpl quadByteSegment; @Param({"1", "4", "16", "251", "1024"}) int size; @@ -64,10 +66,9 @@ public class InternalStrLen { @Setup public void setup() { var arena = Arena.ofAuto(); - singleByteSegment = arena.allocate((size + 1L) * Byte.BYTES); - singleByteSegmentMisaligned = arena.allocate((size + 1L) * Byte.BYTES); - doubleByteSegment = arena.allocate((size + 1L) * Short.BYTES); - quadByteSegment = arena.allocate((size + 1L) * Integer.BYTES); + singleByteSegment = (AbstractMemorySegmentImpl) arena.allocate((size + 1L) * Byte.BYTES); + doubleByteSegment = (AbstractMemorySegmentImpl) arena.allocate((size + 1L) * Short.BYTES); + quadByteSegment = (AbstractMemorySegmentImpl) arena.allocate((size + 1L) * Integer.BYTES); Stream.of(singleByteSegment, doubleByteSegment, quadByteSegment) .forEach(s -> IntStream.range(0, (int) s.byteSize() - 1) .forEach(i -> s.set( @@ -78,7 +79,7 @@ public void setup() { singleByteSegment.set(ValueLayout.JAVA_BYTE, singleByteSegment.byteSize() - Byte.BYTES, (byte) 0); doubleByteSegment.set(ValueLayout.JAVA_SHORT, doubleByteSegment.byteSize() - Short.BYTES, (short) 0); quadByteSegment.set(ValueLayout.JAVA_INT, quadByteSegment.byteSize() - Integer.BYTES, 0); - singleByteSegmentMisaligned = arena.allocate(singleByteSegment.byteSize() + 1). + singleByteSegmentMisaligned = (AbstractMemorySegmentImpl) arena.allocate(singleByteSegment.byteSize() + 1). asSlice(1); MemorySegment.copy(singleByteSegment, 0, singleByteSegmentMisaligned, 0, singleByteSegment.byteSize()); } @@ -105,22 +106,22 @@ public int elementQuad() { @Benchmark public int chunkedSingle() { - return chunkedStrlenByte(singleByteSegment, 0); + return StringSupport.strlenByte(singleByteSegment, 0, singleByteSegment.byteSize()); } @Benchmark public int chunkedSingleMisaligned() { - return chunkedStrlenByte(singleByteSegmentMisaligned, 0); + return StringSupport.strlenByte(singleByteSegmentMisaligned, 0, singleByteSegment.byteSize()); } @Benchmark public int chunkedDouble() { - return chunkedStrlenShort(doubleByteSegment, 0); + return StringSupport.strlenShort(doubleByteSegment, 0, doubleByteSegment.byteSize()); } @Benchmark public int changedElementQuad() { - return strlenInt(quadByteSegment, 0); + return StringSupport.strlenInt(quadByteSegment, 0, quadByteSegment.byteSize()); } // These are the legacy methods