Skip to content

Commit 110aa4b

Browse files
jbewingApache9
authored and committed
HBASE-28025 Enhance ByteBufferUtils.findCommonPrefix to compare 8 bytes each time (#5354)
Signed-off-by: Duo Zhang <[email protected]> (cherry picked from commit dae078e)
1 parent 4f8aff3 commit 110aa4b

File tree

4 files changed

+329
-37
lines changed

4 files changed

+329
-37
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java

Lines changed: 155 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,14 @@ static abstract class Converter {
8383
abstract int putLong(ByteBuffer buffer, int index, long val);
8484
}
8585

86+
static abstract class CommonPrefixer {
87+
abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
88+
int rightOffset, int rightLength);
89+
90+
abstract int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right,
91+
int rightOffset, int rightLength);
92+
}
93+
8694
static class ComparerHolder {
8795
static final String UNSAFE_COMPARER_NAME = ComparerHolder.class.getName() + "$UnsafeComparer";
8896

@@ -325,6 +333,111 @@ int putLong(ByteBuffer buffer, int index, long val) {
325333
}
326334
}
327335

336+
static class CommonPrefixerHolder {
337+
static final String UNSAFE_COMMON_PREFIXER_NAME =
338+
CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer";
339+
340+
static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer();
341+
342+
static CommonPrefixer getBestCommonPrefixer() {
343+
try {
344+
Class<? extends CommonPrefixer> theClass =
345+
Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class);
346+
347+
return theClass.getConstructor().newInstance();
348+
} catch (Throwable t) { // ensure we really catch *everything*
349+
return PureJavaCommonPrefixer.INSTANCE;
350+
}
351+
}
352+
353+
static final class PureJavaCommonPrefixer extends CommonPrefixer {
354+
static final PureJavaCommonPrefixer INSTANCE = new PureJavaCommonPrefixer();
355+
356+
private PureJavaCommonPrefixer() {
357+
}
358+
359+
@Override
360+
public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
361+
int rightOffset, int rightLength) {
362+
int length = Math.min(leftLength, rightLength);
363+
int result = 0;
364+
365+
while (
366+
result < length
367+
&& ByteBufferUtils.toByte(left, leftOffset + result) == right[rightOffset + result]
368+
) {
369+
result++;
370+
}
371+
372+
return result;
373+
}
374+
375+
@Override
376+
int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right,
377+
int rightOffset, int rightLength) {
378+
int length = Math.min(leftLength, rightLength);
379+
int result = 0;
380+
381+
while (
382+
result < length && ByteBufferUtils.toByte(left, leftOffset + result)
383+
== ByteBufferUtils.toByte(right, rightOffset + result)
384+
) {
385+
result++;
386+
}
387+
388+
return result;
389+
}
390+
}
391+
392+
static final class UnsafeCommonPrefixer extends CommonPrefixer {
393+
394+
static {
395+
if (!UNSAFE_UNALIGNED) {
396+
throw new Error();
397+
}
398+
}
399+
400+
public UnsafeCommonPrefixer() {
401+
}
402+
403+
@Override
404+
public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
405+
int rightOffset, int rightLength) {
406+
long offset1Adj;
407+
Object refObj1 = null;
408+
if (left.isDirect()) {
409+
offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left);
410+
} else {
411+
offset1Adj = leftOffset + left.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
412+
refObj1 = left.array();
413+
}
414+
return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, right,
415+
rightOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET, rightLength);
416+
}
417+
418+
@Override
419+
public int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, ByteBuffer right,
420+
int rightOffset, int rightLength) {
421+
long offset1Adj, offset2Adj;
422+
Object refObj1 = null, refObj2 = null;
423+
if (left.isDirect()) {
424+
offset1Adj = leftOffset + UnsafeAccess.directBufferAddress(left);
425+
} else {
426+
offset1Adj = leftOffset + left.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
427+
refObj1 = left.array();
428+
}
429+
if (right.isDirect()) {
430+
offset2Adj = rightOffset + UnsafeAccess.directBufferAddress(right);
431+
} else {
432+
offset2Adj = rightOffset + right.arrayOffset() + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
433+
refObj2 = right.array();
434+
}
435+
return findCommonPrefixUnsafe(refObj1, offset1Adj, leftLength, refObj2, offset2Adj,
436+
rightLength);
437+
}
438+
}
439+
}
440+
328441
/**
329442
* Similar to {@link WritableUtils#writeVLong(java.io.DataOutput, long)}, but writes to a
330443
* {@link ByteBuffer}.
@@ -769,14 +882,7 @@ public static int findCommonPrefix(ByteBuffer buffer, int offsetLeft, int offset
769882
*/
770883
public static int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right,
771884
int rightOffset, int rightLength) {
772-
int length = Math.min(leftLength, rightLength);
773-
int result = 0;
774-
775-
while (result < length && left[leftOffset + result] == right[rightOffset + result]) {
776-
result++;
777-
}
778-
779-
return result;
885+
return Bytes.findCommonPrefix(left, right, leftLength, rightLength, leftOffset, rightOffset);
780886
}
781887

782888
/**
@@ -790,17 +896,8 @@ public static int findCommonPrefix(byte[] left, int leftOffset, int leftLength,
790896
*/
791897
public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength,
792898
ByteBuffer right, int rightOffset, int rightLength) {
793-
int length = Math.min(leftLength, rightLength);
794-
int result = 0;
795-
796-
while (
797-
result < length && ByteBufferUtils.toByte(left, leftOffset + result)
798-
== ByteBufferUtils.toByte(right, rightOffset + result)
799-
) {
800-
result++;
801-
}
802-
803-
return result;
899+
return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength,
900+
right, rightOffset, rightLength);
804901
}
805902

806903
/**
@@ -814,17 +911,8 @@ public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLeng
814911
*/
815912
public static int findCommonPrefix(ByteBuffer left, int leftOffset, int leftLength, byte[] right,
816913
int rightOffset, int rightLength) {
817-
int length = Math.min(leftLength, rightLength);
818-
int result = 0;
819-
820-
while (
821-
result < length
822-
&& ByteBufferUtils.toByte(left, leftOffset + result) == right[rightOffset + result]
823-
) {
824-
result++;
825-
}
826-
827-
return result;
914+
return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength,
915+
right, rightOffset, rightLength);
828916
}
829917

830918
/**
@@ -997,6 +1085,43 @@ static int compareToUnsafe(Object obj1, long o1, int l1, Object obj2, long o2, i
9971085
return l1 - l2;
9981086
}
9991087

1088+
static int findCommonPrefixUnsafe(Object left, long leftOffset, int leftLength, Object right,
1089+
long rightOffset, int rightLength) {
1090+
final int stride = 8;
1091+
final int minLength = Math.min(leftLength, rightLength);
1092+
int strideLimit = minLength & ~(stride - 1);
1093+
int result = 0;
1094+
int i;
1095+
1096+
for (i = 0; i < strideLimit; i += stride) {
1097+
long lw = HBasePlatformDependent.getLong(left, leftOffset + (long) i);
1098+
long rw = HBasePlatformDependent.getLong(right, rightOffset + (long) i);
1099+
1100+
if (lw != rw) {
1101+
if (!UnsafeAccess.LITTLE_ENDIAN) {
1102+
return result + (Long.numberOfLeadingZeros(lw ^ rw) / Bytes.SIZEOF_LONG);
1103+
} else {
1104+
return result + (Long.numberOfTrailingZeros(lw ^ rw) / Bytes.SIZEOF_LONG);
1105+
}
1106+
} else {
1107+
result += Bytes.SIZEOF_LONG;
1108+
}
1109+
}
1110+
1111+
// The epilogue to cover the last (minLength % stride) elements.
1112+
for (; i < minLength; i++) {
1113+
byte il = HBasePlatformDependent.getByte(left, leftOffset + i);
1114+
byte ir = HBasePlatformDependent.getByte(right, rightOffset + i);
1115+
if (il != ir) {
1116+
return result;
1117+
} else {
1118+
result++;
1119+
}
1120+
}
1121+
1122+
return result;
1123+
}
1124+
10001125
/**
10011126
* Reads a short value at the given buffer's offset.
10021127
* @param buffer input byte buffer to read

hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java

Lines changed: 100 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1305,6 +1305,11 @@ static abstract class Converter {
13051305

13061306
}
13071307

1308+
static abstract class CommonPrefixer {
1309+
abstract int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right,
1310+
int rightOffset, int rightLength);
1311+
}
1312+
13081313
@InterfaceAudience.Private
13091314
static Comparer<byte[]> lexicographicalComparerJavaImpl() {
13101315
return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
@@ -1582,6 +1587,99 @@ public int compareTo(byte[] buffer1, int offset1, int length1, byte[] buffer2, i
15821587
}
15831588
}
15841589

1590+
static class CommonPrefixerHolder {
1591+
static final String UNSAFE_COMMON_PREFIXER_NAME =
1592+
CommonPrefixerHolder.class.getName() + "$UnsafeCommonPrefixer";
1593+
1594+
static final CommonPrefixer BEST_COMMON_PREFIXER = getBestCommonPrefixer();
1595+
1596+
static CommonPrefixer getBestCommonPrefixer() {
1597+
try {
1598+
Class<? extends CommonPrefixer> theClass =
1599+
Class.forName(UNSAFE_COMMON_PREFIXER_NAME).asSubclass(CommonPrefixer.class);
1600+
1601+
return theClass.getConstructor().newInstance();
1602+
} catch (Throwable t) { // ensure we really catch *everything*
1603+
return CommonPrefixerHolder.PureJavaCommonPrefixer.INSTANCE;
1604+
}
1605+
}
1606+
1607+
static final class PureJavaCommonPrefixer extends CommonPrefixer {
1608+
static final PureJavaCommonPrefixer INSTANCE = new PureJavaCommonPrefixer();
1609+
1610+
private PureJavaCommonPrefixer() {
1611+
}
1612+
1613+
@Override
1614+
public int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right,
1615+
int rightOffset, int rightLength) {
1616+
int length = Math.min(leftLength, rightLength);
1617+
int result = 0;
1618+
1619+
while (result < length && left[leftOffset + result] == right[rightOffset + result]) {
1620+
result++;
1621+
}
1622+
return result;
1623+
}
1624+
}
1625+
1626+
static final class UnsafeCommonPrefixer extends CommonPrefixer {
1627+
1628+
static {
1629+
if (!UNSAFE_UNALIGNED) {
1630+
throw new Error();
1631+
}
1632+
1633+
// sanity check - this should never fail
1634+
if (HBasePlatformDependent.arrayIndexScale(byte[].class) != 1) {
1635+
throw new AssertionError();
1636+
}
1637+
}
1638+
1639+
public UnsafeCommonPrefixer() {
1640+
}
1641+
1642+
@Override
1643+
public int findCommonPrefix(byte[] left, int leftOffset, int leftLength, byte[] right,
1644+
int rightOffset, int rightLength) {
1645+
final int stride = 8;
1646+
final int minLength = Math.min(leftLength, rightLength);
1647+
int strideLimit = minLength & ~(stride - 1);
1648+
final long leftOffsetAdj = leftOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
1649+
final long rightOffsetAdj = rightOffset + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
1650+
int result = 0;
1651+
int i;
1652+
1653+
for (i = 0; i < strideLimit; i += stride) {
1654+
long lw = HBasePlatformDependent.getLong(left, leftOffsetAdj + i);
1655+
long rw = HBasePlatformDependent.getLong(right, rightOffsetAdj + i);
1656+
if (lw != rw) {
1657+
if (!UnsafeAccess.LITTLE_ENDIAN) {
1658+
return result + (Long.numberOfLeadingZeros(lw ^ rw) / Bytes.SIZEOF_LONG);
1659+
} else {
1660+
return result + (Long.numberOfTrailingZeros(lw ^ rw) / Bytes.SIZEOF_LONG);
1661+
}
1662+
} else {
1663+
result += Bytes.SIZEOF_LONG;
1664+
}
1665+
}
1666+
1667+
// The epilogue to cover the last (minLength % stride) elements.
1668+
for (; i < minLength; i++) {
1669+
int il = (left[leftOffset + i]);
1670+
int ir = (right[rightOffset + i]);
1671+
if (il != ir) {
1672+
return result;
1673+
} else {
1674+
result++;
1675+
}
1676+
}
1677+
1678+
return result;
1679+
}
1680+
}
1681+
}
1682+
15851683
/**
15861684
* Lexicographically determine the equality of two arrays.
15871685
* @param left left operand
@@ -2618,12 +2716,7 @@ public static int searchDelimiterIndexInReverse(final byte[] b, final int offset
26182716

26192717
public static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength,
26202718
int leftOffset, int rightOffset) {
2621-
int length = Math.min(leftLength, rightLength);
2622-
int result = 0;
2623-
2624-
while (result < length && left[leftOffset + result] == right[rightOffset + result]) {
2625-
result++;
2626-
}
2627-
return result;
2719+
return CommonPrefixerHolder.BEST_COMMON_PREFIXER.findCommonPrefix(left, leftOffset, leftLength,
2720+
right, rightOffset, rightLength);
26282721
}
26292722
}

hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -606,6 +606,37 @@ public void testEquals() {
606606
assertTrue(ByteBufferUtils.equals(bb, 0, a.length, a, 0, a.length));
607607
}
608608

609+
@Test
610+
public void testFindCommonPrefix() {
611+
ByteBuffer bb1 = ByteBuffer.allocate(135);
612+
ByteBuffer bb2 = ByteBuffer.allocate(135);
613+
ByteBuffer bb3 = ByteBuffer.allocateDirect(135);
614+
byte[] b = new byte[71];
615+
616+
fillBB(bb1, (byte) 5);
617+
fillBB(bb2, (byte) 5);
618+
fillBB(bb3, (byte) 5);
619+
fillArray(b, (byte) 5);
620+
621+
assertEquals(135,
622+
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining()));
623+
assertEquals(71, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), b, 0, b.length));
624+
assertEquals(135,
625+
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb3, 0, bb3.remaining()));
626+
assertEquals(71, ByteBufferUtils.findCommonPrefix(bb3, 0, bb3.remaining(), b, 0, b.length));
627+
628+
b[13] = 9;
629+
assertEquals(13, ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), b, 0, b.length));
630+
631+
bb2.put(134, (byte) 6);
632+
assertEquals(134,
633+
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining()));
634+
635+
bb2.put(6, (byte) 4);
636+
assertEquals(6,
637+
ByteBufferUtils.findCommonPrefix(bb1, 0, bb1.remaining(), bb2, 0, bb2.remaining()));
638+
}
639+
609640
private static void fillBB(ByteBuffer bb, byte b) {
610641
for (int i = bb.position(); i < bb.limit(); i++) {
611642
bb.put(i, b);

0 commit comments

Comments
 (0)