Skip to content

Commit 0b9ccd5

Browse files
gatorsmilecloud-fan
authored andcommitted
## What changes were proposed in this pull request? When running TPC-DS benchmarks on 2.4 release, npoggi and winglungngai saw more than 10% performance regression on the following queries: q67, q24a and q24b. After we applying the PR #22338, the performance regression still exists. If we revert the changes in #19222, npoggi and winglungngai found the performance regression was resolved. Thus, this PR is to revert the related changes for unblocking the 2.4 release. In the future release, we still can continue the investigation and find out the root cause of the regression. ## How was this patch tested? The existing test cases Closes #22361 from gatorsmile/revertMemoryBlock. Authored-by: gatorsmile <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent 1cfda44 commit 0b9ccd5

File tree

40 files changed

+376
-1070
lines changed

40 files changed

+376
-1070
lines changed

common/unsafe/src/main/java/org/apache/spark/sql/catalyst/expressions/HiveHasher.java

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions;
1919

20-
import org.apache.spark.unsafe.memory.MemoryBlock;
21-
import org.apache.spark.unsafe.types.UTF8String;
20+
import org.apache.spark.unsafe.Platform;
2221

2322
/**
2423
* Simulates Hive's hashing function from Hive v1.2.1
@@ -39,21 +38,12 @@ public static int hashLong(long input) {
3938
return (int) ((input >>> 32) ^ input);
4039
}
4140

42-
public static int hashUnsafeBytesBlock(MemoryBlock mb) {
43-
long lengthInBytes = mb.size();
41+
public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
4442
assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
4543
int result = 0;
46-
for (long i = 0; i < lengthInBytes; i++) {
47-
result = (result * 31) + (int) mb.getByte(i);
44+
for (int i = 0; i < lengthInBytes; i++) {
45+
result = (result * 31) + (int) Platform.getByte(base, offset + i);
4846
}
4947
return result;
5048
}
51-
52-
public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
53-
return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes));
54-
}
55-
56-
public static int hashUTF8String(UTF8String str) {
57-
return hashUnsafeBytesBlock(str.getMemoryBlock());
58-
}
5949
}

common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ public static void setMemory(long address, byte value, long size) {
187187
}
188188

189189
public static void copyMemory(
190-
Object src, long srcOffset, Object dst, long dstOffset, long length) {
190+
Object src, long srcOffset, Object dst, long dstOffset, long length) {
191191
// Check if dstOffset is before or after srcOffset to determine if we should copy
192192
// forward or backwards. This is necessary in case src and dst overlap.
193193
if (dstOffset < srcOffset) {

common/unsafe/src/main/java/org/apache/spark/unsafe/array/ByteArrayMethods.java

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
package org.apache.spark.unsafe.array;
1919

2020
import org.apache.spark.unsafe.Platform;
21-
import org.apache.spark.unsafe.memory.MemoryBlock;
2221

2322
public class ByteArrayMethods {
2423

@@ -53,25 +52,15 @@ public static long roundNumberOfBytesToNearestWord(long numBytes) {
5352
public static int MAX_ROUNDED_ARRAY_LENGTH = Integer.MAX_VALUE - 15;
5453

5554
private static final boolean unaligned = Platform.unaligned();
56-
/**
57-
* MemoryBlock equality check for MemoryBlocks.
58-
* @return true if the arrays are equal, false otherwise
59-
*/
60-
public static boolean arrayEqualsBlock(
61-
MemoryBlock leftBase, long leftOffset, MemoryBlock rightBase, long rightOffset, long length) {
62-
return arrayEquals(leftBase.getBaseObject(), leftBase.getBaseOffset() + leftOffset,
63-
rightBase.getBaseObject(), rightBase.getBaseOffset() + rightOffset, length);
64-
}
65-
6655
/**
6756
* Optimized byte array equality check for byte arrays.
6857
* @return true if the arrays are equal, false otherwise
6958
*/
7059
public static boolean arrayEquals(
71-
Object leftBase, long leftOffset, Object rightBase, long rightOffset, long length) {
60+
Object leftBase, long leftOffset, Object rightBase, long rightOffset, final long length) {
7261
int i = 0;
7362

74-
// check if starts align and we can get both offsets to be aligned
63+
// check if stars align and we can get both offsets to be aligned
7564
if ((leftOffset % 8) == (rightOffset % 8)) {
7665
while ((leftOffset + i) % 8 != 0 && i < length) {
7766
if (Platform.getByte(leftBase, leftOffset + i) !=

common/unsafe/src/main/java/org/apache/spark/unsafe/array/LongArray.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.apache.spark.unsafe.array;
1919

20+
import org.apache.spark.unsafe.Platform;
2021
import org.apache.spark.unsafe.memory.MemoryBlock;
2122

2223
/**
@@ -32,12 +33,16 @@ public final class LongArray {
3233
private static final long WIDTH = 8;
3334

3435
private final MemoryBlock memory;
36+
private final Object baseObj;
37+
private final long baseOffset;
3538

3639
private final long length;
3740

3841
public LongArray(MemoryBlock memory) {
3942
assert memory.size() < (long) Integer.MAX_VALUE * 8: "Array size >= Integer.MAX_VALUE elements";
4043
this.memory = memory;
44+
this.baseObj = memory.getBaseObject();
45+
this.baseOffset = memory.getBaseOffset();
4146
this.length = memory.size() / WIDTH;
4247
}
4348

@@ -46,11 +51,11 @@ public MemoryBlock memoryBlock() {
4651
}
4752

4853
public Object getBaseObject() {
49-
return memory.getBaseObject();
54+
return baseObj;
5055
}
5156

5257
public long getBaseOffset() {
53-
return memory.getBaseOffset();
58+
return baseOffset;
5459
}
5560

5661
/**
@@ -64,8 +69,8 @@ public long size() {
6469
* Fill this all with 0L.
6570
*/
6671
public void zeroOut() {
67-
for (long off = 0; off < length * WIDTH; off += WIDTH) {
68-
memory.putLong(off, 0);
72+
for (long off = baseOffset; off < baseOffset + length * WIDTH; off += WIDTH) {
73+
Platform.putLong(baseObj, off, 0);
6974
}
7075
}
7176

@@ -75,7 +80,7 @@ public void zeroOut() {
7580
public void set(int index, long value) {
7681
assert index >= 0 : "index (" + index + ") should >= 0";
7782
assert index < length : "index (" + index + ") should < length (" + length + ")";
78-
memory.putLong(index * WIDTH, value);
83+
Platform.putLong(baseObj, baseOffset + index * WIDTH, value);
7984
}
8085

8186
/**
@@ -84,6 +89,6 @@ public void set(int index, long value) {
8489
public long get(int index) {
8590
assert index >= 0 : "index (" + index + ") should >= 0";
8691
assert index < length : "index (" + index + ") should < length (" + length + ")";
87-
return memory.getLong(index * WIDTH);
92+
return Platform.getLong(baseObj, baseOffset + index * WIDTH);
8893
}
8994
}

common/unsafe/src/main/java/org/apache/spark/unsafe/hash/Murmur3_x86_32.java

Lines changed: 12 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,7 @@
1717

1818
package org.apache.spark.unsafe.hash;
1919

20-
import com.google.common.primitives.Ints;
21-
2220
import org.apache.spark.unsafe.Platform;
23-
import org.apache.spark.unsafe.memory.MemoryBlock;
24-
import org.apache.spark.unsafe.types.UTF8String;
2521

2622
/**
2723
* 32-bit Murmur3 hasher. This is based on Guava's Murmur3_32HashFunction.
@@ -53,74 +49,49 @@ public static int hashInt(int input, int seed) {
5349
}
5450

5551
public int hashUnsafeWords(Object base, long offset, int lengthInBytes) {
56-
return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
52+
return hashUnsafeWords(base, offset, lengthInBytes, seed);
5753
}
5854

59-
public static int hashUnsafeWordsBlock(MemoryBlock base, int seed) {
55+
public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
6056
// This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
61-
int lengthInBytes = Ints.checkedCast(base.size());
6257
assert (lengthInBytes % 8 == 0): "lengthInBytes must be a multiple of 8 (word-aligned)";
63-
int h1 = hashBytesByIntBlock(base, lengthInBytes, seed);
58+
int h1 = hashBytesByInt(base, offset, lengthInBytes, seed);
6459
return fmix(h1, lengthInBytes);
6560
}
6661

67-
public static int hashUnsafeWords(Object base, long offset, int lengthInBytes, int seed) {
68-
// This is based on Guava's `Murmur32_Hasher.processRemaining(ByteBuffer)` method.
69-
return hashUnsafeWordsBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
70-
}
71-
72-
public static int hashUnsafeBytesBlock(MemoryBlock base, int seed) {
73-
return hashUnsafeBytesBlock(base, Ints.checkedCast(base.size()), seed);
74-
}
75-
76-
private static int hashUnsafeBytesBlock(MemoryBlock base, int lengthInBytes, int seed) {
62+
public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
7763
// This is not compatible with original and another implementations.
7864
// But remain it for backward compatibility for the components existing before 2.3.
7965
assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
8066
int lengthAligned = lengthInBytes - lengthInBytes % 4;
81-
int h1 = hashBytesByIntBlock(base, lengthAligned, seed);
82-
long offset = base.getBaseOffset();
83-
Object o = base.getBaseObject();
67+
int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
8468
for (int i = lengthAligned; i < lengthInBytes; i++) {
85-
int halfWord = Platform.getByte(o, offset + i);
69+
int halfWord = Platform.getByte(base, offset + i);
8670
int k1 = mixK1(halfWord);
8771
h1 = mixH1(h1, k1);
8872
}
8973
return fmix(h1, lengthInBytes);
9074
}
9175

92-
public static int hashUTF8String(UTF8String str, int seed) {
93-
return hashUnsafeBytesBlock(str.getMemoryBlock(), str.numBytes(), seed);
94-
}
95-
96-
public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
97-
return hashUnsafeBytesBlock(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
98-
}
99-
10076
public static int hashUnsafeBytes2(Object base, long offset, int lengthInBytes, int seed) {
101-
return hashUnsafeBytes2Block(MemoryBlock.allocateFromObject(base, offset, lengthInBytes), seed);
102-
}
103-
104-
public static int hashUnsafeBytes2Block(MemoryBlock base, int seed) {
105-
// This is compatible with original and other implementations.
77+
// This is compatible with original and another implementations.
10678
// Use this method for new components after Spark 2.3.
107-
int lengthInBytes = Ints.checkedCast(base.size());
108-
assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
79+
assert (lengthInBytes >= 0): "lengthInBytes cannot be negative";
10980
int lengthAligned = lengthInBytes - lengthInBytes % 4;
110-
int h1 = hashBytesByIntBlock(base, lengthAligned, seed);
81+
int h1 = hashBytesByInt(base, offset, lengthAligned, seed);
11182
int k1 = 0;
11283
for (int i = lengthAligned, shift = 0; i < lengthInBytes; i++, shift += 8) {
113-
k1 ^= (base.getByte(i) & 0xFF) << shift;
84+
k1 ^= (Platform.getByte(base, offset + i) & 0xFF) << shift;
11485
}
11586
h1 ^= mixK1(k1);
11687
return fmix(h1, lengthInBytes);
11788
}
11889

119-
private static int hashBytesByIntBlock(MemoryBlock base, int lengthInBytes, int seed) {
90+
private static int hashBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
12091
assert (lengthInBytes % 4 == 0);
12192
int h1 = seed;
12293
for (int i = 0; i < lengthInBytes; i += 4) {
123-
int halfWord = base.getInt(i);
94+
int halfWord = Platform.getInt(base, offset + i);
12495
int k1 = mixK1(halfWord);
12596
h1 = mixH1(h1, k1);
12697
}

common/unsafe/src/main/java/org/apache/spark/unsafe/memory/ByteArrayMemoryBlock.java

Lines changed: 0 additions & 128 deletions
This file was deleted.

0 commit comments

Comments
 (0)