Skip to content

Commit c6e8794

Browse files
committed
[SPARK-9460] Fix prefix generation for UTF8String.
1 parent c0cc0ea commit c6e8794

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,19 @@ public int numChars() {
141141
* Returns a 64-bit integer that can be used as the prefix used in sorting.
142142
*/
143143
public long getPrefix() {
144-
long p = PlatformDependent.UNSAFE.getLong(base, offset);
144+
// Since JVMs are either 4-byte aligned or 8-byte aligned, we check the size of the string.
145+
// If size is 0, just return 0.
146+
// If size is between 0 and 4 (inclusive), assume data is 4-byte aligned under the hood and
147+
// use a getInt to fetch the prefix.
148+
// If size is greater than 4, assume we have at least 8 bytes of data to fetch.
149+
long p;
150+
if (numBytes > 4) {
151+
p = PlatformDependent.UNSAFE.getLong(base, offset);
152+
} else if (numBytes > 0) {
153+
p = (long) PlatformDependent.UNSAFE.getInt(base, offset);
154+
} else {
155+
p = 0;
156+
}
145157
p = java.lang.Long.reverseBytes(p);
146158
return p;
147159
}

0 commit comments

Comments
 (0)