
Commit cef6650

Revert "[SPARK-33428][SQL] Conv UDF use BigInt to avoid Long value overflow"

This reverts commit 5f9a7fe.

1 parent 4a6f534


5 files changed: 57 additions & 23 deletions


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala

Lines changed: 51 additions & 13 deletions
@@ -21,37 +21,64 @@ import org.apache.spark.unsafe.types.UTF8String
 
 object NumberConverter {
 
+  /**
+   * Divide x by m as if x is an unsigned 64-bit integer. Examples:
+   * unsignedLongDiv(-1, 2) == Long.MAX_VALUE unsignedLongDiv(6, 3) == 2
+   * unsignedLongDiv(0, 5) == 0
+   *
+   * @param x is treated as unsigned
+   * @param m is treated as signed
+   */
+  private def unsignedLongDiv(x: Long, m: Int): Long = {
+    if (x >= 0) {
+      x / m
+    } else {
+      // Let uval be the value of the unsigned long with the same bits as x
+      // Two's complement => x = uval - 2*MAX - 2
+      // => uval = x + 2*MAX + 2
+      // Now, use the fact: (a+b)/c = a/c + b/c + (a%c+b%c)/c
+      x / m + 2 * (Long.MaxValue / m) + 2 / m + (x % m + 2 * (Long.MaxValue % m) + 2 % m) / m
+    }
+  }
+
   /**
    * Decode v into value[].
    *
-   * @param v is treated as an BigInt
+   * @param v is treated as an unsigned 64-bit integer
    * @param radix must be between MIN_RADIX and MAX_RADIX
    */
-  private def decode(v: BigInt, radix: Int, value: Array[Byte]): Unit = {
+  private def decode(v: Long, radix: Int, value: Array[Byte]): Unit = {
     var tmpV = v
     java.util.Arrays.fill(value, 0.asInstanceOf[Byte])
     var i = value.length - 1
     while (tmpV != 0) {
-      val q = tmpV / radix
-      value(i) = (tmpV - q * radix).byteValue
+      val q = unsignedLongDiv(tmpV, radix)
+      value(i) = (tmpV - q * radix).asInstanceOf[Byte]
       tmpV = q
       i -= 1
     }
   }
 
   /**
-   * Convert value[] into a BigInt. If a negative digit is found,
-   * ignore the suffix starting there.
+   * Convert value[] into a long. On overflow, return -1 (as mySQL does). If a
+   * negative digit is found, ignore the suffix starting there.
    *
    * @param radix must be between MIN_RADIX and MAX_RADIX
    * @param fromPos is the first element that should be considered
    * @return the result should be treated as an unsigned 64-bit integer.
    */
-  private def encode(radix: Int, fromPos: Int, value: Array[Byte]): BigInt = {
-    var v: BigInt = BigInt(0)
+  private def encode(radix: Int, fromPos: Int, value: Array[Byte]): Long = {
+    var v: Long = 0L
+    val bound = unsignedLongDiv(-1 - radix, radix) // Possible overflow once
     var i = fromPos
     while (i < value.length && value(i) >= 0) {
-      v = (v * radix) + BigInt(value(i))
+      if (v >= bound) {
+        // Check for overflow
+        if (unsignedLongDiv(-1 - value(i), radix) < v) {
+          return -1
+        }
+      }
+      v = v * radix + value(i)
       i += 1
     }
     v
@@ -102,7 +129,7 @@ object NumberConverter {
       return null
     }
 
-    val (negative, first) = if (n(0) == '-') (true, 1) else (false, 0)
+    var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0)
 
     // Copy the digits in the right side of the array
     val temp = new Array[Byte](64)
@@ -113,8 +140,19 @@ object NumberConverter {
     }
     char2byte(fromBase, temp.length - n.length + first, temp)
 
-    // Do the conversion by going through a BigInt
-    val v: BigInt = encode(fromBase, temp.length - n.length + first, temp)
+    // Do the conversion by going through a 64 bit integer
+    var v = encode(fromBase, temp.length - n.length + first, temp)
+    if (negative && toBase > 0) {
+      if (v < 0) {
+        v = -1
+      } else {
+        v = -v
+      }
+    }
+    if (toBase < 0 && v < 0) {
+      v = -v
+      negative = true
+    }
     decode(v, Math.abs(toBase), temp)
 
     // Find the first non-zero digit or the last digits if all are zero.
@@ -125,7 +163,7 @@ object NumberConverter {
     byte2char(Math.abs(toBase), firstNonZeroPos, temp)
 
     var resultStartPos = firstNonZeroPos
-    if (negative) {
+    if (negative && toBase < 0) {
       resultStartPos = firstNonZeroPos - 1
       temp(resultStartPos) = '-'
     }
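
Note: the core of the restored implementation is unsignedLongDiv, which emulates unsigned 64-bit division with signed Long arithmetic. The standalone sketch below is not part of the commit (the object name UnsignedLongDivCheck is illustrative); it cross-checks the restored formula against java.lang.Long.divideUnsigned, which performs the same division directly.

// Standalone sketch, not part of this commit: cross-check the restored
// unsignedLongDiv formula against java.lang.Long.divideUnsigned (JDK 8+),
// which performs the same unsigned 64-bit division directly.
object UnsignedLongDivCheck {
  private def unsignedLongDiv(x: Long, m: Int): Long = {
    if (x >= 0) {
      x / m
    } else {
      // Interpret the bits of x as the unsigned value uval = x + 2 * Long.MaxValue + 2,
      // then split the division so no intermediate term overflows.
      x / m + 2 * (Long.MaxValue / m) + 2 / m + (x % m + 2 * (Long.MaxValue % m) + 2 % m) / m
    }
  }

  def main(args: Array[String]): Unit = {
    assert(unsignedLongDiv(-1L, 2) == Long.MaxValue)   // (2^64 - 1) / 2 == 2^63 - 1
    assert(unsignedLongDiv(6L, 3) == 2L)
    assert(unsignedLongDiv(0L, 5) == 0L)
    assert(unsignedLongDiv(-15L, 16) == java.lang.Long.divideUnsigned(-15L, 16L))
  }
}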

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala

Lines changed: 2 additions & 4 deletions
@@ -158,7 +158,7 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
   test("conv") {
     checkEvaluation(Conv(Literal("3"), Literal(10), Literal(2)), "11")
     checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(-16)), "-F")
-    checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(16)), "-F")
+    checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(16)), "FFFFFFFFFFFFFFF1")
     checkEvaluation(Conv(Literal("big"), Literal(36), Literal(16)), "3A48")
     checkEvaluation(Conv(Literal.create(null, StringType), Literal(36), Literal(16)), null)
     checkEvaluation(Conv(Literal("3"), Literal.create(null, IntegerType), Literal(16)), null)
@@ -168,12 +168,10 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(
       Conv(Literal(""), Literal(10), Literal(16)), null)
     checkEvaluation(
-      Conv(Literal("9223372036854775807"), Literal(36), Literal(16)), "12DDAC15F246BAF8C0D551AC7")
+      Conv(Literal("9223372036854775807"), Literal(36), Literal(16)), "FFFFFFFFFFFFFFFF")
     // If there is an invalid digit in the number, the longest valid prefix should be converted.
     checkEvaluation(
       Conv(Literal("11abc"), Literal(10), Literal(16)), "B")
-    checkEvaluation(Conv(Literal("c8dcdfb41711fc9a1f17928001d7fd61"), Literal(16), Literal(10)),
-      "266992441711411603393340504520074460513")
   }
 
   test("e") {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala

Lines changed: 2 additions & 2 deletions
@@ -34,9 +34,9 @@ class NumberConverterSuite extends SparkFunSuite {
   test("convert") {
     checkConv("3", 10, 2, "11")
     checkConv("-15", 10, -16, "-F")
-    checkConv("-15", 10, 16, "-F")
+    checkConv("-15", 10, 16, "FFFFFFFFFFFFFFF1")
     checkConv("big", 36, 16, "3A48")
-    checkConv("9223372036854775807", 36, 16, "12DDAC15F246BAF8C0D551AC7")
+    checkConv("9223372036854775807", 36, 16, "FFFFFFFFFFFFFFFF")
     checkConv("11abc", 10, 16, "B")
   }
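
Note: the other changed expectation is the overflow case. Read as base-36 digits, "9223372036854775807" is far larger than 2^64 - 1, so the restored encode() returns -1, which decode() prints as FFFFFFFFFFFFFFFF in base 16. A small sketch (not from the commit; ConvOverflowCheck is illustrative) makes both facts concrete:

// Illustrative sketch, not part of this commit: the base-36 reading of the
// input overflows the unsigned 64-bit range, and -1L prints as sixteen F's.
object ConvOverflowCheck {
  def main(args: Array[String]): Unit = {
    val asBase36 = BigInt("9223372036854775807", 36)
    val unsignedMax = (BigInt(1) << 64) - 1
    assert(asBase36 > unsignedMax)
    assert(java.lang.Long.toHexString(-1L).toUpperCase == "FFFFFFFFFFFFFFFF")
  }
}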

sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -200,7 +200,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
     checkAnswer(df.selectExpr("""conv("100", 2, 10)"""), Row("4"))
     checkAnswer(df.selectExpr("""conv("-10", 16, -10)"""), Row("-16"))
     checkAnswer(
-      df.selectExpr("""conv("9223372036854775807", 36, -16)"""), Row("12DDAC15F246BAF8C0D551AC7"))
+      df.selectExpr("""conv("9223372036854775807", 36, -16)"""), Row("-1")) // for overflow
   }
 
   test("floor") {

sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala

Lines changed: 1 addition & 3 deletions
@@ -525,9 +525,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "udf_xpath_short",
     "udf_xpath_string",
 
-    // [SPARK-33428][SQL] CONV UDF use BigInt to avoid Long value overflow
-    "udf_conv",
-
     // These tests DROP TABLE that don't exist (but do not specify IF EXISTS)
     "alter_rename_partition1",
     "date_1",
@@ -1006,6 +1003,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     "udf_concat_insert1",
     "udf_concat_insert2",
     "udf_concat_ws",
+    "udf_conv",
     "udf_cos",
     "udf_count",
     "udf_date_add",
