Skip to content

Commit bffd37f

Browse files
committed
Change UnHex to use Hex from the Apache Commons Codec package
1 parent cde73f5 commit bffd37f

File tree

3 files changed

+9
-20
lines changed

3 files changed

+9
-20
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818
package org.apache.spark.sql.catalyst.expressions
1919

2020
import java.lang.{Long => JLong}
21+
import java.nio.charset.{StandardCharsets, Charset}
2122
import java.util.Arrays
2223

24+
import org.apache.commons.codec.DecoderException
2325
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
2426
import org.apache.spark.sql.catalyst.expressions.codegen._
2527
import org.apache.spark.sql.types._
@@ -370,25 +372,16 @@ case class UnHex(child: Expression)
370372
if (num == null) {
371373
null
372374
} else {
373-
unhex(num.asInstanceOf[UTF8String].toString)
375+
unhex(num.asInstanceOf[UTF8String])
374376
}
375377
}
376378

377-
private def unhex(s: String): Array[Byte] = {
378-
// append a leading 0 if needed
379-
val str = if (s.length % 2 == 1) {"0" + s} else {s}
380-
val result = new Array[Byte](str.length / 2)
381-
var i = 0
382-
while (i < str.length()) {
383-
try {
384-
result(i / 2) = Integer.parseInt(str.substring(i, i + 2), 16).asInstanceOf[Byte]
385-
} catch {
386-
// invalid character present, return null
387-
case _: NumberFormatException => return null
388-
}
389-
i += 2
379+
private def unhex(utf8Str: UTF8String): Array[Byte] = {
380+
try {
381+
new org.apache.commons.codec.binary.Hex(StandardCharsets.UTF_8).decode(utf8Str.getBytes)
382+
} catch {
383+
case _: DecoderException => null
390384
}
391-
result
392385
}
393386
}
394387

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -240,10 +240,7 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
240240

241241
test("unhex") {
242242
checkEvaluation(UnHex(Literal("737472696E67")), "string".getBytes)
243-
// scalastyle:off
244-
// Turn off scala style for non-ascii chars
245-
checkEvaluation(UnHex(Literal("E4B889E9878DE79A84")), "三重的".getBytes)
246-
// scalastyle:on
243+
checkEvaluation(UnHex(Literal("")), new Array[Byte](0))
247244
}
248245

249246
test("hypot") {

sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,6 @@ class MathExpressionsSuite extends QueryTest {
232232
checkAnswer(data.selectExpr("unhex(a)"), Row(Array[Byte](28.toByte)))
233233
checkAnswer(data.selectExpr("unhex(b)"), Row("string".getBytes))
234234
checkAnswer(data.selectExpr("""unhex("##")"""), Row(null))
235-
236235
}
237236

238237
test("hypot") {

0 commit comments

Comments
 (0)