File tree (expand / collapse): 3 files changed, +9 -20 lines changed
main/scala/org/apache/spark/sql/catalyst/expressions
test/scala/org/apache/spark/sql/catalyst/expressions
core/src/test/scala/org/apache/spark/sql
Original file line number | Diff line number | Diff line change
1818package org .apache .spark .sql .catalyst .expressions
1919
2020import java .lang .{Long => JLong }
21+ import java .nio .charset .{StandardCharsets , Charset }
2122import java .util .Arrays
2223
24+ import org .apache .commons .codec .DecoderException
2325import org .apache .spark .sql .catalyst .analysis .TypeCheckResult
2426import org .apache .spark .sql .catalyst .expressions .codegen ._
2527import org .apache .spark .sql .types ._
@@ -370,25 +372,16 @@ case class UnHex(child: Expression)
370372 if (num == null ) {
371373 null
372374 } else {
373- unhex(num.asInstanceOf [UTF8String ].toString )
375+ unhex(num.asInstanceOf [UTF8String ])
374376 }
375377 }
376378
377- private def unhex (s : String ): Array [Byte ] = {
378- // append a leading 0 if needed
379- val str = if (s.length % 2 == 1 ) {" 0" + s} else {s}
380- val result = new Array [Byte ](str.length / 2 )
381- var i = 0
382- while (i < str.length()) {
383- try {
384- result(i / 2 ) = Integer .parseInt(str.substring(i, i + 2 ), 16 ).asInstanceOf [Byte ]
385- } catch {
386- // invalid character present, return null
387- case _ : NumberFormatException => return null
388- }
389- i += 2
379+ private def unhex (utf8Str : UTF8String ): Array [Byte ] = {
380+ try {
381+ new org.apache.commons.codec.binary.Hex (StandardCharsets .UTF_8 ).decode(utf8Str.getBytes)
382+ } catch {
383+ case _ : DecoderException => null
390384 }
391- result
392385 }
393386}
394387
Original file line number | Diff line number | Diff line change
@@ -240,10 +240,7 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
240240
241241 test(" unhex" ) {
242242 checkEvaluation(UnHex (Literal (" 737472696E67" )), " string" .getBytes)
243- // scalastyle:off
244- // Turn off scala style for non-ascii chars
245- checkEvaluation(UnHex (Literal (" E4B889E9878DE79A84" )), " 三重的" .getBytes)
246- // scalastyle:on
243+ checkEvaluation(UnHex (Literal (" " )), new Array [Byte ](0 ))
247244 }
248245
249246 test(" hypot" ) {
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,6 @@ class MathExpressionsSuite extends QueryTest {
232232 checkAnswer(data.selectExpr(" unhex(a)" ), Row (Array [Byte ](28 .toByte)))
233233 checkAnswer(data.selectExpr(" unhex(b)" ), Row (" string" .getBytes))
234234 checkAnswer(data.selectExpr(""" unhex("##")""" ), Row (null ))
235-
236235 }
237236
238237 test(" hypot" ) {
You can’t perform that action at this time.
0 commit comments