Skip to content

Commit c852d46

Browse files
committed
Add function unhex
1 parent 6c5a6db commit c852d46

File tree

5 files changed

+75
-0
lines changed

5 files changed

+75
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ object FunctionRegistry {
156156
expression[Substring]("substr"),
157157
expression[Substring]("substring"),
158158
expression[Upper]("ucase"),
159+
expression[UnHex]("unhex"),
159160
expression[Upper]("upper")
160161
)
161162

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,44 @@ case class Pow(left: Expression, right: Expression)
354354
}
355355
}
356356

357+
/**
 * Performs the inverse operation of HEX.
 *
 * The child is expected to evaluate to a string of hexadecimal digits. Every pair of digits is
 * decoded into one byte and the decoded bytes are returned as a byte array. When the input has
 * an odd number of characters it is treated as if it had one extra leading '0'.
 *
 * The result is null when the input is null or when it contains any character that is not an
 * ASCII hexadecimal digit (0-9, a-f, A-F). Sign characters such as '+' and '-' are not digits
 * and therefore also produce null.
 */
case class UnHex(child: Expression)
  extends UnaryExpression with ExpectsInputTypes with Serializable {

  override def expectedChildTypes: Seq[DataType] = Seq(StringType)

  override def dataType: DataType = BinaryType

  override def eval(input: InternalRow): Any = {
    val hexString = child.eval(input)
    if (hexString == null) {
      null
    } else {
      unhex(hexString.asInstanceOf[UTF8String].toString)
    }
  }

  /**
   * Returns the numeric value (0 to 15) of an ASCII hexadecimal digit, or -1 if `c` is not one.
   */
  private def hexDigitValue(c: Char): Int = {
    if (c >= '0' && c <= '9') {
      c - '0'
    } else if (c >= 'A' && c <= 'F') {
      c - 'A' + 10
    } else if (c >= 'a' && c <= 'f') {
      c - 'a' + 10
    } else {
      -1
    }
  }

  /**
   * Decodes `s` into bytes, two hexadecimal digits per byte.
   *
   * @return the decoded bytes, or null if `s` contains a non-hexadecimal character
   */
  private def unhex(s: String): Array[Byte] = {
    // prepend a leading 0 if needed so that the digits always form complete pairs
    val str = if (s.length % 2 == 1) "0" + s else s
    val result = new Array[Byte](str.length / 2)
    var valid = true
    var i = 0
    while (valid && i < str.length) {
      // Digits are validated one by one instead of going through Integer.parseInt, which would
      // also accept a leading sign (e.g. "-1") and non-ASCII digits.
      val high = hexDigitValue(str.charAt(i))
      val low = hexDigitValue(str.charAt(i + 1))
      if (high < 0 || low < 0) {
        // invalid character present, the whole result becomes null
        valid = false
      } else {
        result(i / 2) = ((high << 4) | low).toByte
        i += 2
      }
    }
    if (valid) result else null
  }
}
394+
357395
case class Hypot(left: Expression, right: Expression)
358396
extends BinaryMathExpression(math.hypot, "HYPOT")
359397

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,14 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
238238
// scalastyle:on
239239
}
240240

241+
test("unhex") {
  // Expected bytes are built with an explicit UTF-8 charset so that the test does not depend on
  // the default charset of the JVM running it.
  checkEvaluation(UnHex(Literal("737472696E67")), "string".getBytes("UTF-8"))
  // An odd number of digits is decoded as if a leading zero were present.
  checkEvaluation(UnHex(Literal("F")), Array[Byte](15.toByte))
  // A character that is not a hexadecimal digit makes the whole result null.
  checkEvaluation(UnHex(Literal("GG")), null)
  // scalastyle:off
  // Turn off scala style for non-ascii chars
  checkEvaluation(UnHex(Literal("E4B889E9878DE79A84")), "三重的".getBytes("UTF-8"))
  // scalastyle:on
}
248+
241249
test("hypot") {
242250
testBinary(Hypot, math.hypot)
243251
}

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,24 @@ object functions {
10621062
*/
10631063
def hex(colName: String): Column = hex(Column(colName))
10641064

1065+
/**
 * Decodes a column of hexadecimal strings; the inverse of hex. Every pair of characters is
 * read as one hexadecimal number and converted to the byte it represents.
 *
 * @group math_funcs
 * @since 1.5.0
 */
def unhex(column: Column): Column = {
  val hexInput = column.expr
  UnHex(hexInput)
}
1073+
1074+
/**
 * Decodes the named column of hexadecimal strings; the inverse of hex. Every pair of
 * characters is read as one hexadecimal number and converted to the byte it represents.
 *
 * @group math_funcs
 * @since 1.5.0
 */
def unhex(colName: String): Column = {
  val namedColumn = Column(colName)
  unhex(namedColumn)
}
1082+
10651083
/**
10661084
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
10671085
*

sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,16 @@ class MathExpressionsSuite extends QueryTest {
225225
checkAnswer(data.selectExpr("hex(cast(d as binary))"), Seq(Row("68656C6C6F")))
226226
}
227227

228+
test("unhex") {
  val data = Seq(("1C", "737472696E67")).toDF("a", "b")
  // Expected values; the charset is explicit so the test does not depend on the JVM default.
  val oneByte = Array[Byte](28.toByte)
  val stringBytes = "string".getBytes("UTF-8")

  // Column-based API.
  checkAnswer(data.select(unhex('a)), Row(oneByte))
  checkAnswer(data.select(unhex('b)), Row(stringBytes))

  // SQL expression API, resolved through the function registry.
  checkAnswer(data.selectExpr("unhex(a)"), Row(oneByte))
  checkAnswer(data.selectExpr("unhex(b)"), Row(stringBytes))

  // Input that is not valid hexadecimal yields null.
  checkAnswer(data.selectExpr("""unhex("##")"""), Row(null))
}
237+
228238
test("hypot") {
229239
testTwoToOneMathFunction(hypot, hypot, math.hypot)
230240
}

0 commit comments

Comments
 (0)