Skip to content

Commit 637b4ee

Browse files
zhichao-li authored and Davies Liu committed
[SPARK-8214] [SQL] Add function hex
cc chenghao-intel adrian-wang Author: zhichao.li <[email protected]> Closes #6976 from zhichao-li/hex and squashes the following commits: e218d1b [zhichao.li] turn off scalastyle for non-ascii de3f5ea [zhichao.li] non-ascii char cf9c936 [zhichao.li] give separated buffer for each hex method 967ec90 [zhichao.li] Make 'value' as a feild of Hex 3b2fa13 [zhichao.li] tiny fix a647641 [zhichao.li] remove duplicate null check 7cab020 [zhichao.li] tiny refactoring 35ecfe5 [zhichao.li] add function hex
1 parent 94e040d commit 637b4ee

File tree

5 files changed

+125
-5
lines changed

5 files changed

+125
-5
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ object FunctionRegistry {
113113
expression[Expm1]("expm1"),
114114
expression[Floor]("floor"),
115115
expression[Hypot]("hypot"),
116+
expression[Hex]("hex"),
116117
expression[Logarithm]("log"),
117118
expression[Log]("ln"),
118119
expression[Log10]("log10"),

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@
1818
package org.apache.spark.sql.catalyst.expressions
1919

2020
import java.lang.{Long => JLong}
21+
import java.util.Arrays
2122

23+
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
2224
import org.apache.spark.sql.catalyst.expressions.codegen._
23-
import org.apache.spark.sql.types.{DataType, DoubleType, LongType, StringType}
25+
import org.apache.spark.sql.types._
2426
import org.apache.spark.unsafe.types.UTF8String
2527

2628
/**
@@ -273,9 +275,6 @@ case class Atan2(left: Expression, right: Expression)
273275
}
274276
}
275277

276-
case class Hypot(left: Expression, right: Expression)
277-
extends BinaryMathExpression(math.hypot, "HYPOT")
278-
279278
case class Pow(left: Expression, right: Expression)
280279
extends BinaryMathExpression(math.pow, "POWER") {
281280
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
@@ -287,6 +286,85 @@ case class Pow(left: Expression, right: Expression)
287286
}
288287
}
289288

289+
/**
 * Returns the hexadecimal representation of the argument as an upper-case STRING.
 *
 *  - INT and LONG arguments are rendered as a number without leading zeros. An INT is first
 *    widened to a LONG, so negative numbers show up as their 64-bit two's complement
 *    (e.g. -28 becomes "FFFFFFFFFFFFFFE4").
 *  - BINARY arguments are rendered byte by byte, two hex digits per byte.
 *  - STRING arguments are rendered like BINARY, using the bytes returned by
 *    `UTF8String.getBytes`.
 *
 * A null input evaluates to null.
 */
case class Hex(child: Expression)
  extends UnaryExpression with Serializable {

  override def dataType: DataType = StringType

  /**
   * Accepts STRING, INT, LONG, BINARY and NULL children; every other type is rejected with a
   * message naming the offending type.
   */
  override def checkInputDataTypes(): TypeCheckResult = {
    child.dataType match {
      case StringType | IntegerType | LongType | BinaryType | NullType =>
        TypeCheckResult.TypeCheckSuccess
      case other =>
        TypeCheckResult.TypeCheckFailure(s"hex doesn't accept $other type")
    }
  }

  /**
   * Evaluates the child and dispatches on its declared data type.
   *
   * @param input the row the child expression is evaluated against
   * @return the hex digits as a `UTF8String`, or null when the child evaluates to null
   */
  override def eval(input: InternalRow): Any = {
    val value = child.eval(input)
    if (value == null) {
      null
    } else {
      // No NullType case is needed: a child that yields null is handled by the branch above.
      child.dataType match {
        case LongType => hex(value.asInstanceOf[Long])
        // Widening to Long sign-extends, so negative ints produce 16 hex digits.
        case IntegerType => hex(value.asInstanceOf[Integer].toLong)
        case BinaryType => hex(value.asInstanceOf[Array[Byte]])
        case StringType => hex(value.asInstanceOf[UTF8String])
      }
    }
  }

  /**
   * Maps a value in the range 0-15 to its upper-case hex digit, as a single byte.
   */
  private def hexDigit(nibble: Int): Byte = {
    Character.toUpperCase(Character.forDigit(nibble, 16)).toByte
  }

  /**
   * Converts every byte of the string (as returned by `getBytes`) to two hex digits.
   */
  private def hex(str: UTF8String): UTF8String = {
    hex(str.getBytes)
  }

  /**
   * Converts every byte to two hex digits, high nibble first. An empty array gives an empty
   * string.
   */
  private def hex(bytes: Array[Byte]): UTF8String = {
    val digits = new Array[Byte](bytes.length * 2)
    var i = 0
    while (i < bytes.length) {
      val b = bytes(i)
      // Masking before the shift discards the sign extension of negative bytes.
      digits(i * 2) = hexDigit((b & 0xF0) >>> 4)
      digits(i * 2 + 1) = hexDigit(b & 0x0F)
      i += 1
    }
    UTF8String.fromBytes(digits)
  }

  /**
   * Renders the 64-bit pattern of `num` in hex without leading zeros (zero itself is "0").
   */
  private def hex(num: Long): UTF8String = {
    // A Long has at most 16 hex digits; fill the buffer from right to left.
    val digits = new Array[Byte](16)
    var remaining = num
    var start = digits.length
    do {
      start -= 1
      digits(start) = hexDigit((remaining & 0xF).toInt)
      // Unsigned shift so that negative numbers terminate after 16 digits.
      remaining >>>= 4
    } while (remaining != 0)
    UTF8String.fromBytes(Arrays.copyOfRange(digits, start, digits.length))
  }
}
364+
365+
case class Hypot(left: Expression, right: Expression)
366+
extends BinaryMathExpression(math.hypot, "HYPOT")
367+
290368
case class Logarithm(left: Expression, right: Expression)
291369
extends BinaryMathExpression((c1, c2) => math.log(c2) / math.log(c1), "LOG") {
292370

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions
1919

20-
import org.apache.spark.sql.catalyst.dsl.expressions._
2120
import org.apache.spark.SparkFunSuite
2221
import org.apache.spark.sql.catalyst.dsl.expressions._
2322
import org.apache.spark.sql.types.{DataType, DoubleType, LongType}
@@ -226,6 +225,19 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
226225
testBinary(Pow, math.pow, Seq((-1.0, 0.9), (-2.2, 1.7), (-2.2, -1.7)), expectNull = true)
227226
}
228227

228+
test("hex") {
  // Each pair is (expression under test, expected upper-case hex string).
  // scalastyle:off
  // Scala style is turned off because the last case contains non-ascii chars.
  val cases = Seq(
    // Integers: a negative Int comes out as 16 digits.
    Hex(Literal(28)) -> "1C",
    Hex(Literal(-28)) -> "FFFFFFFFFFFFFFE4",
    // Longs.
    Hex(Literal(100800200404L)) -> "177828FED4",
    Hex(Literal(-100800200404L)) -> "FFFFFFE887D7012C",
    // Strings and binary are converted byte by byte.
    Hex(Literal("helloHex")) -> "68656C6C6F486578",
    Hex(Literal("helloHex".getBytes())) -> "68656C6C6F486578",
    Hex(Literal("三重的")) -> "E4B889E9878DE79A84")
  // scalastyle:on

  cases.foreach { case (expr, expected) =>
    checkEvaluation(expr, expected)
  }
}
240+
229241
test("hypot") {
230242
testBinary(Hypot, math.hypot)
231243
}

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,22 @@ object functions {
10461046
*/
10471047
def floor(columnName: String): Column = floor(Column(columnName))
10481048

1049+
/**
1050+
* Computes the hexadecimal string representation of the given column.
1051+
*
1052+
* @group math_funcs
1053+
* @since 1.5.0
1054+
*/
1055+
// Wraps the column's underlying expression in a catalyst Hex expression. The declared return
// type is Column, so the Hex expression is presumably lifted by an implicit
// Expression-to-Column conversion that is in scope but not visible here.
def hex(column: Column): Column = Hex(column.expr)
1056+
1057+
/**
1058+
* Computes the hexadecimal string representation of the column with the given name.
1059+
*
1060+
* @group math_funcs
1061+
* @since 1.5.0
1062+
*/
1063+
// Convenience overload: builds a Column from the name and delegates to hex(Column).
def hex(colName: String): Column = hex(Column(colName))
1064+
10491065
/**
10501066
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
10511067
*

sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,19 @@ class MathExpressionsSuite extends QueryTest {
212212
)
213213
}
214214

215+
test("hex") {
  // One row with an int, a negative int, a long and a string column.
  val data = Seq((28, -28, 100800200404L, "hello")).toDF("a", "b", "c", "d")

  // Column symbol paired with the hex string expected for it.
  val expectedByColumn = Seq(
    'a -> "1C",
    'b -> "FFFFFFFFFFFFFFE4",
    'c -> "177828FED4",
    'd -> "68656C6C6F")

  // DataFrame function API.
  expectedByColumn.foreach { case (sym, expected) =>
    checkAnswer(data.select(hex(sym)), Seq(Row(expected)))
  }

  // SQL expression API, resolved through the function registry.
  expectedByColumn.foreach { case (sym, expected) =>
    checkAnswer(data.selectExpr(s"hex(${sym.name})"), Seq(Row(expected)))
  }

  // Binary input gives the same digits as the string it was cast from.
  checkAnswer(data.selectExpr("hex(cast(d as binary))"), Seq(Row("68656C6C6F")))
}
227+
215228
test("hypot") {
216229
testTwoToOneMathFunction(hypot, hypot, math.hypot)
217230
}

0 commit comments

Comments
 (0)