Skip to content

Commit e589e71

Browse files
committed
Revert "[SPARK-8784] [SQL] Add Python API for hex and unhex"
This reverts commit fc7aebd.
1 parent 488bad3 commit e589e71

File tree

5 files changed

+77
-115
lines changed

5 files changed

+77
-115
lines changed

python/pyspark/sql/functions.py

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -381,34 +381,6 @@ def randn(seed=None):
381381
return Column(jc)
382382

383383

384-
@ignore_unicode_prefix
385-
@since(1.5)
386-
def hex(col):
387-
"""Computes hex value of the given column, which could be StringType,
388-
BinaryType, IntegerType or LongType.
389-
390-
>>> sqlContext.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect()
391-
[Row(hex(a)=u'414243', hex(b)=u'3')]
392-
"""
393-
sc = SparkContext._active_spark_context
394-
jc = sc._jvm.functions.hex(_to_java_column(col))
395-
return Column(jc)
396-
397-
398-
@ignore_unicode_prefix
399-
@since(1.5)
400-
def unhex(col):
401-
"""Inverse of hex. Interprets each pair of characters as a hexadecimal number
402-
and converts it to the byte representation of that number.
403-
404-
>>> sqlContext.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect()
405-
[Row(unhex(a)=bytearray(b'ABC'))]
406-
"""
407-
sc = SparkContext._active_spark_context
408-
jc = sc._jvm.functions.unhex(_to_java_column(col))
409-
return Column(jc)
410-
411-
412384
@ignore_unicode_prefix
413385
@since(1.5)
414386
def sha1(col):

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ object FunctionRegistry {
160160
expression[Substring]("substr"),
161161
expression[Substring]("substring"),
162162
expression[Upper]("ucase"),
163-
expression[Unhex]("unhex"),
163+
expression[UnHex]("unhex"),
164164
expression[Upper]("upper")
165165
)
166166

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala

Lines changed: 71 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -227,20 +227,6 @@ case class Bin(child: Expression)
227227
}
228228
}
229229

230-
object Hex {
231-
val hexDigits = Array[Char](
232-
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
233-
).map(_.toByte)
234-
235-
// lookup table to translate '0' -> 0 ... 'F'/'f' -> 15
236-
val unhexDigits = {
237-
val array = Array.fill[Byte](128)(-1)
238-
(0 to 9).foreach(i => array('0' + i) = i.toByte)
239-
(0 to 5).foreach(i => array('A' + i) = (i + 10).toByte)
240-
(0 to 5).foreach(i => array('a' + i) = (i + 10).toByte)
241-
array
242-
}
243-
}
244230

245231
/**
246232
* If the argument is an INT or binary, hex returns the number as a STRING in hexadecimal format.
@@ -272,18 +258,30 @@ case class Hex(child: Expression) extends UnaryExpression with Serializable {
272258
case LongType => hex(num.asInstanceOf[Long])
273259
case IntegerType => hex(num.asInstanceOf[Integer].toLong)
274260
case BinaryType => hex(num.asInstanceOf[Array[Byte]])
275-
case StringType => hex(num.asInstanceOf[UTF8String].getBytes)
261+
case StringType => hex(num.asInstanceOf[UTF8String])
276262
}
277263
}
278264
}
279265

280-
private[this] def hex(bytes: Array[Byte]): UTF8String = {
281-
val length = bytes.length
266+
/**
267+
* Converts every byte of str (its UTF-8 encoding) to two hex digits.
268+
*/
269+
private def hex(str: UTF8String): UTF8String = {
270+
hex(str.getBytes)
271+
}
272+
273+
private def hex(bytes: Array[Byte]): UTF8String = {
274+
doHex(bytes, bytes.length)
275+
}
276+
277+
private def doHex(bytes: Array[Byte], length: Int): UTF8String = {
282278
val value = new Array[Byte](length * 2)
283279
var i = 0
284280
while (i < length) {
285-
value(i * 2) = Hex.hexDigits((bytes(i) & 0xF0) >> 4)
286-
value(i * 2 + 1) = Hex.hexDigits(bytes(i) & 0x0F)
281+
value(i * 2) = Character.toUpperCase(Character.forDigit(
282+
(bytes(i) & 0xF0) >>> 4, 16)).toByte
283+
value(i * 2 + 1) = Character.toUpperCase(Character.forDigit(
284+
bytes(i) & 0x0F, 16)).toByte
287285
i += 1
288286
}
289287
UTF8String.fromBytes(value)
@@ -296,64 +294,14 @@ case class Hex(child: Expression) extends UnaryExpression with Serializable {
296294
var len = 0
297295
do {
298296
len += 1
299-
value(value.length - len) = Hex.hexDigits((numBuf & 0xF).toInt)
297+
value(value.length - len) = Character.toUpperCase(Character
298+
.forDigit((numBuf & 0xF).toInt, 16)).toByte
300299
numBuf >>>= 4
301300
} while (numBuf != 0)
302301
UTF8String.fromBytes(Arrays.copyOfRange(value, value.length - len, value.length))
303302
}
304303
}
305304

306-
/**
307-
* Performs the inverse operation of HEX.
308-
* Resulting characters are returned as a byte array.
309-
*/
310-
case class Unhex(child: Expression)
311-
extends UnaryExpression with ExpectsInputTypes with Serializable {
312-
313-
override def nullable: Boolean = true
314-
override def dataType: DataType = BinaryType
315-
override def inputTypes: Seq[DataType] = Seq(BinaryType)
316-
317-
override def eval(input: InternalRow): Any = {
318-
val num = child.eval(input)
319-
if (num == null) {
320-
null
321-
} else {
322-
unhex(num.asInstanceOf[UTF8String].getBytes)
323-
}
324-
}
325-
326-
private[this] def unhex(bytes: Array[Byte]): Array[Byte] = {
327-
val out = new Array[Byte]((bytes.length + 1) >> 1)
328-
var i = 0
329-
if ((bytes.length & 0x01) != 0) {
330-
// padding with '0'
331-
if (bytes(0) < 0) {
332-
return null
333-
}
334-
val v = Hex.unhexDigits(bytes(0))
335-
if (v == -1) {
336-
return null
337-
}
338-
out(0) = v
339-
i += 1
340-
}
341-
// two characters form the hex value.
342-
while (i < bytes.length) {
343-
if (bytes(i) < 0 || bytes(i + 1) < 0) {
344-
return null
345-
}
346-
val first = Hex.unhexDigits(bytes(i))
347-
val second = Hex.unhexDigits(bytes(i + 1))
348-
if (first == -1 || second == -1) {
349-
return null
350-
}
351-
out(i / 2) = (((first << 4) | second) & 0xFF).toByte
352-
i += 2
353-
}
354-
out
355-
}
356-
}
357305

358306
////////////////////////////////////////////////////////////////////////////////////////////////////
359307
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -500,6 +448,58 @@ case class ShiftRight(left: Expression, right: Expression) extends BinaryExpress
500448
}
501449
}
502450

451+
/**
452+
* Performs the inverse operation of HEX.
453+
* Resulting characters are returned as a byte array.
454+
*/
455+
case class UnHex(child: Expression) extends UnaryExpression with Serializable {
456+
457+
override def dataType: DataType = BinaryType
458+
459+
override def checkInputDataTypes(): TypeCheckResult = {
460+
if (child.dataType.isInstanceOf[StringType] || child.dataType == NullType) {
461+
TypeCheckResult.TypeCheckSuccess
462+
} else {
463+
TypeCheckResult.TypeCheckFailure(s"unHex accepts String type, not ${child.dataType}")
464+
}
465+
}
466+
467+
override def eval(input: InternalRow): Any = {
468+
val num = child.eval(input)
469+
if (num == null) {
470+
null
471+
} else {
472+
unhex(num.asInstanceOf[UTF8String].getBytes)
473+
}
474+
}
475+
476+
private val unhexDigits = {
477+
val array = Array.fill[Byte](128)(-1)
478+
(0 to 9).foreach(i => array('0' + i) = i.toByte)
479+
(0 to 5).foreach(i => array('A' + i) = (i + 10).toByte)
480+
(0 to 5).foreach(i => array('a' + i) = (i + 10).toByte)
481+
array
482+
}
483+
484+
private def unhex(inputBytes: Array[Byte]): Array[Byte] = {
485+
var bytes = inputBytes
486+
if ((bytes.length & 0x01) != 0) {
487+
bytes = '0'.toByte +: bytes
488+
}
489+
val out = new Array[Byte](bytes.length >> 1)
490+
// two characters form the hex value.
491+
var i = 0
492+
while (i < bytes.length) {
493+
val first = unhexDigits(bytes(i))
494+
val second = unhexDigits(bytes(i + 1))
495+
if (first == -1 || second == -1) { return null}
496+
out(i / 2) = (((first << 4) | second) & 0xFF).toByte
497+
i += 2
498+
}
499+
out
500+
}
501+
}
502+
503503
case class Hypot(left: Expression, right: Expression)
504504
extends BinaryMathExpression(math.hypot, "HYPOT")
505505

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
1919

2020
import org.apache.spark.SparkFunSuite
2121
import org.apache.spark.sql.catalyst.dsl.expressions._
22-
import org.apache.spark.sql.types._
22+
import org.apache.spark.sql.types.{IntegerType, DataType, DoubleType, LongType}
2323

2424
class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
2525

@@ -252,15 +252,11 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
252252
}
253253

254254
test("hex") {
255-
checkEvaluation(Hex(Literal.create(null, IntegerType)), null)
256255
checkEvaluation(Hex(Literal(28)), "1C")
257256
checkEvaluation(Hex(Literal(-28)), "FFFFFFFFFFFFFFE4")
258-
checkEvaluation(Hex(Literal.create(null, LongType)), null)
259257
checkEvaluation(Hex(Literal(100800200404L)), "177828FED4")
260258
checkEvaluation(Hex(Literal(-100800200404L)), "FFFFFFE887D7012C")
261-
checkEvaluation(Hex(Literal.create(null, StringType)), null)
262259
checkEvaluation(Hex(Literal("helloHex")), "68656C6C6F486578")
263-
checkEvaluation(Hex(Literal.create(null, BinaryType)), null)
264260
checkEvaluation(Hex(Literal("helloHex".getBytes())), "68656C6C6F486578")
265261
// scalastyle:off
266262
// Turn off scala style for non-ascii chars
@@ -269,15 +265,9 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
269265
}
270266

271267
test("unhex") {
272-
checkEvaluation(Unhex(Literal.create(null, StringType)), null)
273-
checkEvaluation(Unhex(Literal("737472696E67")), "string".getBytes)
274-
checkEvaluation(Unhex(Literal("")), new Array[Byte](0))
275-
checkEvaluation(Unhex(Literal("F")), Array[Byte](15))
276-
checkEvaluation(Unhex(Literal("ff")), Array[Byte](-1))
277-
// scalastyle:off
278-
// Turn off scala style for non-ascii chars
279-
checkEvaluation(Unhex(Literal("E4B889E9878DE79A84")), "三重的".getBytes("UTF-8"))
280-
// scalastyle:on
268+
checkEvaluation(UnHex(Literal("737472696E67")), "string".getBytes)
269+
checkEvaluation(UnHex(Literal("")), new Array[Byte](0))
270+
checkEvaluation(UnHex(Literal("0")), Array[Byte](0))
281271
}
282272

283273
test("hypot") {

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ object functions {
10611061
* @group math_funcs
10621062
* @since 1.5.0
10631063
*/
1064-
def unhex(column: Column): Column = Unhex(column.expr)
1064+
def unhex(column: Column): Column = UnHex(column.expr)
10651065

10661066
/**
10671067
* Inverse of hex. Interprets each pair of characters as a hexadecimal number

0 commit comments

Comments
 (0)