Skip to content
This repository was archived by the owner on Nov 15, 2024. It is now read-only.

Commit af7d974

Browse files
ueshin authored and MatthewRBruce committed
[SPARK-21300][SQL] ExternalMapToCatalyst should null-check map key prior to converting to internal value.
## What changes were proposed in this pull request?

`ExternalMapToCatalyst` should null-check map key prior to converting to internal value to throw an appropriate Exception instead of something like NPE.

## How was this patch tested?

Added a test and existing tests.

Author: Takuya UESHIN <[email protected]>

Closes apache#18524 from ueshin/issues/SPARK-21300.

(cherry picked from commit ce10545)
Signed-off-by: Wenchen Fan <[email protected]>
1 parent ff93071 commit af7d974

File tree

4 files changed

+24
-2
lines changed

4 files changed

+24
-2
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,7 @@ object JavaTypeInference {
423423
inputObject,
424424
ObjectType(keyType.getRawType),
425425
serializerFor(_, keyType),
426+
keyNullable = true,
426427
ObjectType(valueType.getRawType),
427428
serializerFor(_, valueType),
428429
valueNullable = true

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,7 @@ object ScalaReflection extends ScalaReflection {
511511
inputObject,
512512
dataTypeFor(keyType),
513513
serializerFor(_, keyType, keyPath, seenTypeSet),
514+
keyNullable = !keyType.typeSymbol.asClass.isPrimitive,
514515
dataTypeFor(valueType),
515516
serializerFor(_, valueType, valuePath, seenTypeSet),
516517
valueNullable = !valueType.typeSymbol.asClass.isPrimitive)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -659,18 +659,21 @@ object ExternalMapToCatalyst {
659659
inputMap: Expression,
660660
keyType: DataType,
661661
keyConverter: Expression => Expression,
662+
keyNullable: Boolean,
662663
valueType: DataType,
663664
valueConverter: Expression => Expression,
664665
valueNullable: Boolean): ExternalMapToCatalyst = {
665666
val id = curId.getAndIncrement()
666667
val keyName = "ExternalMapToCatalyst_key" + id
668+
val keyIsNull = "ExternalMapToCatalyst_key_isNull" + id
667669
val valueName = "ExternalMapToCatalyst_value" + id
668670
val valueIsNull = "ExternalMapToCatalyst_value_isNull" + id
669671

670672
ExternalMapToCatalyst(
671673
keyName,
674+
keyIsNull,
672675
keyType,
673-
keyConverter(LambdaVariable(keyName, "false", keyType, false)),
676+
keyConverter(LambdaVariable(keyName, keyIsNull, keyType, keyNullable)),
674677
valueName,
675678
valueIsNull,
676679
valueType,
@@ -686,6 +689,8 @@ object ExternalMapToCatalyst {
686689
*
687690
* @param key the name of the map key variable that used when iterate the map, and used as input for
688691
* the `keyConverter`
692+
* @param keyIsNull the nullability of the map key variable that used when iterate the map, and
693+
* used as input for the `keyConverter`
689694
* @param keyType the data type of the map key variable that used when iterate the map, and used as
690695
* input for the `keyConverter`
691696
* @param keyConverter A function that take the `key` as input, and converts it to catalyst format.
@@ -701,6 +706,7 @@ object ExternalMapToCatalyst {
701706
*/
702707
case class ExternalMapToCatalyst private(
703708
key: String,
709+
keyIsNull: String,
704710
keyType: DataType,
705711
keyConverter: Expression,
706712
value: String,
@@ -731,6 +737,7 @@ case class ExternalMapToCatalyst private(
731737

732738
val keyElementJavaType = ctx.javaType(keyType)
733739
val valueElementJavaType = ctx.javaType(valueType)
740+
ctx.addMutableState("boolean", keyIsNull, "")
734741
ctx.addMutableState(keyElementJavaType, key, "")
735742
ctx.addMutableState("boolean", valueIsNull, "")
736743
ctx.addMutableState(valueElementJavaType, value, "")
@@ -768,6 +775,12 @@ case class ExternalMapToCatalyst private(
768775
defineEntries -> defineKeyValue
769776
}
770777

778+
val keyNullCheck = if (ctx.isPrimitiveType(keyType)) {
779+
s"$keyIsNull = false;"
780+
} else {
781+
s"$keyIsNull = $key == null;"
782+
}
783+
771784
val valueNullCheck = if (ctx.isPrimitiveType(valueType)) {
772785
s"$valueIsNull = false;"
773786
} else {
@@ -790,6 +803,7 @@ case class ExternalMapToCatalyst private(
790803
$defineEntries
791804
while($entries.hasNext()) {
792805
$defineKeyValue
806+
$keyNullCheck
793807
$valueNullCheck
794808

795809
${genKeyConverter.code}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,12 +355,18 @@ class ExpressionEncoderSuite extends PlanTest with AnalysisTest {
355355
checkNullable[String](true)
356356
}
357357

358-
test("null check for map key") {
358+
test("null check for map key: String") {
359359
val encoder = ExpressionEncoder[Map[String, Int]]()
360360
val e = intercept[RuntimeException](encoder.toRow(Map(("a", 1), (null, 2))))
361361
assert(e.getMessage.contains("Cannot use null as map key"))
362362
}
363363

364+
test("null check for map key: Integer") {
365+
val encoder = ExpressionEncoder[Map[Integer, String]]()
366+
val e = intercept[RuntimeException](encoder.toRow(Map((1, "a"), (null, "b"))))
367+
assert(e.getMessage.contains("Cannot use null as map key"))
368+
}
369+
364370
private def encodeDecodeTest[T : ExpressionEncoder](
365371
input: T,
366372
testName: String): Unit = {

0 commit comments

Comments
 (0)