Skip to content

Commit 89feef4

Browse files
viirya authored and kai-chi committed
[SPARK-27671][SQL] Fix error when casting from a nested null in a struct
Currently, when a struct contains a null in a nested field, casting from that struct throws an error.

```scala
scala> sql("select cast(struct(1, null) as struct<a:int,b:int>)").show
scala.MatchError: NullType (of class org.apache.spark.sql.types.NullType$)
  at org.apache.spark.sql.catalyst.expressions.Cast.castToInt(Cast.scala:447)
  at org.apache.spark.sql.catalyst.expressions.Cast.cast(Cast.scala:635)
  at org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castStruct$1(Cast.scala:603)
```

Similarly, an inline table, which casts the null in a nested field under the hood, also throws an error.

```scala
scala> sql("select * FROM VALUES (('a', (10, null))), (('b', (10, 50))), (('c', null)) AS tab(x, y)").show
org.apache.spark.sql.AnalysisException: failed to evaluate expression named_struct('col1', 10, 'col2', NULL): NullType (of class org.apache.spark.sql.types.NullType$); line 1 pos 14
  at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:47)
  at org.apache.spark.sql.catalyst.analysis.ResolveInlineTables.$anonfun$convert$6(ResolveInlineTables.scala:106)
```

This commit fixes the issue and adds tests.

Closes apache#24576 from viirya/cast-null.

Authored-by: Liang-Chi Hsieh <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 8b0bdaa)
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent d2b6c20 commit 89feef4

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -607,6 +607,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
607607
// We can return what the children return. Same thing should happen in the codegen path.
608608
if (DataType.equalsStructurally(from, to)) {
609609
identity
610+
} else if (from == NullType) {
611+
// According to `canCast`, NullType can be casted to any type.
612+
// For primitive types, we don't reach here because the guard of `nullSafeEval`.
613+
// But for nested types like struct, we might reach here for nested null type field.
614+
// We won't call the returned function actually, but returns a placeholder.
615+
_ => throw new SparkException(s"should not directly cast from NullType to $to.")
610616
} else {
611617
to match {
612618
case dt if dt == from => identity[Any]

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,4 +989,19 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper {
989989
}
990990
}
991991
}
992+
993+
test("SPARK-27671: cast from nested null type in struct") {
994+
import DataTypeTestUtils._
995+
996+
atomicTypes.foreach { atomicType =>
997+
val struct = Literal.create(
998+
InternalRow(null),
999+
StructType(Seq(StructField("a", NullType, nullable = true))))
1000+
1001+
val ret = cast(struct, StructType(Seq(
1002+
StructField("a", atomicType, nullable = true))))
1003+
assert(ret.resolved)
1004+
checkEvaluation(ret, InternalRow(null))
1005+
}
1006+
}
9921007
}

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2622,4 +2622,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
26222622
checkAnswer(res, Row("1-1", 6, 6))
26232623
}
26242624
}
2625+
2626+
test("SPARK-27671: Fix analysis exception when casting null in nested field in struct") {
2627+
val df = sql("SELECT * FROM VALUES (('a', (10, null))), (('b', (10, 50))), " +
2628+
"(('c', null)) AS tab(x, y)")
2629+
checkAnswer(df, Row("a", Row(10, null)) :: Row("b", Row(10, 50)) :: Row("c", null) :: Nil)
2630+
2631+
val cast = sql("SELECT cast(struct(1, null) AS struct<a:int,b:int>)")
2632+
checkAnswer(cast, Row(Row(1, null)) :: Nil)
2633+
}
26252634
}

0 commit comments

Comments
 (0)