From 0dfa110611f84348d623cd44d84370a5b06c6ba8 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Thu, 10 Dec 2015 00:03:41 -0800 Subject: [PATCH 1/2] null handling in udf --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 ++- .../spark/sql/hive/execution/HiveUDFSuite.scala | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ca00a5e49f668..ffdba4d56e2af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1095,7 +1095,8 @@ class Analyzer( // TODO: skip null handling for not-nullable primitive inputs after we can completely // trust the `nullable` information. // .filter { case (cls, expr) => cls.isPrimitive && expr.nullable } - .filter { case (cls, _) => cls.isPrimitive } + .filter { case (cls, _) => cls.isPrimitive || cls == classOf[java.sql.Timestamp] || + cls == classOf[java.sql.Date] || cls == classOf[java.math.BigDecimal] } .map { case (_, expr) => IsNull(expr) } .reduceLeftOption[Expression]((e1, e2) => Or(e1, e2)) inputsNullCheck.map(If(_, Literal.create(null, udf.dataType), udf)).getOrElse(udf) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index 9deb1a6db15ad..1d99a4bc6eae3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.hive.execution import java.io.{PrintWriter, File, DataInput, DataOutput} +import java.sql.Timestamp import java.util.{ArrayList, Arrays, Properties} import org.apache.hadoop.conf.Configuration @@ -350,6 +351,16 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { sqlContext.dropTempTable("testUDF") } + test("SPARK-12258 Timestamp UDF and Null value") { + hiveContext.runSqlHive("CREATE TABLE ts_test (ts TIMESTAMP) STORED AS TEXTFILE") + hiveContext.runSqlHive("INSERT INTO TABLE ts_test VALUES(Null)") + hiveContext.udf.register("dummy", + (ts: Timestamp) => ts + ) + val result = hiveContext.sql("SELECT dummy(ts) FROM ts_test").collect().mkString("\n") + assertResult("[null]")(result) + } + test("SPARK-11522 select input_file_name from non-parquet table"){ withTempDir { tempDir => From 446d1fa49f4ab775016ca7eddace853bd0258302 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Thu, 10 Dec 2015 13:23:58 -0800 Subject: [PATCH 2/2] removed BigDecimal. --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ffdba4d56e2af..c1ab22def5698 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1095,8 +1095,10 @@ class Analyzer( // TODO: skip null handling for not-nullable primitive inputs after we can completely // trust the `nullable` information. // .filter { case (cls, expr) => cls.isPrimitive && expr.nullable } - .filter { case (cls, _) => cls.isPrimitive || cls == classOf[java.sql.Timestamp] || - cls == classOf[java.sql.Date] || cls == classOf[java.math.BigDecimal] } + .filter { case (cls, _) => + cls.isPrimitive || + cls == classOf[java.sql.Timestamp] || + cls == classOf[java.sql.Date] } .map { case (_, expr) => IsNull(expr) } .reduceLeftOption[Expression]((e1, e2) => Or(e1, e2)) inputsNullCheck.map(If(_, Literal.create(null, udf.dataType), udf)).getOrElse(udf)