From cb5440787d676d2c74983ddcd1df31b38d009d71 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Sun, 9 Apr 2017 00:57:45 -0700 Subject: [PATCH 1/3] na.fill will change the values in long or integer when the default value is in double --- .../scala/org/apache/spark/sql/DataFrameNaFunctions.scala | 5 +++-- .../org/apache/spark/sql/DataFrameNaFunctionsSuite.scala | 7 +++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 28820681cd3a6..d8f953fba5a8b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -407,10 +407,11 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { val quotedColName = "`" + col.name + "`" val colValue = col.dataType match { case DoubleType | FloatType => - nanvl(df.col(quotedColName), lit(null)) // nanvl only supports these types + // nanvl only supports these types + nanvl(df.col(quotedColName), lit(null).cast(col.dataType)) case _ => df.col(quotedColName) } - coalesce(colValue, lit(replacement)).cast(col.dataType).as(col.name) + coalesce(colValue, lit(replacement).cast(col.dataType)).as(col.name) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index fd829846ac332..145968d1f9d6a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -145,6 +145,13 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSQLContext { Row(1, 2) :: Row(-1, -2) :: Row(9123146099426677101L, 9123146560113991650L) :: Nil ) + checkAnswer( + Seq[(java.lang.Long, java.lang.Double)]((null, 3.14), (9123146099426677101L, null), + (9123146560113991650L, 1.6), (null, null)).toDF("a", "b").na.fill(0.2), + Row(0, 3.14) :: Row(9123146099426677101L, 0.3) :: Row(9123146560113991650L, 1.6) + :: Row(0, 0.2) :: Nil + ) + checkAnswer( Seq[(java.lang.Long, java.lang.Double)]((null, 1.23), (3L, null), (4L, 3.45)) .toDF("a", "b").na.fill(2.34), From 42c9eb0276ec46386d67dea7999d7796461de9da Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Sun, 9 Apr 2017 01:19:12 -0700 Subject: [PATCH 2/3] Fix the test --- .../scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index 145968d1f9d6a..430b16e4476cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -148,7 +148,7 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSQLContext { checkAnswer( Seq[(java.lang.Long, java.lang.Double)]((null, 3.14), (9123146099426677101L, null), (9123146560113991650L, 1.6), (null, null)).toDF("a", "b").na.fill(0.2), - Row(0, 3.14) :: Row(9123146099426677101L, 0.3) :: Row(9123146560113991650L, 1.6) + Row(0, 3.14) :: Row(9123146099426677101L, 0.2) :: Row(9123146560113991650L, 1.6) :: Row(0, 0.2) :: Nil ) From 3fb1a66c84021b4b2cc223aff82e001f4cd7df03 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Sun, 9 Apr 2017 11:22:22 -0700 Subject: [PATCH 3/3] address feedback --- .../org/apache/spark/sql/DataFrameNaFunctionsSuite.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index 430b16e4476cc..aa237d0619ac3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -152,6 +152,13 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSQLContext { :: Row(0, 0.2) :: Nil ) + checkAnswer( + Seq[(java.lang.Long, java.lang.Float)]((null, 3.14f), (9123146099426677101L, null), + (9123146560113991650L, 1.6f), (null, null)).toDF("a", "b").na.fill(0.2), + Row(0, 3.14f) :: Row(9123146099426677101L, 0.2f) :: Row(9123146560113991650L, 1.6f) + :: Row(0, 0.2f) :: Nil + ) + checkAnswer( Seq[(java.lang.Long, java.lang.Double)]((null, 1.23), (3L, null), (4L, 3.45)) .toDF("a", "b").na.fill(2.34),