From f35aac2db12ba9d868184f1b66e11a59bcf2bbc9 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Wed, 30 Oct 2019 17:28:42 +0530 Subject: [PATCH 1/4] [SPARK-29462] Initial commit --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 3f722e8537c36..8e00e32a719e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -47,7 +47,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { override def dataType: ArrayType = { ArrayType( TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(children.map(_.dataType)) - .getOrElse(StringType), + .getOrElse(NullType), containsNull = children.exists(_.nullable)) } From b1473b94cb81510e34491200267f5c8d3d4d8127 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Thu, 31 Oct 2019 10:37:07 +0530 Subject: [PATCH 2/4] Updated previous test case and added new test case --- .../apache/spark/sql/DataFrameFunctionsSuite.scala | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 06484908f5e73..1dc7d34b52277 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -3400,12 +3400,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).foreach(assertValuesDoNotChangeAfterCoalesceOrUnion(_)) } - test("SPARK-21281 use string types by default if array and map have no argument") { + 
test("SPARK-21281 use string types by default if map have no argument") { val ds = spark.range(1) var expectedSchema = new StructType() - .add("x", ArrayType(StringType, containsNull = false), nullable = false) - assert(ds.select(array().as("x")).schema == expectedSchema) - expectedSchema = new StructType() .add("x", MapType(StringType, StringType, valueContainsNull = false), nullable = false) assert(ds.select(map().as("x")).schema == expectedSchema) } @@ -3463,6 +3460,13 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.select("x").filter("exists(i, x -> x % d == 0)"), Seq(Row(1))) } + + test("SPARK-29462: Use null type by default if array have no argument") { + val ds = spark.range(1) + var expectedSchema = new StructType() + .add("x", ArrayType(NullType, containsNull = false), nullable = false) + assert(ds.select(array().as("x")).schema == expectedSchema) + } } object DataFrameFunctionsSuite { From acc2864bc62afed18c24ced92eccc379985a1d4a Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Sun, 3 Nov 2019 15:52:32 +0530 Subject: [PATCH 3/4] Updated migration guide. --- docs/sql-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index d03ca663e8e3f..8f8371a0d4ecd 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,6 +217,8 @@ license: | For example `SELECT timestamp 'tomorrow';`. - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. + + - Since Spark 3.0, when `array` function is called without parameters, it returns `array`. 
## Upgrading from Spark SQL 2.4 to 2.4.1 From bb5e68a1c35ae6966ac01cd1b9d06ff1862abaa6 Mon Sep 17 00:00:00 2001 From: Aman Omer Date: Mon, 4 Nov 2019 08:21:06 +0530 Subject: [PATCH 4/4] Handled review comments --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 8f8371a0d4ecd..bfc0f72f3151f 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -218,7 +218,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - Since Spark 3.0, when `array` function is called without parameters, it returns `array`. + - Since Spark 3.0, when the `array` function is called without parameters, it returns an empty array with `NullType` as the element type. In Spark version 2.4 and earlier, the element type of the result is `StringType`. ## Upgrading from Spark SQL 2.4 to 2.4.1