From 395eee801dab323b6f32b25b43a4e92d99027551 Mon Sep 17 00:00:00 2001 From: iRakson Date: Tue, 11 Feb 2020 21:14:12 +0530 Subject: [PATCH 1/4] [SPARK-30790]map() should return map [SPARK-30790]map() should return map --- docs/sql-migration-guide.md | 2 ++ .../expressions/complexTypeCreator.scala | 12 ++++++++++-- .../org/apache/spark/sql/internal/SQLConf.scala | 9 +++++++++ .../spark/sql/DataFrameFunctionsSuite.scala | 16 +++++++++++----- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index f98fab5b4c56b..e631d1c046aab 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -218,6 +218,8 @@ license: | - Since Spark 3.0, when the `array` function is called without any parameters, it returns an empty array of `NullType`. In Spark version 2.4 and earlier, it returns an empty array of string type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.arrayDefaultToStringType.enabled` to `true`. + - Since Spark 3.0, when the `map` function is called without any parameters, it returns an empty map of `NullType`. In Spark version 2.4 and earlier, it returns an empty map of string type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.mapDefaultToStringType.enabled` to `true`. + - Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. - Since Spark 3.0, when casting interval values to string type, there is no "interval" prefix, e.g. `1 days 2 hours`. In Spark version 2.4 and earlier, the string contains the "interval" prefix like `interval 1 days 2 hours`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 7335e305bfe55..cac9df6513d39 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -145,6 +145,14 @@ case class CreateMap(children: Seq[Expression]) extends Expression { lazy val keys = children.indices.filter(_ % 2 == 0).map(children) lazy val values = children.indices.filter(_ % 2 != 0).map(children) + private val defaultElementType: DataType = { + if (SQLConf.get.getConf(SQLConf.LEGACY_MAP_DEFAULT_TO_STRING)) { + StringType + } else { + NullType + } + } + override def foldable: Boolean = children.forall(_.foldable) override def checkInputDataTypes(): TypeCheckResult = { @@ -167,9 +175,9 @@ case class CreateMap(children: Seq[Expression]) extends Expression { override lazy val dataType: MapType = { MapType( keyType = TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(keys.map(_.dataType)) - .getOrElse(StringType), + .getOrElse(defaultElementType), valueType = TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(values.map(_.dataType)) - .getOrElse(StringType), + .getOrElse(defaultElementType), valueContainsNull = values.exists(_.nullable)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d86f8693e0655..997f531c160bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2016,6 +2016,15 @@ 
object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_MAP_DEFAULT_TO_STRING = + buildConf("spark.sql.legacy.mapDefaultToStringType.enabled") + .internal() + .doc("When set to true, it returns an empty map of string type when the `map` " + + "function is called without any parameters. Otherwise, it returns an empty " + + "map of `NullType`") + .booleanConf + .createWithDefault(false) + val TRUNCATE_TABLE_IGNORE_PERMISSION_ACL = buildConf("spark.sql.truncateTable.ignorePermissionAcl.enabled") .internal() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 9e9d8c3e9a7c5..6a12f76d4747d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -3499,11 +3499,17 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).foreach(assertValuesDoNotChangeAfterCoalesceOrUnion(_)) } - test("SPARK-21281 use string types by default if map have no argument") { - val ds = spark.range(1) - var expectedSchema = new StructType() - .add("x", MapType(StringType, StringType, valueContainsNull = false), nullable = false) - assert(ds.select(map().as("x")).schema == expectedSchema) + test("SPARK-30790: Empty map of for map function with no arguments") { + Seq((true, StringType), (false, NullType)).foreach { + case (mapDefaultToString, expectedType) => + withSQLConf(SQLConf.LEGACY_MAP_DEFAULT_TO_STRING.key -> mapDefaultToString.toString) { + val schema = spark.range(1).select(map()).schema + assert(schema.nonEmpty && schema.head.dataType.isInstanceOf[MapType]) + val actualKeyType = schema.head.dataType.asInstanceOf[MapType].keyType + val actualValueType = schema.head.dataType.asInstanceOf[MapType].valueType + assert(actualKeyType === expectedType && actualValueType === expectedType) + } + } } test("SPARK-21281 fails if functions have no argument") { From 3319d3ab13262e90764c06006b9528844592890b Mon Sep 17 00:00:00 2001 From: iRakson Date: Wed, 12 Feb 2020 15:16:53 +0530 Subject: [PATCH 2/4] Fix --- docs/sql-migration-guide.md | 4 +-- .../expressions/complexTypeCreator.scala | 4 +-- .../catalyst/util/ArrayBasedMapBuilder.scala | 5 ++- .../apache/spark/sql/internal/SQLConf.scala | 20 ++++-------- .../spark/sql/DataFrameFunctionsSuite.scala | 31 ++++++++++--------- 5 files changed, 29 insertions(+), 35 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index e631d1c046aab..f38e0c5673581 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -216,9 +216,9 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - Since Spark 3.0, when the `array` function is called without any parameters, it returns an empty array of `NullType`. In Spark version 2.4 and earlier, it returns an empty array of string type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.arrayDefaultToStringType.enabled` to `true`. + - Since Spark 3.0, when the `array` function is called without any parameters, it returns an empty array of `NullType`. In Spark version 2.4 and earlier, it returns an empty array of string type. 
To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.createEmptyCollectionUsingStringType.enabled` to `true`. - - Since Spark 3.0, when the `map` function is called without any parameters, it returns an empty map of `NullType`. In Spark version 2.4 and earlier, it returns an empty map of string type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.mapDefaultToStringType.enabled` to `true`. + - Since Spark 3.0, when the `map` function is called without any parameters, it returns an empty map with `NullType` as key/value type. In Spark version 2.4 and earlier, it returns an empty map of string type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.createEmptyCollectionUsingStringType.enabled` to `true`. - Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index cac9df6513d39..4bd85d304ded2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -46,7 +46,7 @@ case class CreateArray(children: Seq[Expression]) extends Expression { } private val defaultElementType: DataType = { - if (SQLConf.get.getConf(SQLConf.LEGACY_ARRAY_DEFAULT_TO_STRING)) { + if (SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) { StringType } else { NullType @@ -146,7 +146,7 @@ case class CreateMap(children: Seq[Expression]) extends Expression { lazy val values = children.indices.filter(_ % 2 != 0).map(children) private val defaultElementType: DataType = { - if (SQLConf.get.getConf(SQLConf.LEGACY_MAP_DEFAULT_TO_STRING)) { + if (SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) { StringType } else { NullType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala index 98934368205ec..37d65309e2b89 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala @@ -29,12 +29,11 @@ import org.apache.spark.unsafe.array.ByteArrayMethods */ class ArrayBasedMapBuilder(keyType: DataType, valueType: DataType) extends Serializable { assert(!keyType.existsRecursively(_.isInstanceOf[MapType]), "key of map cannot be/contain map") - assert(keyType != NullType, "map key cannot be null type.") private lazy val keyToIndex = keyType match { // Binary type data is `byte[]`, which can't use `==` to check equality. - case _: AtomicType | _: CalendarIntervalType if !keyType.isInstanceOf[BinaryType] => - new java.util.HashMap[Any, Int]() + case _: AtomicType | _: CalendarIntervalType | _: NullType + if !keyType.isInstanceOf[BinaryType] => new java.util.HashMap[Any, Int]() case _ => // for complex types, use interpreted ordering to be able to compare unsafe data with safe // data, e.g. UnsafeRow vs GenericInternalRow. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 997f531c160bf..e14f7a4451988 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2007,21 +2007,13 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_ARRAY_DEFAULT_TO_STRING = - buildConf("spark.sql.legacy.arrayDefaultToStringType.enabled") + val LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE = + buildConf("spark.sql.legacy.createEmptyCollectionUsingStringType.enabled") .internal() - .doc("When set to true, it returns an empty array of string type when the `array` " + - "function is called without any parameters. Otherwise, it returns an empty " + - "array of `NullType`") - .booleanConf - .createWithDefault(false) - - val LEGACY_MAP_DEFAULT_TO_STRING = - buildConf("spark.sql.legacy.mapDefaultToStringType.enabled") - .internal() - .doc("When set to true, it returns an empty map of string type when the `map` " + - "function is called without any parameters. Otherwise, it returns an empty " + - "map of `NullType`") + .doc("When set to true, it returns an empty array of string type and an empty map with " + + "string type as key/value type when `array` and `map` functions are called without any " + + "parameters, respectively. Otherwise, it returns an empty array of `NullType` and " + + "empty map with `NullType` as key/value type, respectively. ") .booleanConf .createWithDefault(false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 6a12f76d4747d..5f6c1c00b6700 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -3499,19 +3499,6 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { ).foreach(assertValuesDoNotChangeAfterCoalesceOrUnion(_)) } - test("SPARK-30790: Empty map of for map function with no arguments") { - Seq((true, StringType), (false, NullType)).foreach { - case (mapDefaultToString, expectedType) => - withSQLConf(SQLConf.LEGACY_MAP_DEFAULT_TO_STRING.key -> mapDefaultToString.toString) { - val schema = spark.range(1).select(map()).schema - assert(schema.nonEmpty && schema.head.dataType.isInstanceOf[MapType]) - val actualKeyType = schema.head.dataType.asInstanceOf[MapType].keyType - val actualValueType = schema.head.dataType.asInstanceOf[MapType].valueType - assert(actualKeyType === expectedType && actualValueType === expectedType) - } - } - } - test("SPARK-21281 fails if functions have no argument") { val df = Seq(1).toDF("a") @@ -3584,7 +3571,8 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { test("SPARK-29462: Empty array of NullType for array function with no arguments") { Seq((true, StringType), (false, NullType)).foreach { case (arrayDefaultToString, expectedType) => - withSQLConf(SQLConf.LEGACY_ARRAY_DEFAULT_TO_STRING.key -> arrayDefaultToString.toString) { + withSQLConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE.key -> + arrayDefaultToString.toString) { val schema = spark.range(1).select(array()).schema assert(schema.nonEmpty && schema.head.dataType.isInstanceOf[ArrayType]) val actualType = schema.head.dataType.asInstanceOf[ArrayType].elementType @@ -3592,6 +3580,21 @@ class 
DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } } } + + test("SPARK-30790: Empty map with NullType as key/value type for map function with no argument") { + Seq((true, StringType), (false, NullType)).foreach { + case (mapDefaultToString, expectedType) => + withSQLConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE.key -> + mapDefaultToString.toString) { + val schema = spark.range(1).select(map()).schema + assert(schema.nonEmpty && schema.head.dataType.isInstanceOf[MapType]) + val actualKeyType = schema.head.dataType.asInstanceOf[MapType].keyType + val actualValueType = schema.head.dataType.asInstanceOf[MapType].valueType + assert(actualKeyType === expectedType) + assert(actualValueType === expectedType) + } + } + } } object DataFrameFunctionsSuite { From 1a066e1ec71fe7eca329781c76ab75c18de2a0cc Mon Sep 17 00:00:00 2001 From: iRakson Date: Wed, 12 Feb 2020 16:34:39 +0530 Subject: [PATCH 3/4] Fix migration guide --- docs/sql-migration-guide.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index f38e0c5673581..9bfa792f36079 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -216,9 +216,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - Since Spark 3.0, when the `array` function is called without any parameters, it returns an empty array of `NullType`. In Spark version 2.4 and earlier, it returns an empty array of string type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.createEmptyCollectionUsingStringType.enabled` to `true`. - - - Since Spark 3.0, when the `map` function is called without any parameters, it returns an empty map with `NullType` as key/value type. In Spark version 2.4 and earlier, it returns an empty map of string type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.createEmptyCollectionUsingStringType.enabled` to `true`. + - Since Spark 3.0, when the `array`/`map` function is called without any parameters, it returns an empty collection with `NullType` as element type. In Spark version 2.4 and earlier, it returns an empty collection with `StringType` as element type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.createEmptyCollectionUsingStringType.enabled` to `true`. - Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. From e873527032ffaed583b211d81f2c9aafebe9fe07 Mon Sep 17 00:00:00 2001 From: iRakson Date: Wed, 12 Feb 2020 23:32:24 +0530 Subject: [PATCH 4/4] Review comment fix --- docs/sql-migration-guide.md | 2 +- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 9bfa792f36079..46b741687363f 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -216,7 +216,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. 
- - Since Spark 3.0, when the `array`/`map` function is called without any parameters, it returns an empty collection with `NullType` as element type. In Spark version 2.4 and earlier, it returns an empty collection with `StringType` as element type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.createEmptyCollectionUsingStringType.enabled` to `true`. + - Since Spark 3.0, when the `array`/`map` function is called without any parameters, it returns an empty collection with `NullType` as element type. In Spark version 2.4 and earlier, it returns an empty collection with `StringType` as element type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.createEmptyCollectionUsingStringType` to `true`. - Since Spark 3.0, the interval literal syntax does not allow multiple from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH'` throws parser exception. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index e14f7a4451988..95b5b3afc3933 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2008,12 +2008,11 @@ object SQLConf { .createWithDefault(false) val LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE = - buildConf("spark.sql.legacy.createEmptyCollectionUsingStringType.enabled") + buildConf("spark.sql.legacy.createEmptyCollectionUsingStringType") .internal() - .doc("When set to true, it returns an empty array of string type and an empty map with " + - "string type as key/value type when `array` and `map` functions are called without any " + - "parameters, respectively. Otherwise, it returns an empty array of `NullType` and " + - "empty map with `NullType` as key/value type, respectively. ") + .doc("When set to true, Spark returns an empty collection with `StringType` as element " + + "type if the `array`/`map` function is called without any parameters. Otherwise, Spark " + + "returns an empty collection with `NullType` as element type.") .booleanConf .createWithDefault(false)
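
A minimal spark-shell sketch of the behavior this patch series documents, using the final config name from patch 4/4. It is illustrative only: the exact types printed are the expected outcome under these patches, not output captured from a build.

    import org.apache.spark.sql.functions.map
    import org.apache.spark.sql.types.MapType

    // New Spark 3.0 default: map() with no arguments yields MapType(NullType, NullType).
    val newDefault = spark.range(1).select(map()).schema.head.dataType.asInstanceOf[MapType]
    println(newDefault.keyType)    // expected: NullType

    // Legacy Spark 2.4 behavior, restored via the flag introduced here (internal config).
    spark.conf.set("spark.sql.legacy.createEmptyCollectionUsingStringType", "true")
    val legacy = spark.range(1).select(map()).schema.head.dataType.asInstanceOf[MapType]
    println(legacy.keyType)        // expected: StringType

    // Clean up so later queries use the new default again.
    spark.conf.unset("spark.sql.legacy.createEmptyCollectionUsingStringType")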