docs/sql-migration-guide.md (2 additions, 0 deletions)
@@ -215,6 +215,8 @@ license: |
For example, `SELECT timestamp 'tomorrow';`.

- Since Spark 3.0, the `size` function returns `NULL` for `NULL` input. In Spark version 2.4 and earlier, this function returns `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`.

- Since Spark 3.0, when the `array` function is called without any parameters, it returns an empty array of `NullType`. In Spark version 2.4 and earlier, it returns an empty array of string type. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.arrayDefaultToStringType.enabled` to `true`.

- Since Spark 3.0, the interval literal syntax no longer allows multiple from-to units. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' YEAR TO MONTH` throws a parser exception.
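A quick spark-shell sketch of the `array()` change described above (assuming a running `SparkSession` named `spark`; the result comments show expected types, not verbatim shell output):

```scala
// Spark 3.0 default: the element type of a zero-argument array() is NullType.
spark.sql("SELECT array()").schema.head.dataType
// ArrayType(NullType, containsNull = false)

// Flipping the legacy flag restores the Spark 2.4 shape for new queries.
spark.sql("SET spark.sql.legacy.arrayDefaultToStringType.enabled=true")
spark.sql("SELECT array()").schema.head.dataType
// ArrayType(StringType, containsNull = false)
```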

@@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

@@ -44,10 +45,18 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
    TypeUtils.checkForSameTypeInputExpr(children.map(_.dataType), s"function $prettyName")
  }

  private val defaultElementType: DataType = {
    if (SQLConf.get.getConf(SQLConf.LEGACY_ARRAY_DEFAULT_TO_STRING)) {
      StringType
    } else {
      NullType
    }
  }

  override def dataType: ArrayType = {
    ArrayType(
      TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(children.map(_.dataType))
-       .getOrElse(StringType),
+       .getOrElse(defaultElementType),
      containsNull = children.exists(_.nullable))
  }

@@ -2007,6 +2007,15 @@ object SQLConf {
    .booleanConf
    .createWithDefault(false)

  val LEGACY_ARRAY_DEFAULT_TO_STRING =
    buildConf("spark.sql.legacy.arrayDefaultToStringType.enabled")
      .internal()
      .doc("When set to true, the `array` function returns an empty array of string type " +
        "when called without any parameters. Otherwise, it returns an empty array " +
        "of `NullType`.")
      .booleanConf
      .createWithDefault(false)

  val TRUNCATE_TABLE_IGNORE_PERMISSION_ACL =
    buildConf("spark.sql.truncateTable.ignorePermissionAcl.enabled")
      .internal()
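For completeness, a sketch of how such a session-level flag is typically toggled and cleared at runtime (assuming a `SparkSession` named `spark`; the string key is shown here since `SQLConf` is internal API):

```scala
// Opt into the Spark 2.4 behavior for this session.
spark.conf.set("spark.sql.legacy.arrayDefaultToStringType.enabled", true)

// ... run queries that depend on the legacy string-typed empty arrays ...

// Remove the override so the session returns to the Spark 3.0 default.
spark.conf.unset("spark.sql.legacy.arrayDefaultToStringType.enabled")
```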
@@ -3499,12 +3499,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
    ).foreach(assertValuesDoNotChangeAfterCoalesceOrUnion(_))
  }

-  test("SPARK-21281 use string types by default if array and map have no argument") {
+  test("SPARK-21281 use string types by default if map has no argument") {
    val ds = spark.range(1)
    var expectedSchema = new StructType()
-      .add("x", ArrayType(StringType, containsNull = false), nullable = false)
-    assert(ds.select(array().as("x")).schema == expectedSchema)
-    expectedSchema = new StructType()
      .add("x", MapType(StringType, StringType, valueContainsNull = false), nullable = false)
    assert(ds.select(map().as("x")).schema == expectedSchema)
  }
@@ -3577,6 +3574,18 @@
    }.getMessage
    assert(nonFoldableError.contains("The 'escape' parameter must be a string literal"))
  }

test("SPARK-29462: Empty array of NullType for array function with no arguments") {
Seq((true, StringType), (false, NullType)).foreach {
case (arrayDefaultToString, expectedType) =>
withSQLConf(SQLConf.LEGACY_ARRAY_DEFAULT_TO_STRING.key -> arrayDefaultToString.toString) {
val schema = spark.range(1).select(array()).schema
assert(schema.nonEmpty && schema.head.dataType.isInstanceOf[ArrayType])
val actualType = schema.head.dataType.asInstanceOf[ArrayType].elementType
assert(actualType === expectedType)
}
}
}
}

object DataFrameFunctionsSuite {