Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/sql-migration-guide-upgrade.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ license: |

- Since Spark 3.0, if `hive.default.fileformat` is not found in `Spark SQL configuration` then it will fallback to hive-site.xml present in the `Hadoop configuration` of `SparkContext`.

- Since Spark 3.0, Spark will cast `String` to `Date/TimeStamp` in binary comparisons with dates/timestamps. The previous behaviour of casting `Date/Timestamp` to `String` can be restored by setting `spark.sql.legacy.typeCoercion.datetimeToString` to `true`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

spark.sql.legacy.typeCoercion.datetimeToString -> spark.sql.legacy.typeCoercion.datetimeToString.enabled

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


## Upgrading from Spark SQL 2.4 to 2.4.1

- The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,14 @@ object TypeCoercion {
*/
private def findCommonTypeForBinaryComparison(
dt1: DataType, dt2: DataType, conf: SQLConf): Option[DataType] = (dt1, dt2) match {
// We should cast all relative timestamp/date/string comparison into string comparisons
// This behaves as a user would expect because timestamp strings sort lexicographically.
// i.e. TimeStamp(2013-01-01 00:00 ...) < "2014" = true
case (StringType, DateType) => Some(StringType)
case (DateType, StringType) => Some(StringType)
case (StringType, TimestampType) => Some(StringType)
case (TimestampType, StringType) => Some(StringType)
case (StringType, DateType)
=> if (conf.castDatetimeToString) Some(StringType) else Some(DateType)
case (DateType, StringType)
=> if (conf.castDatetimeToString) Some(StringType) else Some(DateType)
case (StringType, TimestampType)
=> if (conf.castDatetimeToString) Some(StringType) else Some(TimestampType)
case (TimestampType, StringType)
=> if (conf.castDatetimeToString) Some(StringType) else Some(TimestampType)
case (StringType, NullType) => Some(StringType)
case (NullType, StringType) => Some(StringType)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1760,6 +1760,13 @@ object SQLConf {
.internal()
.intConf
.createWithDefault(Int.MaxValue)

val LEGACY_CAST_DATETIME_TO_STRING =
buildConf("spark.sql.legacy.typeCoercion.datetimeToString")
.doc("If it is set to true, date/timestamp will cast to string in binary comparisons " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we also add a migration guide for this behavior change and mention this config?

"with String")
.booleanConf
.createWithDefault(false)
}

/**
Expand Down Expand Up @@ -2211,6 +2218,8 @@ class SQLConf extends Serializable with Logging {
def setCommandRejectsSparkCoreConfs: Boolean =
getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS)

def castDatetimeToString: Boolean = getConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING)

/** ********************** SQLConf functionality methods ************ */

/** Set Spark SQL configuration properties. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ false
-- !query 10
select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52'
-- !query 10 schema
struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean>
struct<(to_date('2009-07-30 04:17:52') > CAST(2009-07-30 04:17:52 AS DATE)):boolean>
-- !query 10 output
false

Expand Down Expand Up @@ -141,9 +141,9 @@ true
-- !query 17
select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52'
-- !query 17 schema
struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean>
struct<(to_date('2009-07-30 04:17:52') >= CAST(2009-07-30 04:17:52 AS DATE)):boolean>
-- !query 17 output
false
true


-- !query 18
Expand Down Expand Up @@ -197,9 +197,9 @@ false
-- !query 24
select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52'
-- !query 24 schema
struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean>
struct<(to_date('2009-07-30 04:17:52') < CAST(2009-07-30 04:17:52 AS DATE)):boolean>
-- !query 24 output
true
false


-- !query 25
Expand Down Expand Up @@ -253,7 +253,7 @@ true
-- !query 31
select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52'
-- !query 31 schema
struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean>
struct<(to_date('2009-07-30 04:17:52') <= CAST(2009-07-30 04:17:52 AS DATE)):boolean>
-- !query 31 output
true

Expand Down
Loading