diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 14ccc259ef47..1aea26208694 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -128,6 +128,8 @@ license: | - Since Spark 3.0, if `hive.default.fileformat` is not found in `Spark SQL configuration` then it will fallback to hive-site.xml present in the `Hadoop configuration` of `SparkContext`. + - Since Spark 3.0, Spark will cast `String` to `Date/TimeStamp` in binary comparisons with dates/timestamps. The previous behaviour of casting `Date/Timestamp` to `String` can be restored by setting `spark.sql.legacy.typeCoercion.datetimeToString` to `true`. + ## Upgrading from Spark SQL 2.4 to 2.4.1 - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 13cc9b9c125e..1ff1e7fffebd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -120,13 +120,14 @@ object TypeCoercion { */ private def findCommonTypeForBinaryComparison( dt1: DataType, dt2: DataType, conf: SQLConf): Option[DataType] = (dt1, dt2) match { - // We should cast all relative timestamp/date/string comparison into string comparisons - // This behaves as a user would expect because timestamp strings sort lexicographically. - // i.e. TimeStamp(2013-01-01 00:00 ...) < "2014" = true - case (StringType, DateType) => Some(StringType) - case (DateType, StringType) => Some(StringType) - case (StringType, TimestampType) => Some(StringType) - case (TimestampType, StringType) => Some(StringType) + case (StringType, DateType) + => if (conf.castDatetimeToString) Some(StringType) else Some(DateType) + case (DateType, StringType) + => if (conf.castDatetimeToString) Some(StringType) else Some(DateType) + case (StringType, TimestampType) + => if (conf.castDatetimeToString) Some(StringType) else Some(TimestampType) + case (TimestampType, StringType) + => if (conf.castDatetimeToString) Some(StringType) else Some(TimestampType) case (StringType, NullType) => Some(StringType) case (NullType, StringType) => Some(StringType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 7f577f015973..b5e40dde0b4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1760,6 +1760,13 @@ object SQLConf { .internal() .intConf .createWithDefault(Int.MaxValue) + + val LEGACY_CAST_DATETIME_TO_STRING = + buildConf("spark.sql.legacy.typeCoercion.datetimeToString") + .doc("If it is set to true, date/timestamp will cast to string in binary comparisons " + + "with String") + .booleanConf + .createWithDefault(false) } /** @@ -2211,6 +2218,8 @@ class SQLConf extends Serializable with Logging { def setCommandRejectsSparkCoreConfs: Boolean = getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS) + def castDatetimeToString: Boolean = getConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out index cf828c69af62..d38cab8fa786 100644 --- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -85,7 +85,7 @@ false -- !query 10 select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' -- !query 10 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') > CAST(2009-07-30 04:17:52 AS DATE)):boolean> -- !query 10 output false @@ -141,9 +141,9 @@ true -- !query 17 select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' -- !query 17 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') >= CAST(2009-07-30 04:17:52 AS DATE)):boolean> -- !query 17 output -false +true -- !query 18 @@ -197,9 +197,9 @@ false -- !query 24 select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' -- !query 24 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') < CAST(2009-07-30 04:17:52 AS DATE)):boolean> -- !query 24 output -true +false -- !query 25 @@ -253,7 +253,7 @@ true -- !query 31 select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' -- !query 31 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean> +struct<(to_date('2009-07-30 04:17:52') <= CAST(2009-07-30 04:17:52 AS DATE)):boolean> -- !query 31 output true diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out index b764ce45d029..55caab8528fa 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/binaryComparison.sql.out @@ -1549,7 +1549,7 @@ NULL -- !query 193 SELECT '1996-09-09' = date('1996-09-09') FROM t -- !query 193 schema -struct<(1996-09-09 = CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(1996-09-09 AS DATE) = CAST(1996-09-09 AS DATE)):boolean> -- !query 193 output true @@ -1557,7 +1557,7 @@ true -- !query 194 SELECT '1996-9-10' > date('1996-09-09') FROM t -- !query 194 schema -struct<(1996-9-10 > CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(1996-9-10 AS DATE) > CAST(1996-09-09 AS DATE)):boolean> -- !query 194 output true @@ -1565,7 +1565,7 @@ true -- !query 195 SELECT '1996-9-10' >= date('1996-09-09') FROM t -- !query 195 schema -struct<(1996-9-10 >= CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(1996-9-10 AS DATE) >= CAST(1996-09-09 AS DATE)):boolean> -- !query 195 output true @@ -1573,7 +1573,7 @@ true -- !query 196 SELECT '1996-9-10' < date('1996-09-09') FROM t -- !query 196 schema -struct<(1996-9-10 < CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(1996-9-10 AS DATE) < CAST(1996-09-09 AS DATE)):boolean> -- !query 196 output false @@ -1581,7 +1581,7 @@ false -- !query 197 SELECT '1996-9-10' <= date('1996-09-09') FROM t -- !query 197 schema -struct<(1996-9-10 <= CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(1996-9-10 AS DATE) <= CAST(1996-09-09 AS DATE)):boolean> -- !query 197 output false @@ -1589,7 +1589,7 @@ false -- !query 198 SELECT '1996-9-10' <> date('1996-09-09') FROM t -- !query 198 schema -struct<(NOT (1996-9-10 = CAST(CAST(1996-09-09 AS DATE) AS STRING))):boolean> +struct<(NOT (CAST(1996-9-10 AS DATE) = CAST(1996-09-09 AS DATE))):boolean> -- !query 198 output true @@ -1597,7 +1597,7 @@ true -- !query 199 SELECT cast(null as string) = date('1996-09-09') FROM t -- !query 199 schema -struct<(CAST(NULL AS STRING) = CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS DATE) = CAST(1996-09-09 AS DATE)):boolean> -- !query 199 output NULL @@ -1605,7 +1605,7 @@ NULL -- !query 200 SELECT cast(null as string)> date('1996-09-09') FROM t -- !query 200 schema -struct<(CAST(NULL AS STRING) > CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS DATE) > CAST(1996-09-09 AS DATE)):boolean> -- !query 200 output NULL @@ -1613,7 +1613,7 @@ NULL -- !query 201 SELECT cast(null as string)>= date('1996-09-09') FROM t -- !query 201 schema -struct<(CAST(NULL AS STRING) >= CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS DATE) >= CAST(1996-09-09 AS DATE)):boolean> -- !query 201 output NULL @@ -1621,7 +1621,7 @@ NULL -- !query 202 SELECT cast(null as string)< date('1996-09-09') FROM t -- !query 202 schema -struct<(CAST(NULL AS STRING) < CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS DATE) < CAST(1996-09-09 AS DATE)):boolean> -- !query 202 output NULL @@ -1629,7 +1629,7 @@ NULL -- !query 203 SELECT cast(null as string)<= date('1996-09-09') FROM t -- !query 203 schema -struct<(CAST(NULL AS STRING) <= CAST(CAST(1996-09-09 AS DATE) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS DATE) <= CAST(1996-09-09 AS DATE)):boolean> -- !query 203 output NULL @@ -1637,7 +1637,7 @@ NULL -- !query 204 SELECT cast(null as string)<> date('1996-09-09') FROM t -- !query 204 schema -struct<(NOT (CAST(NULL AS STRING) = CAST(CAST(1996-09-09 AS DATE) AS STRING))):boolean> +struct<(NOT (CAST(CAST(NULL AS STRING) AS DATE) = CAST(1996-09-09 AS DATE))):boolean> -- !query 204 output NULL @@ -1645,7 +1645,7 @@ NULL -- !query 205 SELECT date('1996-09-09') = '1996-09-09' FROM t -- !query 205 schema -struct<(CAST(CAST(1996-09-09 AS DATE) AS STRING) = 1996-09-09):boolean> +struct<(CAST(1996-09-09 AS DATE) = CAST(1996-09-09 AS DATE)):boolean> -- !query 205 output true @@ -1653,7 +1653,7 @@ true -- !query 206 SELECT date('1996-9-10') > '1996-09-09' FROM t -- !query 206 schema -struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) > 1996-09-09):boolean> +struct<(CAST(1996-9-10 AS DATE) > CAST(1996-09-09 AS DATE)):boolean> -- !query 206 output true @@ -1661,7 +1661,7 @@ true -- !query 207 SELECT date('1996-9-10') >= '1996-09-09' FROM t -- !query 207 schema -struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) >= 1996-09-09):boolean> +struct<(CAST(1996-9-10 AS DATE) >= CAST(1996-09-09 AS DATE)):boolean> -- !query 207 output true @@ -1669,7 +1669,7 @@ true -- !query 208 SELECT date('1996-9-10') < '1996-09-09' FROM t -- !query 208 schema -struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) < 1996-09-09):boolean> +struct<(CAST(1996-9-10 AS DATE) < CAST(1996-09-09 AS DATE)):boolean> -- !query 208 output false @@ -1677,7 +1677,7 @@ false -- !query 209 SELECT date('1996-9-10') <= '1996-09-09' FROM t -- !query 209 schema -struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) <= 1996-09-09):boolean> +struct<(CAST(1996-9-10 AS DATE) <= CAST(1996-09-09 AS DATE)):boolean> -- !query 209 output false @@ -1685,7 +1685,7 @@ false -- !query 210 SELECT date('1996-9-10') <> '1996-09-09' FROM t -- !query 210 schema -struct<(NOT (CAST(CAST(1996-9-10 AS DATE) AS STRING) = 1996-09-09)):boolean> +struct<(NOT (CAST(1996-9-10 AS DATE) = CAST(1996-09-09 AS DATE))):boolean> -- !query 210 output true @@ -1693,7 +1693,7 @@ true -- !query 211 SELECT date('1996-09-09') = cast(null as string) FROM t -- !query 211 schema -struct<(CAST(CAST(1996-09-09 AS DATE) AS STRING) = CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-09-09 AS DATE) = CAST(CAST(NULL AS STRING) AS DATE)):boolean> -- !query 211 output NULL @@ -1701,7 +1701,7 @@ NULL -- !query 212 SELECT date('1996-9-10') > cast(null as string) FROM t -- !query 212 schema -struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) > CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-9-10 AS DATE) > CAST(CAST(NULL AS STRING) AS DATE)):boolean> -- !query 212 output NULL @@ -1709,7 +1709,7 @@ NULL -- !query 213 SELECT date('1996-9-10') >= cast(null as string) FROM t -- !query 213 schema -struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) >= CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-9-10 AS DATE) >= CAST(CAST(NULL AS STRING) AS DATE)):boolean> -- !query 213 output NULL @@ -1717,7 +1717,7 @@ NULL -- !query 214 SELECT date('1996-9-10') < cast(null as string) FROM t -- !query 214 schema -struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) < CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-9-10 AS DATE) < CAST(CAST(NULL AS STRING) AS DATE)):boolean> -- !query 214 output NULL @@ -1725,7 +1725,7 @@ NULL -- !query 215 SELECT date('1996-9-10') <= cast(null as string) FROM t -- !query 215 schema -struct<(CAST(CAST(1996-9-10 AS DATE) AS STRING) <= CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-9-10 AS DATE) <= CAST(CAST(NULL AS STRING) AS DATE)):boolean> -- !query 215 output NULL @@ -1733,7 +1733,7 @@ NULL -- !query 216 SELECT date('1996-9-10') <> cast(null as string) FROM t -- !query 216 schema -struct<(NOT (CAST(CAST(1996-9-10 AS DATE) AS STRING) = CAST(NULL AS STRING))):boolean> +struct<(NOT (CAST(1996-9-10 AS DATE) = CAST(CAST(NULL AS STRING) AS DATE))):boolean> -- !query 216 output NULL @@ -1749,7 +1749,7 @@ true -- !query 218 SELECT '1996-09-09 12:12:12.5' > timestamp('1996-09-09 12:12:12.4') FROM t -- !query 218 schema -struct<(1996-09-09 12:12:12.5 > CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) > CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 218 output true @@ -1757,7 +1757,7 @@ true -- !query 219 SELECT '1996-09-09 12:12:12.5' >= timestamp('1996-09-09 12:12:12.4') FROM t -- !query 219 schema -struct<(1996-09-09 12:12:12.5 >= CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) >= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 219 output true @@ -1765,7 +1765,7 @@ true -- !query 220 SELECT '1996-09-09 12:12:12.5' < timestamp('1996-09-09 12:12:12.4') FROM t -- !query 220 schema -struct<(1996-09-09 12:12:12.5 < CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) < CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 220 output false @@ -1773,7 +1773,7 @@ false -- !query 221 SELECT '1996-09-09 12:12:12.5' <= timestamp('1996-09-09 12:12:12.4') FROM t -- !query 221 schema -struct<(1996-09-09 12:12:12.5 <= CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) <= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 221 output false @@ -1797,7 +1797,7 @@ NULL -- !query 224 SELECT cast(null as string) > timestamp('1996-09-09 12:12:12.4') FROM t -- !query 224 schema -struct<(CAST(NULL AS STRING) > CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) > CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 224 output NULL @@ -1805,7 +1805,7 @@ NULL -- !query 225 SELECT cast(null as string) >= timestamp('1996-09-09 12:12:12.4') FROM t -- !query 225 schema -struct<(CAST(NULL AS STRING) >= CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) >= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 225 output NULL @@ -1813,7 +1813,7 @@ NULL -- !query 226 SELECT cast(null as string) < timestamp('1996-09-09 12:12:12.4') FROM t -- !query 226 schema -struct<(CAST(NULL AS STRING) < CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) < CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 226 output NULL @@ -1821,7 +1821,7 @@ NULL -- !query 227 SELECT cast(null as string) <= timestamp('1996-09-09 12:12:12.4') FROM t -- !query 227 schema -struct<(CAST(NULL AS STRING) <= CAST(CAST(1996-09-09 12:12:12.4 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(CAST(NULL AS STRING) AS TIMESTAMP) <= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 227 output NULL @@ -1845,7 +1845,7 @@ true -- !query 230 SELECT timestamp('1996-09-09 12:12:12.5' )> '1996-09-09 12:12:12.4' FROM t -- !query 230 schema -struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) > 1996-09-09 12:12:12.4):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) > CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 230 output true @@ -1853,7 +1853,7 @@ true -- !query 231 SELECT timestamp('1996-09-09 12:12:12.5' )>= '1996-09-09 12:12:12.4' FROM t -- !query 231 schema -struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) >= 1996-09-09 12:12:12.4):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) >= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 231 output true @@ -1861,7 +1861,7 @@ true -- !query 232 SELECT timestamp('1996-09-09 12:12:12.5' )< '1996-09-09 12:12:12.4' FROM t -- !query 232 schema -struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) < 1996-09-09 12:12:12.4):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) < CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 232 output false @@ -1869,7 +1869,7 @@ false -- !query 233 SELECT timestamp('1996-09-09 12:12:12.5' )<= '1996-09-09 12:12:12.4' FROM t -- !query 233 schema -struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) <= 1996-09-09 12:12:12.4):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) <= CAST(1996-09-09 12:12:12.4 AS TIMESTAMP)):boolean> -- !query 233 output false @@ -1893,7 +1893,7 @@ NULL -- !query 236 SELECT timestamp('1996-09-09 12:12:12.5' )> cast(null as string) FROM t -- !query 236 schema -struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) > CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) > CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> -- !query 236 output NULL @@ -1901,7 +1901,7 @@ NULL -- !query 237 SELECT timestamp('1996-09-09 12:12:12.5' )>= cast(null as string) FROM t -- !query 237 schema -struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) >= CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) >= CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> -- !query 237 output NULL @@ -1909,7 +1909,7 @@ NULL -- !query 238 SELECT timestamp('1996-09-09 12:12:12.5' )< cast(null as string) FROM t -- !query 238 schema -struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) < CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) < CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> -- !query 238 output NULL @@ -1917,7 +1917,7 @@ NULL -- !query 239 SELECT timestamp('1996-09-09 12:12:12.5' )<= cast(null as string) FROM t -- !query 239 schema -struct<(CAST(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) AS STRING) <= CAST(NULL AS STRING)):boolean> +struct<(CAST(1996-09-09 12:12:12.5 AS TIMESTAMP) <= CAST(CAST(NULL AS STRING) AS TIMESTAMP)):boolean> -- !query 239 output NULL diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out index 0beb1f6263d2..c54ceba85ce7 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/promoteStrings.sql.out @@ -1253,9 +1253,9 @@ NULL -- !query 150 SELECT '1' = cast('2017-12-11 09:30:00' as date) FROM t -- !query 150 schema -struct<(1 = CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +struct<(CAST(1 AS DATE) = CAST(2017-12-11 09:30:00 AS DATE)):boolean> -- !query 150 output -false +NULL -- !query 151 @@ -1341,9 +1341,9 @@ NULL -- !query 161 SELECT cast('2017-12-11 09:30:00' as date) = '1' FROM t -- !query 161 schema -struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) = 1):boolean> +struct<(CAST(2017-12-11 09:30:00 AS DATE) = CAST(1 AS DATE)):boolean> -- !query 161 output -false +NULL -- !query 162 @@ -1437,7 +1437,7 @@ false -- !query 173 SELECT '1' <=> cast('2017-12-11 09:30:00' as date) FROM t -- !query 173 schema -struct<(1 <=> CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +struct<(CAST(1 AS DATE) <=> CAST(2017-12-11 09:30:00 AS DATE)):boolean> -- !query 173 output false @@ -1525,7 +1525,7 @@ false -- !query 184 SELECT cast('2017-12-11 09:30:00' as date) <=> '1' FROM t -- !query 184 schema -struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) <=> 1):boolean> +struct<(CAST(2017-12-11 09:30:00 AS DATE) <=> CAST(1 AS DATE)):boolean> -- !query 184 output false @@ -1613,17 +1613,17 @@ false -- !query 195 SELECT '1' < cast('2017-12-11 09:30:00.0' as timestamp) FROM t -- !query 195 schema -struct<(1 < CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(1 AS TIMESTAMP) < CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> -- !query 195 output -true +NULL -- !query 196 SELECT '1' < cast('2017-12-11 09:30:00' as date) FROM t -- !query 196 schema -struct<(1 < CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +struct<(CAST(1 AS DATE) < CAST(2017-12-11 09:30:00 AS DATE)):boolean> -- !query 196 output -true +NULL -- !query 197 @@ -1709,17 +1709,17 @@ true -- !query 207 SELECT '1' <= cast('2017-12-11 09:30:00.0' as timestamp) FROM t -- !query 207 schema -struct<(1 <= CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(1 AS TIMESTAMP) <= CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> -- !query 207 output -true +NULL -- !query 208 SELECT '1' <= cast('2017-12-11 09:30:00' as date) FROM t -- !query 208 schema -struct<(1 <= CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +struct<(CAST(1 AS DATE) <= CAST(2017-12-11 09:30:00 AS DATE)):boolean> -- !query 208 output -true +NULL -- !query 209 @@ -1805,17 +1805,17 @@ false -- !query 219 SELECT '1' > cast('2017-12-11 09:30:00.0' as timestamp) FROM t -- !query 219 schema -struct<(1 > CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(1 AS TIMESTAMP) > CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> -- !query 219 output -false +NULL -- !query 220 SELECT '1' > cast('2017-12-11 09:30:00' as date) FROM t -- !query 220 schema -struct<(1 > CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +struct<(CAST(1 AS DATE) > CAST(2017-12-11 09:30:00 AS DATE)):boolean> -- !query 220 output -false +NULL -- !query 221 @@ -1901,17 +1901,17 @@ true -- !query 231 SELECT '1' >= cast('2017-12-11 09:30:00.0' as timestamp) FROM t -- !query 231 schema -struct<(1 >= CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING)):boolean> +struct<(CAST(1 AS TIMESTAMP) >= CAST(2017-12-11 09:30:00.0 AS TIMESTAMP)):boolean> -- !query 231 output -false +NULL -- !query 232 SELECT '1' >= cast('2017-12-11 09:30:00' as date) FROM t -- !query 232 schema -struct<(1 >= CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING)):boolean> +struct<(CAST(1 AS DATE) >= CAST(2017-12-11 09:30:00 AS DATE)):boolean> -- !query 232 output -false +NULL -- !query 233 @@ -2005,9 +2005,9 @@ NULL -- !query 244 SELECT '1' <> cast('2017-12-11 09:30:00' as date) FROM t -- !query 244 schema -struct<(NOT (1 = CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING))):boolean> +struct<(NOT (CAST(1 AS DATE) = CAST(2017-12-11 09:30:00 AS DATE))):boolean> -- !query 244 output -true +NULL -- !query 245 @@ -2093,17 +2093,17 @@ false -- !query 255 SELECT cast('2017-12-11 09:30:00.0' as timestamp) < '1' FROM t -- !query 255 schema -struct<(CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING) < 1):boolean> +struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) < CAST(1 AS TIMESTAMP)):boolean> -- !query 255 output -false +NULL -- !query 256 SELECT cast('2017-12-11 09:30:00' as date) < '1' FROM t -- !query 256 schema -struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) < 1):boolean> +struct<(CAST(2017-12-11 09:30:00 AS DATE) < CAST(1 AS DATE)):boolean> -- !query 256 output -false +NULL -- !query 257 @@ -2189,17 +2189,17 @@ true -- !query 267 SELECT cast('2017-12-11 09:30:00.0' as timestamp) <= '1' FROM t -- !query 267 schema -struct<(CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING) <= 1):boolean> +struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) <= CAST(1 AS TIMESTAMP)):boolean> -- !query 267 output -false +NULL -- !query 268 SELECT cast('2017-12-11 09:30:00' as date) <= '1' FROM t -- !query 268 schema -struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) <= 1):boolean> +struct<(CAST(2017-12-11 09:30:00 AS DATE) <= CAST(1 AS DATE)):boolean> -- !query 268 output -false +NULL -- !query 269 @@ -2285,17 +2285,17 @@ false -- !query 279 SELECT cast('2017-12-11 09:30:00.0' as timestamp) > '1' FROM t -- !query 279 schema -struct<(CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING) > 1):boolean> +struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) > CAST(1 AS TIMESTAMP)):boolean> -- !query 279 output -true +NULL -- !query 280 SELECT cast('2017-12-11 09:30:00' as date) > '1' FROM t -- !query 280 schema -struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) > 1):boolean> +struct<(CAST(2017-12-11 09:30:00 AS DATE) > CAST(1 AS DATE)):boolean> -- !query 280 output -true +NULL -- !query 281 @@ -2381,17 +2381,17 @@ true -- !query 291 SELECT cast('2017-12-11 09:30:00.0' as timestamp) >= '1' FROM t -- !query 291 schema -struct<(CAST(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) AS STRING) >= 1):boolean> +struct<(CAST(2017-12-11 09:30:00.0 AS TIMESTAMP) >= CAST(1 AS TIMESTAMP)):boolean> -- !query 291 output -true +NULL -- !query 292 SELECT cast('2017-12-11 09:30:00' as date) >= '1' FROM t -- !query 292 schema -struct<(CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) >= 1):boolean> +struct<(CAST(2017-12-11 09:30:00 AS DATE) >= CAST(1 AS DATE)):boolean> -- !query 292 output -true +NULL -- !query 293 @@ -2485,9 +2485,9 @@ NULL -- !query 304 SELECT cast('2017-12-11 09:30:00' as date) <> '1' FROM t -- !query 304 schema -struct<(NOT (CAST(CAST(2017-12-11 09:30:00 AS DATE) AS STRING) = 1)):boolean> +struct<(NOT (CAST(2017-12-11 09:30:00 AS DATE) = CAST(1 AS DATE))):boolean> -- !query 304 output -true +NULL -- !query 305 diff --git a/sql/core/src/test/resources/tpcds/q32.sql b/sql/core/src/test/resources/tpcds/q32.sql index 1a907961e74b..a6f59ecb8736 100755 --- a/sql/core/src/test/resources/tpcds/q32.sql +++ b/sql/core/src/test/resources/tpcds/q32.sql @@ -10,6 +10,6 @@ WHERE SELECT 1.3 * avg(cs_ext_discount_amt) FROM catalog_sales, date_dim WHERE cs_item_sk = i_item_sk - AND d_date BETWEEN '2000-01-27]' AND (cast('2000-01-27' AS DATE) + interval 90 days) + AND d_date BETWEEN '2000-01-27' AND (cast('2000-01-27' AS DATE) + interval 90 days) AND d_date_sk = cs_sold_date_sk) LIMIT 100 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index db896b3b36ac..6ae77a8e68c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3024,6 +3024,84 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { sql("reset") } } + + test("string date comparison") { + spark.range(1).selectExpr("date '2000-01-01' as d").createOrReplaceTempView("t1") + val result = Date.valueOf("2000-01-01") + checkAnswer(sql("select * from t1 where d < '2000'"), Nil) + checkAnswer(sql("select * from t1 where d < '2001'"), Row(result)) + checkAnswer(sql("select * from t1 where d < '2000-01'"), Nil) + checkAnswer(sql("select * from t1 where d < '2000-01-01'"), Nil) + checkAnswer(sql("select * from t1 where d < '2000-1-1'"), Nil) + checkAnswer(sql("select * from t1 where d <= '2000-1-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d <= '1999-12-30'"), Nil) + checkAnswer(sql("select * from t1 where d = '2000-1-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d = '2000-01-01'"), Row(result)) + checkAnswer(sql("select * from t1 where d = '2000-1-02'"), Nil) + checkAnswer(sql("select * from t1 where d > '2000-01-01'"), Nil) + checkAnswer(sql("select * from t1 where d > '1999'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-1-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-1-01'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-01-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-01-01'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-01-02'"), Nil) + checkAnswer(sql("select * from t1 where '2000' >= d"), Row(result)) + checkAnswer(sql("select * from t1 where d > '2000-13'"), Nil) + + withSQLConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING.key -> "true") { + checkAnswer(sql("select * from t1 where d < '2000'"), Nil) + checkAnswer(sql("select * from t1 where d < '2001'"), Row(result)) + checkAnswer(sql("select * from t1 where d < '2000-1-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d <= '1999'"), Nil) + checkAnswer(sql("select * from t1 where d >= '2000'"), Row(result)) + checkAnswer(sql("select * from t1 where d > '1999-13'"), Row(result)) + checkAnswer(sql("select to_date('2000-01-01') > '1'"), Row(true)) + } + } + + test("string timestamp comparison") { + spark.range(1) + .selectExpr("timestamp '2000-01-01 01:10:00.000' as d") + .createOrReplaceTempView("t1") + val result = Timestamp.valueOf("2000-01-01 01:10:00") + checkAnswer(sql("select * from t1 where d < '2000'"), Nil) + checkAnswer(sql("select * from t1 where d < '2001'"), Row(result)) + checkAnswer(sql("select * from t1 where d < '2000-01'"), Nil) + checkAnswer(sql("select * from t1 where d < '2000-1-1'"), Nil) + checkAnswer(sql("select * from t1 where d < '2000-01-01 01:10:00.000'"), Nil) + checkAnswer(sql("select * from t1 where d < '2000-01-01 02:10:00.000'"), Row(result)) + checkAnswer(sql("select * from t1 where d <= '2000-1-1 01:10:00'"), Row(result)) + checkAnswer(sql("select * from t1 where d <= '2000-1-1 01:00:00'"), Nil) + checkAnswer(sql("select * from t1 where d = '2000-1-1 01:10:00.000'"), Row(result)) + checkAnswer(sql("select * from t1 where d = '2000-01-01 01:10:00.000'"), Row(result)) + checkAnswer(sql("select * from t1 where d = '2000-1-02 01:10:00.000'"), Nil) + checkAnswer(sql("select * from t1 where d > '2000'"), Row(result)) + checkAnswer(sql("select * from t1 where d > '2000-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d > '2000-1-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d > '2000-1-1 01:00:00.000'"), Row(result)) + checkAnswer(sql("select * from t1 where d > '2001'"), Nil) + checkAnswer(sql("select * from t1 where d > '2000-01-02'"), Nil) + checkAnswer(sql("select * from t1 where d >= '2000-1-01'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-01-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-01-01'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-01-01 01:10:00.000'"), Row(result)) + checkAnswer(sql("select * from t1 where d >= '2000-01-02 01:10:00.000'"), Nil) + checkAnswer(sql("select * from t1 where '2000' >= d"), Nil) + checkAnswer(sql("select * from t1 where d > '2000-13'"), Nil) + + withSQLConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING.key -> "true") { + checkAnswer(sql("select * from t1 where d < '2000'"), Nil) + checkAnswer(sql("select * from t1 where d < '2001'"), Row(result)) + checkAnswer(sql("select * from t1 where d <= '2000-1-1'"), Row(result)) + checkAnswer(sql("select * from t1 where d <= '2000-01-02'"), Row(result)) + checkAnswer(sql("select * from t1 where d <= '1999'"), Nil) + checkAnswer(sql("select * from t1 where d >= '2000'"), Row(result)) + checkAnswer(sql("select * from t1 where d > '1999-13'"), Row(result)) + checkAnswer(sql("select to_timestamp('2000-01-01 01:10:00') > '1'"), Row(true)) + } + } } case class Foo(bar: Option[String])