diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index 06e1cdc27e7d5..fe20e546f5d24 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -117,7 +117,13 @@ class LegacySimpleDateFormatter(pattern: String, locale: Locale) extends LegacyD
 object DateFormatter {
   import LegacyDateFormats._
 
-  val defaultLocale: Locale = Locale.US
+  /**
+   * Before Spark 3.0, the first day-of-week is always Monday. Since Spark 3.0, it depends on the
+   * locale.
+   * We pick GB as the default locale instead of US, to be compatible with Spark 2.x, as US locale
+   * uses Sunday as the first day-of-week. See SPARK-31879.
+   */
+  val defaultLocale: Locale = new Locale("en", "GB")
 
   val defaultPattern: String = "yyyy-MM-dd"
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index 3e302e2170390..1f14c70164c1a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -278,7 +278,13 @@ object LegacyDateFormats extends Enumeration {
 object TimestampFormatter {
   import LegacyDateFormats._
 
-  val defaultLocale: Locale = Locale.US
+  /**
+   * Before Spark 3.0, the first day-of-week is always Monday. Since Spark 3.0, it depends on the
+   * locale.
+   * We pick GB as the default locale instead of US, to be compatible with Spark 2.x, as US locale
+   * uses Sunday as the first day-of-week. See SPARK-31879.
+   */
+  val defaultLocale: Locale = new Locale("en", "GB")
 
   def defaultPattern(): String = s"${DateFormatter.defaultPattern} HH:mm:ss"
 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
index 9bd936f6f441f..5636e0b670362 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -164,3 +164,7 @@ select from_csv('26/October/2015', 'date Date', map('dateFormat', 'dd/MMMMM/yyyy
 select from_unixtime(1, 'yyyyyyyyyyy-MM-dd');
 select date_format(timestamp '2018-11-17 13:33:33', 'yyyyyyyyyy-MM-dd HH:mm:ss');
 select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd');
+
+-- SPARK-31879: the first day of week
+select date_format('2020-01-01', 'YYYY-MM-dd uu');
+select date_format('2020-01-01', 'YYYY-MM-dd uuuu');
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
index ca04b008d6537..3803460f3f083 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 119
+-- Number of queries: 121
 
 
 -- !query
@@ -1025,3 +1025,19 @@ struct<>
 -- !query output
 org.apache.spark.SparkUpgradeException
 You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 03
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uuuu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 Wednesday
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
index fe932d3a706a8..99dd14d21e6fd 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 119
+-- Number of queries: 121
 
 
 -- !query
@@ -980,3 +980,19 @@ select date_format(date '2018-11-17', 'yyyyyyyyyyy-MM-dd')
 struct
 -- !query output
 00000002018-11-17
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 03
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uuuu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 0003
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index 06a41da2671e6..c8c568c736d76 100755
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 119
+-- Number of queries: 121
 
 
 -- !query
@@ -997,3 +997,19 @@ struct<>
 -- !query output
 org.apache.spark.SparkUpgradeException
 You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'yyyyyyyyyyy-MM-dd' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 03
+
+
+-- !query
+select date_format('2020-01-01', 'YYYY-MM-dd uuuu')
+-- !query schema
+struct
+-- !query output
+2020-01-01 Wednesday
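
Why the locale choice matters: `java.time` numbers the localized day-of-week field (pattern letter `e`) relative to the locale's first day-of-week, which it takes from `WeekFields`: Sunday-first for `en_US`, Monday-first for `en_GB`. The `uuuu` -> `Wednesday` golden-file output above suggests Spark 3.0 maps the legacy `u` (SimpleDateFormat's "day number of week") onto that `e` field, so the default locale leaks into query results. A minimal, self-contained Scala sketch of the underlying JDK behavior (the demo object name is made up; the `java.time`/`WeekFields` calls are standard JDK API):

```scala
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.time.temporal.WeekFields
import java.util.Locale

// Hypothetical demo, not part of the Spark codebase.
object FirstDayOfWeekDemo extends App {
  val date = LocalDate.of(2020, 1, 1) // a Wednesday

  for (locale <- Seq(Locale.US, new Locale("en", "GB"))) {
    // WeekFields reports the locale's first day-of-week:
    // SUNDAY for en_US, MONDAY for en_GB.
    val firstDow = WeekFields.of(locale).getFirstDayOfWeek

    // The localized day-of-week field ('e') counts from that first day,
    // so Wednesday is day 4 under en_US but day 3 under en_GB.
    val dowNumber = DateTimeFormatter.ofPattern("ee", locale).format(date)

    println(s"$locale: firstDayOfWeek=$firstDow, 'ee'=$dowNumber")
  }
}
```

This prints `04` for `en_US` and `03` for `en_GB`; only the latter matches the Spark 2.x legacy result (`03`, Monday-first) asserted in `datetime-legacy.sql.out`, which is the compatibility this patch restores for the new formatter.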