From 1bc02ada576440f9a49e6e51e0eb1e8ddf393ebb Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 11 Oct 2021 11:41:24 +0800 Subject: [PATCH 1/5] Manual disabled format B --- .../spark/sql/catalyst/util/DateTimeFormatterHelper.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index b00113b2e9ee3..71b3c84f006d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -280,6 +280,7 @@ private object DateTimeFormatterHelper { // 2.4, the SimpleDateFormat uses Monday as the first day of week. final val weekBasedLetters = Set('Y', 'W', 'w', 'u', 'e', 'c') final val unsupportedLetters = Set('A', 'n', 'N', 'p') + final val unknownPatternLetters: Set[Char] = Set('B') // The quarter fields will also be parsed strangely, e.g. when the pattern contains `yMd` and can // be directly resolved then the `q` do check for whether the month is valid, but if the date // fields is incomplete, e.g. `yM`, the checking will be bypassed. @@ -322,6 +323,13 @@ private object DateTimeFormatterHelper { (isParsing && unsupportedLettersForParsing.contains(c))) { throw new IllegalArgumentException(s"Illegal pattern character: $c") } + // SPARK-36970: `select date_format('2018-11-17 13:33:33.333', 'B')` failed with Java 8, + // but use Java 17 will return `in the afternoon` because 'B' is used to represent + // `Pattern letters to output a day period` in Java 17 and manual disabled it here for + // compatibility with Java 8 behavior. + for (c <- patternPart if unknownPatternLetters.contains(c)) { + throw new IllegalArgumentException(s"Unknown pattern letter: $c") + } for (style <- unsupportedPatternLengths if patternPart.contains(style)) { throw new IllegalArgumentException(s"Too many pattern letters: ${style.head}") } From df5ec7e9e56a98aada903b0de660ff7b0f46a089 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 11 Oct 2021 18:01:03 +0800 Subject: [PATCH 2/5] use unsupportedLetters --- .../catalyst/util/DateTimeFormatterHelper.scala | 14 +++++--------- .../results/datetime-formatting-invalid.sql.out | 2 +- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index 71b3c84f006d7..cb03ab2ee4aae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -279,8 +279,11 @@ private object DateTimeFormatterHelper { // localized, for the default Locale.US, it uses Sunday as the first day of week, while in Spark // 2.4, the SimpleDateFormat uses Monday as the first day of week. final val weekBasedLetters = Set('Y', 'W', 'w', 'u', 'e', 'c') - final val unsupportedLetters = Set('A', 'n', 'N', 'p') - final val unknownPatternLetters: Set[Char] = Set('B') + // SPARK-36970: `select date_format('2018-11-17 13:33:33.333', 'B')` failed with Java 8, + // but use Java 17 will return `in the afternoon` because 'B' is used to represent + // `Pattern letters to output a day period` in Java 17. So there manual disabled `B` for + // compatibility with Java 8 behavior. + final val unsupportedLetters = Set('A', 'B', 'n', 'N', 'p') // The quarter fields will also be parsed strangely, e.g. when the pattern contains `yMd` and can // be directly resolved then the `q` do check for whether the month is valid, but if the date // fields is incomplete, e.g. `yM`, the checking will be bypassed. @@ -323,13 +326,6 @@ private object DateTimeFormatterHelper { (isParsing && unsupportedLettersForParsing.contains(c))) { throw new IllegalArgumentException(s"Illegal pattern character: $c") } - // SPARK-36970: `select date_format('2018-11-17 13:33:33.333', 'B')` failed with Java 8, - // but use Java 17 will return `in the afternoon` because 'B' is used to represent - // `Pattern letters to output a day period` in Java 17 and manual disabled it here for - // compatibility with Java 8 behavior. - for (c <- patternPart if unknownPatternLetters.contains(c)) { - throw new IllegalArgumentException(s"Unknown pattern letter: $c") - } for (style <- unsupportedPatternLengths if patternPart.contains(style)) { throw new IllegalArgumentException(s"Too many pattern letters: ${style.head}") } diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out index 18d1a10068794..9c8553dc0f01f 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-formatting-invalid.sql.out @@ -314,7 +314,7 @@ select date_format('2018-11-17 13:33:33.333', 'B') struct<> -- !query output java.lang.IllegalArgumentException -Unknown pattern letter: B +Illegal pattern character: B -- !query From 8271b478ef0b808662c08c95cf852f6cd8cef3f4 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 11 Oct 2021 18:04:00 +0800 Subject: [PATCH 3/5] fix case --- .../apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala index 0640dc7a16edb..332568ab2d71a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DatetimeFormatterSuite.scala @@ -76,7 +76,7 @@ trait DatetimeFormatterSuite extends SparkFunSuite with SQLHelper with Matchers Seq(true, false).foreach { isParsing => // not support by the legacy one too - val unsupportedBoth = Seq("QQQQQ", "qqqqq", "eeeee", "A", "c", "n", "N", "p", "e") + val unsupportedBoth = Seq("QQQQQ", "qqqqq", "eeeee", "A", "B", "c", "n", "N", "p", "e") unsupportedBoth.foreach { pattern => intercept[IllegalArgumentException](checkFormatterCreation(pattern, isParsing)) } From e274c7c7e1b87c1da0119c9a660fa3fdae7613df Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 12 Oct 2021 19:26:05 +0800 Subject: [PATCH 4/5] Trigger build From ac2e7007b0d37ec7f6af6e8375f03e24174ac59a Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 12 Oct 2021 19:39:27 +0800 Subject: [PATCH 5/5] Trigger build