From f1cbe301cca284801b3eec0a32f26f55728cd4fc Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 16 Feb 2024 14:20:37 +0300 Subject: [PATCH] [SPARK-47072][SQL] Fix supported interval formats in error messages In the PR, I propose to add one more field to keys of `supportedFormat` in `IntervalUtils` because current implementation has duplicate keys that overwrites each other. For instance, the following keys are the same: ``` (YM.YEAR, YM.MONTH) ... (DT.DAY, DT.HOUR) ``` because `YM.YEAR = DT.DAY = 0` and `YM.MONTH = DT.HOUR = 1`. To fix the incorrect error message when Spark cannot parse ANSI interval string. For example, the expected format should be some year-month format but Spark outputs day-time one: ```sql spark-sql (default)> select interval '-\t2-2\t' year to month; Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: - 2-2 . (line 1, pos 16) == SQL == select interval '-\t2-2\t' year to month ----------------^^^ ``` Yes. By running the existing test suite: ``` $ build/sbt "test:testOnly *IntervalUtilsSuite" ``` and regenerating the golden files: ``` $ SPARK_GENERATE_GOLDEN_FILES=1 PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` No. Closes #45127 from MaxGekk/fix-supportedFormat. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 074fcf2807000d342831379de0fafc1e49a6bf19) Signed-off-by: Max Gekk --- .../sql/catalyst/util/IntervalUtils.scala | 33 +++++++++++-------- .../catalyst/expressions/CastSuiteBase.scala | 8 ++--- .../analyzer-results/ansi/interval.sql.out | 2 +- .../analyzer-results/interval.sql.out | 2 +- .../sql-tests/results/ansi/interval.sql.out | 2 +- .../sql-tests/results/interval.sql.out | 2 +- 6 files changed, 27 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index e051cfc37f12d..4d90007400ea7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -107,25 +107,30 @@ object IntervalUtils extends SparkIntervalUtils { fallBackNotice: Option[String] = None) = { throw new IllegalArgumentException( s"Interval string does not match $intervalStr format of " + - s"${supportedFormat((startFiled, endField)).map(format => s"`$format`").mkString(", ")} " + + s"${supportedFormat((intervalStr, startFiled, endField)) + .map(format => s"`$format`").mkString(", ")} " + s"when cast to $typeName: ${input.toString}" + s"${fallBackNotice.map(s => s", $s").getOrElse("")}") } val supportedFormat = Map( - (YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"), - (YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"), - (YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"), - (DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"), - (DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"), - (DT.DAY, DT.MINUTE) -> Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"), - (DT.DAY, DT.SECOND) -> Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"), - (DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"), - (DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"), - (DT.HOUR, DT.SECOND) -> Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"), - (DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"), - (DT.MINUTE, DT.SECOND) -> Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"), - (DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND") + ("year-month", YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"), + ("year-month", YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"), + ("year-month", YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"), + ("day-time", DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"), + ("day-time", DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"), + ("day-time", DT.DAY, DT.MINUTE) -> + Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"), + ("day-time", DT.DAY, DT.SECOND) -> + Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"), + ("day-time", DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"), + ("day-time", DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"), + ("day-time", DT.HOUR, DT.SECOND) -> + Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"), + ("day-time", DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"), + ("day-time", DT.MINUTE, DT.SECOND) -> + Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"), + ("day-time", DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND") ) def castStringToYMInterval( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 1ce311a5544fa..4352d5bc9c6bb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -1174,7 +1174,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { Seq("INTERVAL '1-1' YEAR", "INTERVAL '1-1' MONTH").foreach { interval => val dataType = YearMonthIntervalType() val expectedMsg = s"Interval string does not match year-month format of " + - s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")} " + s"when cast to ${dataType.typeName}: $interval" checkExceptionInExpression[IllegalArgumentException]( @@ -1194,7 +1194,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ("INTERVAL '1' MONTH", YearMonthIntervalType(YEAR, MONTH))) .foreach { case (interval, dataType) => val expectedMsg = s"Interval string does not match year-month format of " + - s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")} " + s"when cast to ${dataType.typeName}: $interval" checkExceptionInExpression[IllegalArgumentException]( @@ -1314,7 +1314,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ("1.23", DayTimeIntervalType(MINUTE))) .foreach { case (interval, dataType) => val expectedMsg = s"Interval string does not match day-time format of " + - s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")} " + s"when cast to ${dataType.typeName}: $interval, " + s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " + @@ -1338,7 +1338,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ("INTERVAL '92233720368541.775807' SECOND", DayTimeIntervalType(SECOND))) .foreach { case (interval, dataType) => val expectedMsg = "Interval string does not match day-time format of " + - s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")} " + s"when cast to ${dataType.typeName}: $interval, " + s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " + diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out index 1120c40ac15c4..2e2a07beb7176 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out @@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out index 337edd5980c39..6242dc142eabb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out @@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 9eb4a4766df89..b0d128e967a6d 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -2355,7 +2355,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index fe15ade941785..faba4abfdbe7d 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -2168,7 +2168,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "",