From ce7caad832b61f80d113903bc9b475a07f20b512 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 5 Apr 2021 23:45:32 +0300 Subject: [PATCH 1/7] Cast year-month to string --- .../spark/sql/catalyst/expressions/Cast.scala | 5 +++++ .../spark/sql/catalyst/util/IntervalUtils.scala | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 6b18563f367c..5756623a43ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -406,6 +406,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case pudt: PythonUserDefinedType => castToString(pudt.sqlType) case udt: UserDefinedType[_] => buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString)) + case YearMonthIntervalType => + buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYMIntervalString(i))) case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString)) } @@ -1121,6 +1123,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => { code"$evPrim = UTF8String.fromString($udtRef.deserialize($c).toString());" } + case YearMonthIntervalType => + val iu = IntervalUtils.getClass.getName.stripSuffix("$") + (c, evPrim, _) => code"""$evPrim = UTF8String.fromString($iu.toYMIntervalString($c));""" case _ => (c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 58a52475b624..540b95c3d65c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -834,4 +834,21 @@ object IntervalUtils { * @return The period of months, not null */ def monthsToPeriod(months: Int): Period = Period.ofMonths(months).normalized() + + /** + * Converts an year-month interval as a number of months to its textual representation + * which conforms to the ANSI SQL standard. + * + * @param months The number of months, positive or negative + * @return Year-month interval string + */ + def toYMIntervalString(months: Int): String = { + var sign = "" + var absMonths: Long = months + if (months < 0) { + sign = "-" + absMonths = -absMonths + } + s"interval '$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}' year to month" + } } From 1bfba7f83d200a52446ee3f0b99b8b4786c0789e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 5 Apr 2021 23:45:42 +0300 Subject: [PATCH 2/7] Add tests --- .../sql/catalyst/expressions/CastSuite.scala | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 3a79e8d383e4..2fd174750236 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} -import java.time.DateTimeException +import java.time.{DateTimeException, Period} import java.util.{Calendar, TimeZone} import scala.collection.parallel.immutable.ParVector @@ -799,6 +799,24 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-34667: cast year-month interval to string") { + Seq( + Period.ofMonths(0) -> "0-0", + Period.ofMonths(1) -> "0-1", + Period.ofMonths(-1) -> "-0-1", + Period.ofYears(1) -> "1-0", + Period.ofYears(-1) -> "-1-0", + Period.ofYears(10).plusMonths(10) -> "10-10", + Period.ofYears(-123).minusMonths(6) -> "-123-6", + Period.ofMonths(Int.MaxValue) -> "178956970-7", + Period.ofMonths(Int.MinValue) -> "-178956970-8" + ).foreach { case (period, intervalPayload) => + checkEvaluation( + Cast(Literal(period), StringType), + s"interval '$intervalPayload' year to month") + } + } } abstract class AnsiCastSuiteBase extends CastSuiteBase { From 70f8b47a3bc1309fc5fa5bdef5a8fbf0b311c431 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 5 Apr 2021 23:46:47 +0300 Subject: [PATCH 3/7] Null cast check --- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 2fd174750236..ec7a29a81c8a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -64,9 +64,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { atomicTypes.foreach(dt => checkNullCast(NullType, dt)) (atomicTypes -- Set( // TODO(SPARK-34668): Support casting of day-time intervals to strings - DayTimeIntervalType, - // TODO(SPARK-34667): Support casting of year-month intervals to strings - YearMonthIntervalType)).foreach(dt => checkNullCast(dt, StringType)) + DayTimeIntervalType)).foreach(dt => checkNullCast(dt, StringType)) checkNullCast(StringType, BinaryType) checkNullCast(StringType, BooleanType) numericTypes.foreach(dt => checkNullCast(dt, BooleanType)) From d203e1ef4767940fd70dbff4528ab3d779b196e0 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 5 Apr 2021 23:50:19 +0300 Subject: [PATCH 4/7] checkConsistencyBetweenInterpretedAndCodegen --- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index ec7a29a81c8a..26197492885d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -814,6 +814,9 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { Cast(Literal(period), StringType), s"interval '$intervalPayload' year to month") } + + checkConsistencyBetweenInterpretedAndCodegen( + (child: Expression) => Cast(child, StringType), YearMonthIntervalType) } } From 4be294ae19ba3ee3299ce4345d8232d4884104ac Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 6 Apr 2021 12:25:40 +0300 Subject: [PATCH 5/7] toYMIntervalString -> toYearMonthIntervalString --- .../org/apache/spark/sql/catalyst/expressions/Cast.scala | 5 +++-- .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 5756623a43ba..1c37713204f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -407,7 +407,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case udt: UserDefinedType[_] => buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString)) case YearMonthIntervalType => - buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYMIntervalString(i))) + buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYearMonthIntervalString(i))) case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString)) } @@ -1125,7 +1125,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } case YearMonthIntervalType => val iu = IntervalUtils.getClass.getName.stripSuffix("$") - (c, evPrim, _) => code"""$evPrim = UTF8String.fromString($iu.toYMIntervalString($c));""" + (c, evPrim, _) => + code"""$evPrim = UTF8String.fromString($iu.toYearMonthIntervalString($c));""" case _ => (c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 540b95c3d65c..ad92599d5943 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -842,7 +842,7 @@ object IntervalUtils { * @param months The number of months, positive or negative * @return Year-month interval string */ - def toYMIntervalString(months: Int): String = { + def toYearMonthIntervalString(months: Int): String = { var sign = "" var absMonths: Long = months if (months < 0) { From df2992c923c4c053542eb9c101c020d70da5851a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 6 Apr 2021 12:28:37 +0300 Subject: [PATCH 6/7] To upper case --- .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala | 2 +- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index ad92599d5943..8cd9d2815476 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -849,6 +849,6 @@ object IntervalUtils { sign = "-" absMonths = -absMonths } - s"interval '$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}' year to month" + s"INTERVAL '$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}' YEAR TO MONTH" } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 26197492885d..547bf887673a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -812,7 +812,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ).foreach { case (period, intervalPayload) => checkEvaluation( Cast(Literal(period), StringType), - s"interval '$intervalPayload' year to month") + s"INTERVAL '$intervalPayload' YEAR TO MONTH") } checkConsistencyBetweenInterpretedAndCodegen( From d91847325a6a35095497b63dcce1777ef98ad807 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 6 Apr 2021 13:52:44 +0300 Subject: [PATCH 7/7] Trigger build