From 18bc09be6ed76799dd33a4a3658efd032486b123 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 17:29:13 +0300 Subject: [PATCH 01/14] Add SubtractDates --- .../expressions/datetimeExpressions.scala | 18 ++++++++++++++++++ .../sql/catalyst/util/DateTimeUtils.scala | 18 +++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 5aea884ad5003..0379d916474be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1702,6 +1702,24 @@ case class DateDiff(endDate: Expression, startDate: Expression) } } +case class SubtractDates(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType) + override def dataType: DataType = CalendarIntervalType + + override def nullSafeEval(leftDays: Any, rightDays: Any): Any = { + DateTimeUtils.subtractDates(leftDays.asInstanceOf[Int], rightDays.asInstanceOf[Int]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + defineCodeGen(ctx, ev, (leftDays, rightDays) => { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + s"$dtu.subtractDates($leftDays, $rightDays)" + }) + } +} + /** * Gets timestamps from strings using given pattern. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 34e8012106bbe..088876921dccd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -27,7 +27,7 @@ import java.util.concurrent.TimeUnit._ import scala.util.control.NonFatal import org.apache.spark.sql.types.Decimal -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} /** * Helper functions for converting between internal and external date and time representations. @@ -950,4 +950,20 @@ object DateTimeUtils { None } } + + /** + * Subtracts two dates. + * @param endDate - the end date, exclusive + * @param startDate - the start date, inclusive + * @return an interval between two dates. The interval can be negative + * if the end date is before the start date. 
+ */ + def subtractDates(endDate: SQLDate, startDate: SQLDate): CalendarInterval = { + val period = Period.between( + LocalDate.ofEpochDay(startDate), + LocalDate.ofEpochDay(endDate)) + val months = period.getMonths + 12 * period.getYears + val microseconds = period.getDays * MICROS_PER_DAY + new CalendarInterval(months, microseconds) + } } From 141d3dad11e2573f77939b236424e365717fc554 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 17:36:01 +0300 Subject: [PATCH 02/14] Add tests for SubtractDates --- .../expressions/DateExpressionsSuite.scala | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 8680a15ee1cd7..6b194726e7a66 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset} +import java.time.{Instant, LocalDate, LocalDateTime, ZoneId, ZoneOffset} import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit import java.util.concurrent.TimeUnit._ @@ -1087,4 +1087,24 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { CalendarInterval.fromString("interval 521722 weeks 4 days " + "23 hours 59 minutes 59 seconds 999 milliseconds 999 microseconds")) } + + test("subtract dates") { + val end = LocalDate.of(2019, 10, 5) + checkEvaluation(SubtractDates(Literal(end), Literal(end)), + new CalendarInterval(0, 0)) + checkEvaluation(SubtractDates(Literal(end.plusDays(1)), Literal(end)), + CalendarInterval.fromString("interval 1 
days")) + checkEvaluation(SubtractDates(Literal(end.minusDays(1)), Literal(end)), + CalendarInterval.fromString("interval -1 days")) + val epochDate = Literal(LocalDate.ofEpochDay(0)) + checkEvaluation(SubtractDates(Literal(end), epochDate), + CalendarInterval.fromString("interval 49 years 9 months 4 days")) + checkEvaluation(SubtractDates(epochDate, Literal(end)), + CalendarInterval.fromString("interval -49 years -9 months -4 days")) + checkEvaluation( + SubtractDates( + Literal(LocalDate.of(10000, 1, 1)), + Literal(LocalDate.of(1, 1, 1))), + CalendarInterval.fromString("interval 9999 years")) + } } From dc37bc76cf2dce32fe67511ddf19acad288d62df Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 18:07:38 +0300 Subject: [PATCH 03/14] Use SubtractDates for dates subtract --- .../sql/catalyst/analysis/TypeCoercion.scala | 6 +-- .../expressions/datetimeExpressions.scala | 40 ++++++++++--------- .../catalyst/analysis/TypeCoercionSuite.scala | 2 +- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 0983810c9ad1a..d2e56cbb6ee9a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -828,8 +828,8 @@ object TypeCoercion { /** * 1. Turns Add/Subtract of DateType/TimestampType/StringType and CalendarIntervalType * to TimeAdd/TimeSub. - * 2. Turns Add/Subtract of DateType/IntegerType and IntegerType/DateType - * to DateAdd/DateSub/DateDiff. + * 2. Turns Add/Subtract of TimestampType/DateType/IntegerType + * and TimestampType/IntegerType/DateType to DateAdd/DateSub/SubtractDates. 
*/ object DateTimeOperations extends Rule[LogicalPlan] { @@ -849,7 +849,7 @@ object TypeCoercion { case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r) case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l) case Subtract(l @ DateType(), r @ IntegerType()) => DateSub(l, r) - case Subtract(l @ DateType(), r @ DateType()) => DateDiff(l, r) + case Subtract(l @ DateType(), r @ DateType()) => SubtractDates(l, r) case Subtract(l @ TimestampType(), r @ TimestampType()) => TimestampDiff(l, r) case Subtract(l @ TimestampType(), r @ DateType()) => TimestampDiff(l, Cast(r, TimestampType)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 0379d916474be..cb0518a1fb4cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1702,24 +1702,6 @@ case class DateDiff(endDate: Expression, startDate: Expression) } } -case class SubtractDates(left: Expression, right: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType) - override def dataType: DataType = CalendarIntervalType - - override def nullSafeEval(leftDays: Any, rightDays: Any): Any = { - DateTimeUtils.subtractDates(leftDays.asInstanceOf[Int], rightDays.asInstanceOf[Int]) - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (leftDays, rightDays) => { - val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") - s"$dtu.subtractDates($leftDays, $rightDays)" - }) - } -} - /** * Gets timestamps from strings using given pattern. 
*/ @@ -2134,3 +2116,25 @@ case class TimestampDiff(endTimestamp: Expression, startTimestamp: Expression) s"new org.apache.spark.unsafe.types.CalendarInterval(0, $end - $start)") } } + +/** + * Returns the interval from the `right` date (inclusive) to the `left` date (exclusive). + */ +case class SubtractDates(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType) + override def dataType: DataType = CalendarIntervalType + + override def nullSafeEval(leftDays: Any, rightDays: Any): Any = { + DateTimeUtils.subtractDates(leftDays.asInstanceOf[Int], rightDays.asInstanceOf[Int]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + defineCodeGen(ctx, ev, (leftDays, rightDays) => { + val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + s"$dtu.subtractDates($leftDays, $rightDays)" + }) + } +} + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index f60e0f2bfee6a..8f51fe6c5a414 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1430,7 +1430,7 @@ class TypeCoercionSuite extends AnalysisTest { ruleTest(dateTimeOperations, Add(date, intValue), DateAdd(date, intValue)) ruleTest(dateTimeOperations, Add(intValue, date), DateAdd(date, intValue)) ruleTest(dateTimeOperations, Subtract(date, intValue), DateSub(date, intValue)) - ruleTest(dateTimeOperations, Subtract(date, date), DateDiff(date, date)) + ruleTest(dateTimeOperations, Subtract(date, date), SubtractDates(date, date)) ruleTest(dateTimeOperations, Subtract(timestamp, timestamp), TimestampDiff(timestamp, timestamp)) ruleTest(dateTimeOperations, Subtract(timestamp, date), From
e82f0271ebe816216eb291fa27b39872458aa1e7 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 18:14:52 +0300 Subject: [PATCH 04/14] Update the SQL migration guide --- docs/sql-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 8c5721340a30c..c5bb39b0dd85f 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,6 +217,8 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. + - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, use the `datediff` function. + ## Upgrading from Spark SQL 2.4 to 2.4.1 - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was From e74f81b19f69608e93fa2cfa23f25d940a72c58b Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 18:36:42 +0300 Subject: [PATCH 05/14] Regen date.sql.out --- .../sql-tests/results/postgreSQL/date.sql.out | 88 +++++++++---------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out index 29fcf61bd5b78..510869e35d16c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -482,93 +482,93 @@ SELECT date '5874898-01-01' -- !query 46 SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL -- !query 46 schema -struct +struct -- !query 46 output --1035 --1036 --1037 --1400 
--1401 --1402 --1403 --15542 --15607 -13977 -14343 -14710 -91 -92 -93 +interval -2 years -10 months +interval -2 years -10 months -1 days +interval -2 years -9 months -4 weeks -2 days +interval -3 years -10 months +interval -3 years -10 months -1 days +interval -3 years -10 months -2 days +interval -3 years -9 months -4 weeks -2 days +interval -42 years -6 months -2 weeks -4 days +interval -42 years -8 months -3 weeks -1 days +interval 3 months +interval 3 months 1 days +interval 3 months 2 days +interval 38 years 3 months 1 weeks +interval 39 years 3 months 1 weeks 1 days +interval 40 years 3 months 1 weeks 2 days -- !query 47 SELECT f1 - date 'epoch' AS `Days From Epoch` FROM DATE_TBL -- !query 47 schema -struct +struct -- !query 47 output --4585 --4650 -11048 -11049 -11050 -24934 -25300 -25667 -9554 -9555 -9556 -9557 -9920 -9921 -9922 +interval -12 years -6 months -2 weeks -4 days +interval -12 years -8 months -3 weeks -1 days +interval 26 years 1 months 3 weeks 6 days +interval 26 years 1 months 4 weeks +interval 26 years 2 months +interval 26 years 2 months 1 days +interval 27 years 1 months 3 weeks 6 days +interval 27 years 2 months +interval 27 years 2 months 1 days +interval 30 years 3 months +interval 30 years 3 months 1 days +interval 30 years 3 months 2 days +interval 68 years 3 months 1 weeks +interval 69 years 3 months 1 weeks 1 days +interval 70 years 3 months 1 weeks 2 days -- !query 48 SELECT date 'yesterday' - date 'today' AS `One day` -- !query 48 schema -struct +struct -- !query 48 output --1 +interval -1 days -- !query 49 SELECT date 'today' - date 'tomorrow' AS `One day` -- !query 49 schema -struct +struct -- !query 49 output --1 +interval -1 days -- !query 50 SELECT date 'yesterday' - date 'tomorrow' AS `Two days` -- !query 50 schema -struct +struct -- !query 50 output --2 +interval -2 days -- !query 51 SELECT date 'tomorrow' - date 'today' AS `One day` -- !query 51 schema -struct +struct -- !query 51 output -1 +interval 1 days -- !query 52 
SELECT date 'today' - date 'yesterday' AS `One day` -- !query 52 schema -struct +struct -- !query 52 output -1 +interval 1 days -- !query 53 SELECT date 'tomorrow' - date 'yesterday' AS `Two days` -- !query 53 schema -struct +struct -- !query 53 output -2 +interval 2 days -- !query 54 From 4a8173b231f57576f7af578eb3ff561cb5f289d9 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 18:56:43 +0300 Subject: [PATCH 06/14] Regen datetime.sql.out --- .../src/test/resources/sql-tests/results/datetime.sql.out | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index c3c131d22d0fb..9ebd1e77b6551 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -126,9 +126,9 @@ struct -- !query 14 select date '2001-10-01' - date '2001-09-28' -- !query 14 schema -struct +struct -- !query 14 output -3 +interval 3 days -- !query 15 From 27de6b80c9e1cf68164a69be28c702092b85313a Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 22:49:55 +0300 Subject: [PATCH 07/14] Add config spark.sql.legacy.datesSubtraction.enabled --- docs/sql-migration-guide.md | 2 +- .../spark/sql/catalyst/analysis/TypeCoercion.scala | 7 ++++++- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 9 +++++++++ .../spark/sql/catalyst/analysis/TypeCoercionSuite.scala | 7 ++++++- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index c5bb39b0dd85f..fdf3c012da0c9 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,7 +217,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. 
To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, use the `datediff` function. + - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, you can set `spark.sql.legacy.datesSubtraction.enabled` to `true`. ## Upgrading from Spark SQL 2.4 to 2.4.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index d2e56cbb6ee9a..1c77d129e356d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -849,7 +849,12 @@ object TypeCoercion { case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r) case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l) case Subtract(l @ DateType(), r @ IntegerType()) => DateSub(l, r) - case Subtract(l @ DateType(), r @ DateType()) => SubtractDates(l, r) + case Subtract(l @ DateType(), r @ DateType()) => + if (SQLConf.get.getConf(SQLConf.LEGACY_DATES_SUBTRACTION)) { + DateDiff(l, r) + } else { + SubtractDates(l, r) + } case Subtract(l @ TimestampType(), r @ TimestampType()) => TimestampDiff(l, r) case Subtract(l @ TimestampType(), r @ DateType()) => TimestampDiff(l, Cast(r, TimestampType)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index eebf4b6dfd396..7476d2c3641ef 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2014,6 +2014,15 @@ object SQLConf { .stringConf .createWithDefault( "https://maven-central.storage-download.googleapis.com/repos/central/data/") + + val LEGACY_DATES_SUBTRACTION = + buildConf("spark.sql.legacy.datesSubtraction.enabled") + .doc("When true, date subtraction expressions have the INT type and return " + + "the number of days between the dates. If it is set to false, the expressions " + + "have the INTERVAL type and return an interval from the left date (inclusive) " + + "to the right date (exclusive).") + .booleanConf + .createWithDefault(false) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 8f51fe6c5a414..579172b6b33b6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1430,7 +1430,12 @@ class TypeCoercionSuite extends AnalysisTest { ruleTest(dateTimeOperations, Add(date, intValue), DateAdd(date, intValue)) ruleTest(dateTimeOperations, Add(intValue, date), DateAdd(date, intValue)) ruleTest(dateTimeOperations, Subtract(date, intValue), DateSub(date, intValue)) - ruleTest(dateTimeOperations, Subtract(date, date), SubtractDates(date, date)) + withSQLConf(SQLConf.LEGACY_DATES_SUBTRACTION.key -> "false") { + ruleTest(dateTimeOperations, Subtract(date, date), SubtractDates(date, date)) + } + withSQLConf(SQLConf.LEGACY_DATES_SUBTRACTION.key -> "true") { + ruleTest(dateTimeOperations, Subtract(date, date), DateDiff(date, date)) + } ruleTest(dateTimeOperations, Subtract(timestamp, timestamp), TimestampDiff(timestamp, timestamp)) ruleTest(dateTimeOperations, Subtract(timestamp, date), From 
fbd2723901deefea55ce31bfd49d5fc28dbe0ea3 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 23:00:50 +0300 Subject: [PATCH 08/14] TimestampDiff -> SubtractTimestamps --- .../spark/sql/catalyst/analysis/TypeCoercion.scala | 10 ++++++---- .../sql/catalyst/expressions/datetimeExpressions.scala | 2 +- .../sql/catalyst/analysis/TypeCoercionSuite.scala | 6 +++--- .../catalyst/expressions/DateExpressionsSuite.scala | 8 ++++---- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 1c77d129e356d..624bfd1be14ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -829,7 +829,8 @@ object TypeCoercion { * 1. Turns Add/Subtract of DateType/TimestampType/StringType and CalendarIntervalType * to TimeAdd/TimeSub. * 2. Turns Add/Subtract of TimestampType/DateType/IntegerType - * and TimestampType/IntegerType/DateType to DateAdd/DateSub/SubtractDates. + * and TimestampType/IntegerType/DateType to DateAdd/DateSub/SubtractDates and + * to SubtractTimestamps. 
*/ object DateTimeOperations extends Rule[LogicalPlan] { @@ -855,11 +856,12 @@ object TypeCoercion { } else { SubtractDates(l, r) } - case Subtract(l @ TimestampType(), r @ TimestampType()) => TimestampDiff(l, r) + case Subtract(l @ TimestampType(), r @ TimestampType()) => + SubtractTimestamps(l, r) case Subtract(l @ TimestampType(), r @ DateType()) => - TimestampDiff(l, Cast(r, TimestampType)) + SubtractTimestamps(l, Cast(r, TimestampType)) case Subtract(l @ DateType(), r @ TimestampType()) => - TimestampDiff(Cast(l, TimestampType), r) + SubtractTimestamps(Cast(l, TimestampType), r) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index cb0518a1fb4cc..cddd8c9bd61b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2099,7 +2099,7 @@ case class DatePart(field: Expression, source: Expression, child: Expression) * is set to 0 and the `microseconds` field is initialized to the microsecond difference * between the given timestamps. 
*/ -case class TimestampDiff(endTimestamp: Expression, startTimestamp: Expression) +case class SubtractTimestamps(endTimestamp: Expression, startTimestamp: Expression) extends BinaryExpression with ImplicitCastInputTypes { override def left: Expression = endTimestamp diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 579172b6b33b6..d6e69b248f35a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1437,11 +1437,11 @@ class TypeCoercionSuite extends AnalysisTest { ruleTest(dateTimeOperations, Subtract(date, date), DateDiff(date, date)) } ruleTest(dateTimeOperations, Subtract(timestamp, timestamp), - TimestampDiff(timestamp, timestamp)) + SubtractTimestamps(timestamp, timestamp)) ruleTest(dateTimeOperations, Subtract(timestamp, date), - TimestampDiff(timestamp, Cast(date, TimestampType))) + SubtractTimestamps(timestamp, Cast(date, TimestampType))) ruleTest(dateTimeOperations, Subtract(date, timestamp), - TimestampDiff(Cast(date, TimestampType), timestamp)) + SubtractTimestamps(Cast(date, TimestampType), timestamp)) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 6b194726e7a66..e893e863b3675 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1072,16 +1072,16 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("timestamps difference") { val end = Instant.parse("2019-10-04T11:04:01.123456Z") - 
checkEvaluation(TimestampDiff(Literal(end), Literal(end)), + checkEvaluation(SubtractTimestamps(Literal(end), Literal(end)), new CalendarInterval(0, 0)) - checkEvaluation(TimestampDiff(Literal(end), Literal(Instant.EPOCH)), + checkEvaluation(SubtractTimestamps(Literal(end), Literal(Instant.EPOCH)), CalendarInterval.fromString("interval 18173 days " + "11 hours 4 minutes 1 seconds 123 milliseconds 456 microseconds")) - checkEvaluation(TimestampDiff(Literal(Instant.EPOCH), Literal(end)), + checkEvaluation(SubtractTimestamps(Literal(Instant.EPOCH), Literal(end)), CalendarInterval.fromString("interval -18173 days " + "-11 hours -4 minutes -1 seconds -123 milliseconds -456 microseconds")) checkEvaluation( - TimestampDiff( + SubtractTimestamps( Literal(Instant.parse("9999-12-31T23:59:59.999999Z")), Literal(Instant.parse("0001-01-01T00:00:00Z"))), CalendarInterval.fromString("interval 521722 weeks 4 days " + From 199adffa1c50cad8e736c3ddc827ac7fcc4f0abd Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 14 Oct 2019 23:26:32 +0300 Subject: [PATCH 09/14] Update config description regarding datediff --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index fdf3c012da0c9..58d9e7d3f86a7 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,7 +217,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, you can set `spark.sql.legacy.datesSubtraction.enabled` to `true`. 
+ - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, you can set `spark.sql.legacy.datesSubtraction.enabled` to `true` or use the `datediff` function. ## Upgrading from Spark SQL 2.4 to 2.4.1 From 8b49c9bc2215310acd2b2461e3ff3cee93cbac5b Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 15 Oct 2019 07:03:29 +0300 Subject: [PATCH 10/14] Regen datetime.sql.out --- .../src/test/resources/sql-tests/results/datetime.sql.out | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 9ebd1e77b6551..0f4036cad6125 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -134,7 +134,7 @@ interval 3 days -- !query 15 select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678' -- !query 15 schema -struct +struct -- !query 15 output interval 12 weeks 2 days 14 hours 48 minutes 47 seconds 654 milliseconds 322 microseconds @@ -142,6 +142,6 @@ interval 12 weeks 2 days 14 hours 48 minutes 47 seconds 654 milliseconds 322 mic -- !query 16 select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01' -- !query 16 schema -struct +struct -- !query 16 output interval -12 weeks -2 days -14 hours -48 minutes -47 seconds -654 milliseconds -322 microseconds From 85f17f7fd221dcd7b4c5023b01e2a5fbcd76b9b8 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 15 Oct 2019 21:27:16 +0300 Subject: [PATCH 11/14] Revert "Update config description regarding datediff" This reverts commit 199adffa1c50cad8e736c3ddc827ac7fcc4f0abd. 
--- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 58d9e7d3f86a7..fdf3c012da0c9 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,7 +217,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, you can set `spark.sql.legacy.datesSubtraction.enabled` to `true` or use the `datediff` function. + - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, you can set `spark.sql.legacy.datesSubtraction.enabled` to `true`. ## Upgrading from Spark SQL 2.4 to 2.4.1 From 228673d7940800a7122a721c3cf9a646ce630e40 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 15 Oct 2019 21:31:09 +0300 Subject: [PATCH 12/14] Revert "Add config spark.sql.legacy.datesSubtraction.enabled" This reverts commit 27de6b80c9e1cf68164a69be28c702092b85313a. 
--- docs/sql-migration-guide.md | 2 +- .../spark/sql/catalyst/analysis/TypeCoercion.scala | 7 +------ .../scala/org/apache/spark/sql/internal/SQLConf.scala | 9 --------- .../spark/sql/catalyst/analysis/TypeCoercionSuite.scala | 7 +------ 4 files changed, 3 insertions(+), 22 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index fdf3c012da0c9..c5bb39b0dd85f 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,7 +217,7 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, you can set `spark.sql.legacy.datesSubtraction.enabled` to `true`. + - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, use the `datediff` function. 
## Upgrading from Spark SQL 2.4 to 2.4.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 624bfd1be14ca..68ac14a517e43 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -850,12 +850,7 @@ object TypeCoercion { case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r) case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l) case Subtract(l @ DateType(), r @ IntegerType()) => DateSub(l, r) - case Subtract(l @ DateType(), r @ DateType()) => - if (SQLConf.get.getConf(SQLConf.LEGACY_DATES_SUBTRACTION)) { - DateDiff(l, r) - } else { - SubtractDates(l, r) - } + case Subtract(l @ DateType(), r @ DateType()) => SubtractDates(l, r) case Subtract(l @ TimestampType(), r @ TimestampType()) => SubtractTimestamps(l, r) case Subtract(l @ TimestampType(), r @ DateType()) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 7476d2c3641ef..eebf4b6dfd396 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2014,15 +2014,6 @@ object SQLConf { .stringConf .createWithDefault( "https://maven-central.storage-download.googleapis.com/repos/central/data/") - - val LEGACY_DATES_SUBTRACTION = - buildConf("spark.sql.legacy.datesSubtraction.enabled") - .doc("When true, date subtraction expressions have the INT type and return " + - "the number of days between the dates. 
If it is set to false, the expressions " + - "have the INTERVAL type and return an interval from the left date (inclusive) " + - "to the right date (exclusive).") - .booleanConf - .createWithDefault(false) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index d6e69b248f35a..4f9e4ec0201dd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1430,12 +1430,7 @@ class TypeCoercionSuite extends AnalysisTest { ruleTest(dateTimeOperations, Add(date, intValue), DateAdd(date, intValue)) ruleTest(dateTimeOperations, Add(intValue, date), DateAdd(date, intValue)) ruleTest(dateTimeOperations, Subtract(date, intValue), DateSub(date, intValue)) - withSQLConf(SQLConf.LEGACY_DATES_SUBTRACTION.key -> "false") { - ruleTest(dateTimeOperations, Subtract(date, date), SubtractDates(date, date)) - } - withSQLConf(SQLConf.LEGACY_DATES_SUBTRACTION.key -> "true") { - ruleTest(dateTimeOperations, Subtract(date, date), DateDiff(date, date)) - } + ruleTest(dateTimeOperations, Subtract(date, date), SubtractDates(date, date)) ruleTest(dateTimeOperations, Subtract(timestamp, timestamp), SubtractTimestamps(timestamp, timestamp)) ruleTest(dateTimeOperations, Subtract(timestamp, date), From f232b64eada7a9dbd6d859e2957ef5ed037051d7 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 15 Oct 2019 21:31:42 +0300 Subject: [PATCH 13/14] Revert "Update the SQL migration guide" This reverts commit e82f0271ebe816216eb291fa27b39872458aa1e7. 
--- docs/sql-migration-guide.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index c5bb39b0dd85f..8c5721340a30c 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -217,8 +217,6 @@ license: | - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. In Spark version 2.4 and earlier, this function gives `-1` for the same input. To restore the behavior before Spark 3.0, you can set `spark.sql.legacy.sizeOfNull` to `true`. - - In Spark version 2.4 and earlier, dates subtraction `date1` - `date2` gives the number of days from `date1` to `date2`. Since Spark 3.0, the expression has the `INTERVAL` type and returns an interval between two dates. To get the number of days, use the `datediff` function. - ## Upgrading from Spark SQL 2.4 to 2.4.1 - The value of `spark.executor.heartbeatInterval`, when specified without units like "30" rather than "30s", was From c6ec2118e93d05e9055089aa7b6112a2ef6a0e46 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 16 Oct 2019 09:15:48 +0300 Subject: [PATCH 14/14] Use DateDiff in the PostgreSQL dialect --- .../sql/catalyst/analysis/TypeCoercion.scala | 3 +- .../sql-tests/results/postgreSQL/date.sql.out | 88 +++++++++---------- 2 files changed, 46 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 68ac14a517e43..c451eb2b877da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -850,7 +850,8 @@ object TypeCoercion { case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r) case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l) case Subtract(l @ DateType(), r @ IntegerType()) => DateSub(l, r) - case Subtract(l @ 
DateType(), r @ DateType()) => SubtractDates(l, r) + case Subtract(l @ DateType(), r @ DateType()) => + if (SQLConf.get.usePostgreSQLDialect) DateDiff(l, r) else SubtractDates(l, r) case Subtract(l @ TimestampType(), r @ TimestampType()) => SubtractTimestamps(l, r) case Subtract(l @ TimestampType(), r @ DateType()) => diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out index 510869e35d16c..29fcf61bd5b78 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -482,93 +482,93 @@ SELECT date '5874898-01-01' -- !query 46 SELECT f1 - date '2000-01-01' AS `Days From 2K` FROM DATE_TBL -- !query 46 schema -struct +struct -- !query 46 output -interval -2 years -10 months -interval -2 years -10 months -1 days -interval -2 years -9 months -4 weeks -2 days -interval -3 years -10 months -interval -3 years -10 months -1 days -interval -3 years -10 months -2 days -interval -3 years -9 months -4 weeks -2 days -interval -42 years -6 months -2 weeks -4 days -interval -42 years -8 months -3 weeks -1 days -interval 3 months -interval 3 months 1 days -interval 3 months 2 days -interval 38 years 3 months 1 weeks -interval 39 years 3 months 1 weeks 1 days -interval 40 years 3 months 1 weeks 2 days +-1035 +-1036 +-1037 +-1400 +-1401 +-1402 +-1403 +-15542 +-15607 +13977 +14343 +14710 +91 +92 +93 -- !query 47 SELECT f1 - date 'epoch' AS `Days From Epoch` FROM DATE_TBL -- !query 47 schema -struct +struct -- !query 47 output -interval -12 years -6 months -2 weeks -4 days -interval -12 years -8 months -3 weeks -1 days -interval 26 years 1 months 3 weeks 6 days -interval 26 years 1 months 4 weeks -interval 26 years 2 months -interval 26 years 2 months 1 days -interval 27 years 1 months 3 weeks 6 days -interval 27 years 2 months -interval 27 years 2 months 1 days -interval 30 years 3 months 
-interval 30 years 3 months 1 days -interval 30 years 3 months 2 days -interval 68 years 3 months 1 weeks -interval 69 years 3 months 1 weeks 1 days -interval 70 years 3 months 1 weeks 2 days +-4585 +-4650 +11048 +11049 +11050 +24934 +25300 +25667 +9554 +9555 +9556 +9557 +9920 +9921 +9922 -- !query 48 SELECT date 'yesterday' - date 'today' AS `One day` -- !query 48 schema -struct +struct -- !query 48 output -interval -1 days +-1 -- !query 49 SELECT date 'today' - date 'tomorrow' AS `One day` -- !query 49 schema -struct +struct -- !query 49 output -interval -1 days +-1 -- !query 50 SELECT date 'yesterday' - date 'tomorrow' AS `Two days` -- !query 50 schema -struct +struct -- !query 50 output -interval -2 days +-2 -- !query 51 SELECT date 'tomorrow' - date 'today' AS `One day` -- !query 51 schema -struct +struct -- !query 51 output -interval 1 days +1 -- !query 52 SELECT date 'today' - date 'yesterday' AS `One day` -- !query 52 schema -struct +struct -- !query 52 output -interval 1 days +1 -- !query 53 SELECT date 'tomorrow' - date 'yesterday' AS `Two days` -- !query 53 schema -struct +struct -- !query 53 output -interval 2 days +2 -- !query 54