
Commit d11cbf2

MaxGekk authored and wangyum committed
[SPARK-29364][SQL] Return an interval from date subtract according to SQL standard
[SPARK-29364][SQL] Return an interval from date subtract according to SQL standard

### What changes were proposed in this pull request?

Proposed a new expression `SubtractDates`, which is used in `date1` - `date2`. It has the `INTERVAL` type and returns the interval between `date1` (inclusive) and `date2` (exclusive). For example:
```sql
> select date'tomorrow' - date'yesterday';
interval 2 days
```
Closes #26034

### Why are the changes needed?
- To conform to the SQL standard, which states that the result type of `date operand 1` - `date operand 2` must be the interval type. See [4.5.3 Operations involving datetimes and intervals](http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt).
- To improve Spark SQL UX and allow mixing dates and timestamps in subtractions, for example: `select timestamp'now' + (date'2019-10-01' - date'2019-09-15')`

### Does this PR introduce any user-facing change?
Before the change, the query below returns the number of days:
```sql
spark-sql> select date'2019-10-05' - date'2018-09-01';
399
```
After the change, it returns an interval:
```sql
spark-sql> select date'2019-10-05' - date'2018-09-01';
interval 1 years 1 months 4 days
```

### How was this patch tested?
- By new tests in `DateExpressionsSuite` and `TypeCoercionSuite`.
- By existing tests in `date.sql`.

Closes #26112 from MaxGekk/date-subtract.

Authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Yuming Wang <[email protected]>
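As a quick check of the example above, here is a minimal standalone Scala sketch (not part of the patch, object name chosen only for the sketch) that reproduces the calendar arithmetic the new behavior is based on, using `java.time.Period` with the dates from the query above:

```scala
import java.time.{LocalDate, Period}

// Sketch only: verifies that date'2019-10-05' - date'2018-09-01'
// corresponds to 1 year, 1 month and 4 days on the proleptic Gregorian calendar.
object DateSubtractCheck extends App {
  val start = LocalDate.of(2018, 9, 1)
  val end = LocalDate.of(2019, 10, 5)
  val period = Period.between(start, end)
  // Prints: 1 years 1 months 4 days
  println(s"${period.getYears} years ${period.getMonths} months ${period.getDays} days")
}
```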
1 parent 5a482e7 commit d11cbf2

File tree

6 files changed, +82 -21 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala

Lines changed: 9 additions & 6 deletions
@@ -828,8 +828,9 @@ object TypeCoercion {
   /**
    * 1. Turns Add/Subtract of DateType/TimestampType/StringType and CalendarIntervalType
    *    to TimeAdd/TimeSub.
-   * 2. Turns Add/Subtract of DateType/IntegerType and IntegerType/DateType
-   *    to DateAdd/DateSub/DateDiff.
+   * 2. Turns Add/Subtract of TimestampType/DateType/IntegerType
+   *    and TimestampType/IntegerType/DateType to DateAdd/DateSub/SubtractDates and
+   *    to SubtractTimestamps.
    */
   object DateTimeOperations extends Rule[LogicalPlan] {

@@ -849,12 +850,14 @@ object TypeCoercion {
       case Add(l @ DateType(), r @ IntegerType()) => DateAdd(l, r)
       case Add(l @ IntegerType(), r @ DateType()) => DateAdd(r, l)
       case Subtract(l @ DateType(), r @ IntegerType()) => DateSub(l, r)
-      case Subtract(l @ DateType(), r @ DateType()) => DateDiff(l, r)
-      case Subtract(l @ TimestampType(), r @ TimestampType()) => TimestampDiff(l, r)
+      case Subtract(l @ DateType(), r @ DateType()) =>
+        if (SQLConf.get.usePostgreSQLDialect) DateDiff(l, r) else SubtractDates(l, r)
+      case Subtract(l @ TimestampType(), r @ TimestampType()) =>
+        SubtractTimestamps(l, r)
       case Subtract(l @ TimestampType(), r @ DateType()) =>
-        TimestampDiff(l, Cast(r, TimestampType))
+        SubtractTimestamps(l, Cast(r, TimestampType))
       case Subtract(l @ DateType(), r @ TimestampType()) =>
-        TimestampDiff(Cast(l, TimestampType), r)
+        SubtractTimestamps(Cast(l, TimestampType), r)
     }
   }
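For context, a rough spark-shell style check (assuming an active `SparkSession` bound to `spark`, as in spark-shell) of what the rewritten rule produces for the subtraction shapes above; the expected results mirror the `datetime.sql.out` updates further down:

```scala
// Assumes spark-shell, i.e. an existing SparkSession named `spark`.
// date - date now coerces to SubtractDates (interval result) instead of DateDiff (int),
// unless the PostgreSQL dialect is enabled.
spark.sql("select date '2001-10-01' - date '2001-09-28'").show(false)
// interval 3 days

// timestamp - date (and date - timestamp) go through SubtractTimestamps,
// with the date operand cast to timestamp first.
spark.sql("select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'").show(false)
// interval -12 weeks -2 days -14 hours -48 minutes -47 seconds -654 milliseconds -322 microseconds
```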

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 23 additions & 1 deletion
@@ -2099,7 +2099,7 @@ case class DatePart(field: Expression, source: Expression, child: Expression)
  * is set to 0 and the `microseconds` field is initialized to the microsecond difference
  * between the given timestamps.
  */
-case class TimestampDiff(endTimestamp: Expression, startTimestamp: Expression)
+case class SubtractTimestamps(endTimestamp: Expression, startTimestamp: Expression)
   extends BinaryExpression with ImplicitCastInputTypes {
 
   override def left: Expression = endTimestamp

@@ -2116,3 +2116,25 @@ case class TimestampDiff(endTimestamp: Expression, startTimestamp: Expression)
       s"new org.apache.spark.unsafe.types.CalendarInterval(0, $end - $start)")
   }
 }
+
+/**
+ * Returns the interval from the `left` date (inclusive) to the `right` date (exclusive).
+ */
+case class SubtractDates(left: Expression, right: Expression)
+  extends BinaryExpression with ImplicitCastInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(DateType, DateType)
+  override def dataType: DataType = CalendarIntervalType
+
+  override def nullSafeEval(leftDays: Any, rightDays: Any): Any = {
+    DateTimeUtils.subtractDates(leftDays.asInstanceOf[Int], rightDays.asInstanceOf[Int])
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    defineCodeGen(ctx, ev, (leftDays, rightDays) => {
+      val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
+      s"$dtu.subtractDates($leftDays, $rightDays)"
+    })
+  }
+}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 17 additions & 1 deletion
@@ -27,7 +27,7 @@ import java.util.concurrent.TimeUnit._
 import scala.util.control.NonFatal
 
 import org.apache.spark.sql.types.Decimal
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
 
 /**
  * Helper functions for converting between internal and external date and time representations.

@@ -950,4 +950,20 @@ object DateTimeUtils {
       None
     }
   }
+
+  /**
+   * Subtracts two dates.
+   * @param endDate - the end date, exclusive
+   * @param startDate - the start date, inclusive
+   * @return an interval between two dates. The interval can be negative
+   *         if the end date is before the start date.
+   */
+  def subtractDates(endDate: SQLDate, startDate: SQLDate): CalendarInterval = {
+    val period = Period.between(
+      LocalDate.ofEpochDay(startDate),
+      LocalDate.ofEpochDay(endDate))
+    val months = period.getMonths + 12 * period.getYears
+    val microseconds = period.getDays * MICROS_PER_DAY
+    new CalendarInterval(months, microseconds)
+  }
 }
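As an aside, here is a self-contained sketch of the same computation outside Spark. The names are hypothetical: `SQLDate` is just an epoch-day `Int`, and a `(months, microseconds)` pair stands in for `CalendarInterval`'s two fields; the printed value matches the `DateExpressionsSuite` test below.

```scala
import java.time.{LocalDate, Period}

object SubtractDatesModel {
  // Stand-in for MICROS_PER_DAY in DateTimeUtils: microseconds in one day.
  final val MicrosPerDay: Long = 24L * 60 * 60 * 1000 * 1000

  // Mirrors subtractDates: dates are epoch-day Ints; the result models
  // CalendarInterval as (months, microseconds).
  def subtractDates(endDate: Int, startDate: Int): (Int, Long) = {
    val period = Period.between(LocalDate.ofEpochDay(startDate), LocalDate.ofEpochDay(endDate))
    val months = period.getMonths + 12 * period.getYears
    val microseconds = period.getDays * MicrosPerDay
    (months, microseconds)
  }

  def main(args: Array[String]): Unit = {
    val end = LocalDate.of(2019, 10, 5).toEpochDay.toInt
    // 2019-10-05 minus the epoch: 49 years 9 months 4 days, i.e. (597, 345600000000).
    println(subtractDates(end, 0))
  }
}
```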

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala

Lines changed: 4 additions & 4 deletions
@@ -1430,13 +1430,13 @@ class TypeCoercionSuite extends AnalysisTest {
     ruleTest(dateTimeOperations, Add(date, intValue), DateAdd(date, intValue))
     ruleTest(dateTimeOperations, Add(intValue, date), DateAdd(date, intValue))
     ruleTest(dateTimeOperations, Subtract(date, intValue), DateSub(date, intValue))
-    ruleTest(dateTimeOperations, Subtract(date, date), DateDiff(date, date))
+    ruleTest(dateTimeOperations, Subtract(date, date), SubtractDates(date, date))
     ruleTest(dateTimeOperations, Subtract(timestamp, timestamp),
-      TimestampDiff(timestamp, timestamp))
+      SubtractTimestamps(timestamp, timestamp))
     ruleTest(dateTimeOperations, Subtract(timestamp, date),
-      TimestampDiff(timestamp, Cast(date, TimestampType)))
+      SubtractTimestamps(timestamp, Cast(date, TimestampType)))
     ruleTest(dateTimeOperations, Subtract(date, timestamp),
-      TimestampDiff(Cast(date, TimestampType), timestamp))
+      SubtractTimestamps(Cast(date, TimestampType), timestamp))
   }
 
   /**

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala

Lines changed: 25 additions & 5 deletions
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.{Date, Timestamp}
 import java.text.SimpleDateFormat
-import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset}
+import java.time.{Instant, LocalDate, LocalDateTime, ZoneId, ZoneOffset}
 import java.util.{Calendar, Locale, TimeZone}
 import java.util.concurrent.TimeUnit
 import java.util.concurrent.TimeUnit._

@@ -1072,19 +1072,39 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
 
   test("timestamps difference") {
     val end = Instant.parse("2019-10-04T11:04:01.123456Z")
-    checkEvaluation(TimestampDiff(Literal(end), Literal(end)),
+    checkEvaluation(SubtractTimestamps(Literal(end), Literal(end)),
       new CalendarInterval(0, 0))
-    checkEvaluation(TimestampDiff(Literal(end), Literal(Instant.EPOCH)),
+    checkEvaluation(SubtractTimestamps(Literal(end), Literal(Instant.EPOCH)),
       CalendarInterval.fromString("interval 18173 days " +
         "11 hours 4 minutes 1 seconds 123 milliseconds 456 microseconds"))
-    checkEvaluation(TimestampDiff(Literal(Instant.EPOCH), Literal(end)),
+    checkEvaluation(SubtractTimestamps(Literal(Instant.EPOCH), Literal(end)),
      CalendarInterval.fromString("interval -18173 days " +
        "-11 hours -4 minutes -1 seconds -123 milliseconds -456 microseconds"))
     checkEvaluation(
-      TimestampDiff(
+      SubtractTimestamps(
        Literal(Instant.parse("9999-12-31T23:59:59.999999Z")),
        Literal(Instant.parse("0001-01-01T00:00:00Z"))),
      CalendarInterval.fromString("interval 521722 weeks 4 days " +
        "23 hours 59 minutes 59 seconds 999 milliseconds 999 microseconds"))
   }
+
+  test("subtract dates") {
+    val end = LocalDate.of(2019, 10, 5)
+    checkEvaluation(SubtractDates(Literal(end), Literal(end)),
+      new CalendarInterval(0, 0))
+    checkEvaluation(SubtractDates(Literal(end.plusDays(1)), Literal(end)),
+      CalendarInterval.fromString("interval 1 days"))
+    checkEvaluation(SubtractDates(Literal(end.minusDays(1)), Literal(end)),
+      CalendarInterval.fromString("interval -1 days"))
+    val epochDate = Literal(LocalDate.ofEpochDay(0))
+    checkEvaluation(SubtractDates(Literal(end), epochDate),
+      CalendarInterval.fromString("interval 49 years 9 months 4 days"))
+    checkEvaluation(SubtractDates(epochDate, Literal(end)),
+      CalendarInterval.fromString("interval -49 years -9 months -4 days"))
+    checkEvaluation(
+      SubtractDates(
+        Literal(LocalDate.of(10000, 1, 1)),
+        Literal(LocalDate.of(1, 1, 1))),
+      CalendarInterval.fromString("interval 9999 years"))
+  }
 }

sql/core/src/test/resources/sql-tests/results/datetime.sql.out

Lines changed: 4 additions & 4 deletions
@@ -126,22 +126,22 @@ struct<date_sub(DATE '2001-10-01', 7):date>
 -- !query 14
 select date '2001-10-01' - date '2001-09-28'
 -- !query 14 schema
-struct<datediff(DATE '2001-10-01', DATE '2001-09-28'):int>
+struct<subtractdates(DATE '2001-10-01', DATE '2001-09-28'):interval>
 -- !query 14 output
-3
+interval 3 days
 
 
 -- !query 15
 select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678'
 -- !query 15 schema
-struct<timestampdiff(CAST(DATE '2020-01-01' AS TIMESTAMP), TIMESTAMP('2019-10-06 10:11:12.345678')):interval>
+struct<subtracttimestamps(CAST(DATE '2020-01-01' AS TIMESTAMP), TIMESTAMP('2019-10-06 10:11:12.345678')):interval>
 -- !query 15 output
 interval 12 weeks 2 days 14 hours 48 minutes 47 seconds 654 milliseconds 322 microseconds
 
 
 -- !query 16
 select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'
 -- !query 16 schema
-struct<timestampdiff(TIMESTAMP('2019-10-06 10:11:12.345678'), CAST(DATE '2020-01-01' AS TIMESTAMP)):interval>
+struct<subtracttimestamps(TIMESTAMP('2019-10-06 10:11:12.345678'), CAST(DATE '2020-01-01' AS TIMESTAMP)):interval>
 -- !query 16 output
 interval -12 weeks -2 days -14 hours -48 minutes -47 seconds -654 milliseconds -322 microseconds
