Skip to content

Commit 77fe8a8

Browse files
MaxGekkcloud-fan
authored andcommitted
[SPARK-28420][SQL] Support the INTERVAL type in date_part()
### What changes were proposed in this pull request? The `date_part()` function can accept the `source` parameter of the `INTERVAL` type (`CalendarIntervalType`). The following values of the `field` parameter are supported: - `"MILLENNIUM"` (`"MILLENNIA"`, `"MIL"`, `"MILS"`) - number of millenniums in the given interval. It is `YEAR / 1000`. - `"CENTURY"` (`"CENTURIES"`, `"C"`, `"CENT"`) - number of centuries in the interval calculated as `YEAR / 100`. - `"DECADE"` (`"DECADES"`, `"DEC"`, `"DECS"`) - decades in the `YEAR` part of the interval calculated as `YEAR / 10`. - `"YEAR"` (`"Y"`, `"YEARS"`, `"YR"`, `"YRS"`) - years in a values of `CalendarIntervalType`. It is `MONTHS / 12`. - `"QUARTER"` (`"QTR"`) - a quarter of year calculated as `MONTHS / 3 + 1` - `"MONTH"` (`"MON"`, `"MONS"`, `"MONTHS"`) - the months part of the interval calculated as `CalendarInterval.months % 12` - `"DAY"` (`"D"`, `"DAYS"`) - total number of days in `CalendarInterval.microseconds` - `"HOUR"` (`"H"`, `"HOURS"`, `"HR"`, `"HRS"`) - the hour part of the interval. - `"MINUTE"` (`"M"`, `"MIN"`, `"MINS"`, `"MINUTES"`) - the minute part of the interval. - `"SECOND"` (`"S"`, `"SEC"`, `"SECONDS"`, `"SECS"`) - the seconds part with fractional microsecond part. - `"MILLISECONDS"` (`"MSEC"`, `"MSECS"`, `"MILLISECON"`, `"MSECONDS"`, `"MS"`) - the millisecond part of the interval with fractional microsecond part. - `"MICROSECONDS"` (`"USEC"`, `"USECS"`, `"USECONDS"`, `"MICROSECON"`, `"US"`) - the total number of microseconds in the `second`, `millisecond` and `microsecond` parts of the given interval. - `"EPOCH"` - the total number of seconds in the interval including the fractional part with microsecond precision. Here we assume 365.25 days per year (leap year every four years). For example: ```sql > SELECT date_part('days', interval 1 year 10 months 5 days); 5 > SELECT date_part('seconds', interval 30 seconds 1 milliseconds 1 microseconds); 30.001001 ``` ### Why are the changes needed? To maintain feature parity with PostgreSQL (https://www.postgresql.org/docs/11/functions-datetime.html#FUNCTIONS-DATETIME-EXTRACT) ### Does this PR introduce any user-facing change? No ### How was this patch tested? - Added new test suite `IntervalExpressionsSuite` - Add new test cases to `date_part.sql` Closes #25981 from MaxGekk/extract-from-intervals. Authored-by: Maxim Gekk <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent c3a0d02 commit 77fe8a8

File tree

6 files changed

+970
-9
lines changed

6 files changed

+970
-9
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
package org.apache.spark.sql.catalyst.expressions
1919

2020
import java.sql.Timestamp
21-
import java.time.{DateTimeException, Instant, LocalDate, LocalDateTime, ZoneId}
21+
import java.time.{DateTimeException, LocalDate, LocalDateTime, ZoneId}
2222
import java.time.temporal.IsoFields
2323
import java.util.{Locale, TimeZone}
2424

@@ -2032,10 +2032,11 @@ object DatePart {
20322032
}
20332033

20342034
@ExpressionDescription(
2035-
usage = "_FUNC_(field, source) - Extracts a part of the date/timestamp.",
2035+
usage = "_FUNC_(field, source) - Extracts a part of the date/timestamp or interval source.",
20362036
arguments = """
20372037
Arguments:
2038-
* field - selects which part of the source should be extracted. Supported string values are:
2038+
* field - selects which part of the source should be extracted.
2039+
Supported string values of `field` for dates and timestamps are:
20392040
["MILLENNIUM", ("MILLENNIA", "MIL", "MILS"),
20402041
"CENTURY", ("CENTURIES", "C", "CENT"),
20412042
"DECADE", ("DECADES", "DEC", "DECS"),
@@ -2055,7 +2056,21 @@ object DatePart {
20552056
"MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS"),
20562057
"MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US"),
20572058
"EPOCH"]
2058-
* source - a date (or timestamp) column from where `field` should be extracted
2059+
Supported string values of `field` for intervals are:
2060+
["MILLENNIUM", ("MILLENNIA", "MIL", "MILS"),
2061+
"CENTURY", ("CENTURIES", "C", "CENT"),
2062+
"DECADE", ("DECADES", "DEC", "DECS"),
2063+
"YEAR", ("Y", "YEARS", "YR", "YRS"),
2064+
"QUARTER", ("QTR"),
2065+
"MONTH", ("MON", "MONS", "MONTHS"),
2066+
"DAY", ("D", "DAYS"),
2067+
"HOUR", ("H", "HOURS", "HR", "HRS"),
2068+
"MINUTE", ("M", "MIN", "MINS", "MINUTES"),
2069+
"SECOND", ("S", "SEC", "SECONDS", "SECS"),
2070+
"MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS"),
2071+
"MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US"),
2072+
"EPOCH"]
2073+
* source - a date/timestamp or interval column from where `field` should be extracted
20592074
""",
20602075
examples = """
20612076
Examples:
@@ -2067,6 +2082,10 @@ object DatePart {
20672082
224
20682083
> SELECT _FUNC_('SECONDS', timestamp'2019-10-01 00:00:01.000001');
20692084
1.000001
2085+
> SELECT _FUNC_('days', interval 1 year 10 months 5 days);
2086+
5
2087+
> SELECT _FUNC_('seconds', interval 5 hours 30 seconds 1 milliseconds 1 microseconds);
2088+
30.001001
20702089
""",
20712090
since = "3.0.0")
20722091
case class DatePart(field: Expression, source: Expression, child: Expression)
@@ -2082,9 +2101,16 @@ case class DatePart(field: Expression, source: Expression, child: Expression)
20822101
Literal(null, DoubleType)
20832102
} else {
20842103
val fieldStr = fieldEval.asInstanceOf[UTF8String].toString
2085-
DatePart.parseExtractField(fieldStr, source, {
2086-
throw new AnalysisException(s"Literals of type '$fieldStr' are currently not supported.")
2087-
})
2104+
val errMsg = s"Literals of type '$fieldStr' are currently not supported " +
2105+
s"for the ${source.dataType.catalogString} type."
2106+
if (source.dataType == CalendarIntervalType) {
2107+
ExtractIntervalPart.parseExtractField(
2108+
fieldStr,
2109+
source,
2110+
throw new AnalysisException(errMsg))
2111+
} else {
2112+
DatePart.parseExtractField(fieldStr, source, throw new AnalysisException(errMsg))
2113+
}
20882114
}
20892115
})
20902116
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.expressions
19+
20+
import java.util.Locale
21+
22+
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
23+
import org.apache.spark.sql.catalyst.util.IntervalUtils
24+
import org.apache.spark.sql.catalyst.util.IntervalUtils._
25+
import org.apache.spark.sql.types._
26+
import org.apache.spark.unsafe.types.CalendarInterval
27+
28+
abstract class ExtractIntervalPart(
29+
child: Expression,
30+
val dataType: DataType,
31+
func: CalendarInterval => Any,
32+
funcName: String)
33+
extends UnaryExpression with ExpectsInputTypes with Serializable {
34+
35+
override def inputTypes: Seq[AbstractDataType] = Seq(CalendarIntervalType)
36+
37+
override protected def nullSafeEval(interval: Any): Any = {
38+
func(interval.asInstanceOf[CalendarInterval])
39+
}
40+
41+
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
42+
val iu = IntervalUtils.getClass.getName.stripSuffix("$")
43+
defineCodeGen(ctx, ev, c => s"$iu.$funcName($c)")
44+
}
45+
}
46+
47+
case class ExtractIntervalMillenniums(child: Expression)
48+
extends ExtractIntervalPart(child, IntegerType, getMillenniums, "getMillenniums")
49+
50+
case class ExtractIntervalCenturies(child: Expression)
51+
extends ExtractIntervalPart(child, IntegerType, getCenturies, "getCenturies")
52+
53+
case class ExtractIntervalDecades(child: Expression)
54+
extends ExtractIntervalPart(child, IntegerType, getDecades, "getDecades")
55+
56+
case class ExtractIntervalYears(child: Expression)
57+
extends ExtractIntervalPart(child, IntegerType, getYears, "getYears")
58+
59+
case class ExtractIntervalQuarters(child: Expression)
60+
extends ExtractIntervalPart(child, ByteType, getQuarters, "getQuarters")
61+
62+
case class ExtractIntervalMonths(child: Expression)
63+
extends ExtractIntervalPart(child, ByteType, getMonths, "getMonths")
64+
65+
case class ExtractIntervalDays(child: Expression)
66+
extends ExtractIntervalPart(child, LongType, getDays, "getDays")
67+
68+
case class ExtractIntervalHours(child: Expression)
69+
extends ExtractIntervalPart(child, ByteType, getHours, "getHours")
70+
71+
case class ExtractIntervalMinutes(child: Expression)
72+
extends ExtractIntervalPart(child, ByteType, getMinutes, "getMinutes")
73+
74+
case class ExtractIntervalSeconds(child: Expression)
75+
extends ExtractIntervalPart(child, DecimalType(8, 6), getSeconds, "getSeconds")
76+
77+
case class ExtractIntervalMilliseconds(child: Expression)
78+
extends ExtractIntervalPart(child, DecimalType(8, 3), getMilliseconds, "getMilliseconds")
79+
80+
case class ExtractIntervalMicroseconds(child: Expression)
81+
extends ExtractIntervalPart(child, LongType, getMicroseconds, "getMicroseconds")
82+
83+
// Number of seconds in 10000 years is 315576000001 (30 days per one month)
84+
// which is 12 digits + 6 digits for the fractional part of seconds.
85+
case class ExtractIntervalEpoch(child: Expression)
86+
extends ExtractIntervalPart(child, DecimalType(18, 6), getEpoch, "getEpoch")
87+
88+
object ExtractIntervalPart {
89+
90+
def parseExtractField(
91+
extractField: String,
92+
source: Expression,
93+
errorHandleFunc: => Nothing): Expression = extractField.toUpperCase(Locale.ROOT) match {
94+
case "MILLENNIUM" | "MILLENNIA" | "MIL" | "MILS" => ExtractIntervalMillenniums(source)
95+
case "CENTURY" | "CENTURIES" | "C" | "CENT" => ExtractIntervalCenturies(source)
96+
case "DECADE" | "DECADES" | "DEC" | "DECS" => ExtractIntervalDecades(source)
97+
case "YEAR" | "Y" | "YEARS" | "YR" | "YRS" => ExtractIntervalYears(source)
98+
case "QUARTER" | "QTR" => ExtractIntervalQuarters(source)
99+
case "MONTH" | "MON" | "MONS" | "MONTHS" => ExtractIntervalMonths(source)
100+
case "DAY" | "D" | "DAYS" => ExtractIntervalDays(source)
101+
case "HOUR" | "H" | "HOURS" | "HR" | "HRS" => ExtractIntervalHours(source)
102+
case "MINUTE" | "M" | "MIN" | "MINS" | "MINUTES" => ExtractIntervalMinutes(source)
103+
case "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => ExtractIntervalSeconds(source)
104+
case "MILLISECONDS" | "MSEC" | "MSECS" | "MILLISECON" | "MSECONDS" | "MS" =>
105+
ExtractIntervalMilliseconds(source)
106+
case "MICROSECONDS" | "USEC" | "USECS" | "USECONDS" | "MICROSECON" | "US" =>
107+
ExtractIntervalMicroseconds(source)
108+
case "EPOCH" => ExtractIntervalEpoch(source)
109+
case _ => errorHandleFunc
110+
}
111+
}
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.catalyst.util
19+
20+
import org.apache.spark.sql.types.Decimal
21+
import org.apache.spark.unsafe.types.CalendarInterval
22+
23+
object IntervalUtils {
24+
val MONTHS_PER_YEAR: Int = 12
25+
val MONTHS_PER_QUARTER: Byte = 3
26+
val YEARS_PER_MILLENNIUM: Int = 1000
27+
val YEARS_PER_CENTURY: Int = 100
28+
val YEARS_PER_DECADE: Int = 10
29+
val MICROS_PER_HOUR: Long = DateTimeUtils.MILLIS_PER_HOUR * DateTimeUtils.MICROS_PER_MILLIS
30+
val MICROS_PER_MINUTE: Long = DateTimeUtils.MILLIS_PER_MINUTE * DateTimeUtils.MICROS_PER_MILLIS
31+
val DAYS_PER_MONTH: Byte = 30
32+
val MICROS_PER_MONTH: Long = DAYS_PER_MONTH * DateTimeUtils.SECONDS_PER_DAY
33+
/* 365.25 days per year assumes leap year every four years */
34+
val MICROS_PER_YEAR: Long = (36525L * DateTimeUtils.MICROS_PER_DAY) / 100
35+
36+
def getYears(interval: CalendarInterval): Int = {
37+
interval.months / MONTHS_PER_YEAR
38+
}
39+
40+
def getMillenniums(interval: CalendarInterval): Int = {
41+
getYears(interval) / YEARS_PER_MILLENNIUM
42+
}
43+
44+
def getCenturies(interval: CalendarInterval): Int = {
45+
getYears(interval) / YEARS_PER_CENTURY
46+
}
47+
48+
def getDecades(interval: CalendarInterval): Int = {
49+
getYears(interval) / YEARS_PER_DECADE
50+
}
51+
52+
def getMonths(interval: CalendarInterval): Byte = {
53+
(interval.months % MONTHS_PER_YEAR).toByte
54+
}
55+
56+
def getQuarters(interval: CalendarInterval): Byte = {
57+
(getMonths(interval) / MONTHS_PER_QUARTER + 1).toByte
58+
}
59+
60+
def getDays(interval: CalendarInterval): Long = {
61+
interval.microseconds / DateTimeUtils.MICROS_PER_DAY
62+
}
63+
64+
def getHours(interval: CalendarInterval): Byte = {
65+
((interval.microseconds % DateTimeUtils.MICROS_PER_DAY) / MICROS_PER_HOUR).toByte
66+
}
67+
68+
def getMinutes(interval: CalendarInterval): Byte = {
69+
((interval.microseconds % MICROS_PER_HOUR) / MICROS_PER_MINUTE).toByte
70+
}
71+
72+
def getMicroseconds(interval: CalendarInterval): Long = {
73+
interval.microseconds % MICROS_PER_MINUTE
74+
}
75+
76+
def getSeconds(interval: CalendarInterval): Decimal = {
77+
Decimal(getMicroseconds(interval), 8, 6)
78+
}
79+
80+
def getMilliseconds(interval: CalendarInterval): Decimal = {
81+
Decimal(getMicroseconds(interval), 8, 3)
82+
}
83+
84+
// Returns total number of seconds with microseconds fractional part in the given interval.
85+
def getEpoch(interval: CalendarInterval): Decimal = {
86+
var result = interval.microseconds
87+
result += MICROS_PER_YEAR * (interval.months / MONTHS_PER_YEAR)
88+
result += MICROS_PER_MONTH * (interval.months % MONTHS_PER_YEAR)
89+
Decimal(result, 18, 6)
90+
}
91+
}

0 commit comments

Comments
 (0)