Skip to content

Commit 1985437

Browse files
yaooqinn authored and
cloud-fan committed
[SPARK-31474][SQL] Consistency between dayofweek/dow in extract expression and dayofweek function
### What changes were proposed in this pull request? ```sql spark-sql> SELECT extract(dayofweek from '2009-07-26'); 1 spark-sql> SELECT extract(dow from '2009-07-26'); 0 spark-sql> SELECT extract(isodow from '2009-07-26'); 7 spark-sql> SELECT dayofweek('2009-07-26'); 1 spark-sql> SELECT weekday('2009-07-26'); 6 ``` Currently, there are 4 different day-of-week ranges: 1. the function `dayofweek` (2.3.0) and extracting `dayofweek` (2.4.0) return Sunday(1) to Saturday(7) 2. extracting `dow` (3.0.0) returns Sunday(0) to Saturday(6) 3. extracting `isodow` (3.0.0) returns Monday(1) to Sunday(7) 4. the function `weekday` (2.4.0) returns Monday(0) to Sunday(6) Extracting `dayofweek` and extracting `dow` are both derived from PostgreSQL but have different meanings. https://issues.apache.org/jira/browse/SPARK-23903 https://issues.apache.org/jira/browse/SPARK-28623 In this PR, we make extracting `dow` the same as extracting `dayofweek` and the `dayofweek` function, for historical reasons and to avoid breaking anything. We also add more documentation to the extract function to make the extract fields easier to understand. ### Why are the changes needed? To ensure consistency. ### Does this PR introduce any user-facing change? Yes, the docs are updated and extracting `dow` now returns the same result as `dayofweek`. ### How was this patch tested? 1. modified unit tests 2. local SQL doc verification #### before ![image](https://user-images.githubusercontent.com/8326978/79601949-3535b100-811c-11ea-957b-a33d68641181.png) #### after ![image](https://user-images.githubusercontent.com/8326978/79601847-12a39800-811c-11ea-8ff6-aa329255d099.png) Closes #28248 from yaooqinn/SPARK-31474. Authored-by: Kent Yao <[email protected]> Signed-off-by: Wenchen Fan <[email protected]>
1 parent 7103f19 commit 1985437

File tree

4 files changed

+47
-41
lines changed

4 files changed

+47
-41
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,9 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions
1919

20-
import java.sql.Timestamp
2120
import java.time.{DateTimeException, LocalDate, LocalDateTime, ZoneId}
2221
import java.time.temporal.IsoFields
23-
import java.util.{Locale, TimeZone}
22+
import java.util.Locale
2423

2524
import scala.util.control.NonFatal
2625

@@ -2120,8 +2119,7 @@ object DatePart {
21202119
case "MONTH" | "MON" | "MONS" | "MONTHS" => Month(source)
21212120
case "WEEK" | "W" | "WEEKS" => WeekOfYear(source)
21222121
case "DAY" | "D" | "DAYS" => DayOfMonth(source)
2123-
case "DAYOFWEEK" => DayOfWeek(source)
2124-
case "DOW" => Subtract(DayOfWeek(source), Literal(1))
2122+
case "DAYOFWEEK" | "DOW" => DayOfWeek(source)
21252123
case "ISODOW" => Add(WeekDay(source), Literal(1))
21262124
case "DOY" => DayOfYear(source)
21272125
case "HOUR" | "H" | "HOURS" | "HR" | "HRS" => Hour(source)
@@ -2161,38 +2159,12 @@ object DatePartLike {
21612159
}
21622160
}
21632161

2162+
// scalastyle:off line.size.limit
21642163
@ExpressionDescription(
21652164
usage = "_FUNC_(field, source) - Extracts a part of the date/timestamp or interval source.",
21662165
arguments = """
21672166
Arguments:
2168-
* field - selects which part of the source should be extracted.
2169-
Supported string values of `field` for dates and timestamps are:
2170-
["MILLENNIUM", ("MILLENNIA", "MIL", "MILS"),
2171-
"CENTURY", ("CENTURIES", "C", "CENT"),
2172-
"DECADE", ("DECADES", "DEC", "DECS"),
2173-
"YEAR", ("Y", "YEARS", "YR", "YRS"),
2174-
"ISOYEAR",
2175-
"QUARTER", ("QTR"),
2176-
"MONTH", ("MON", "MONS", "MONTHS"),
2177-
"WEEK", ("W", "WEEKS"),
2178-
"DAY", ("D", "DAYS"),
2179-
"DAYOFWEEK",
2180-
"DOW",
2181-
"ISODOW",
2182-
"DOY",
2183-
"HOUR", ("H", "HOURS", "HR", "HRS"),
2184-
"MINUTE", ("M", "MIN", "MINS", "MINUTES"),
2185-
"SECOND", ("S", "SEC", "SECONDS", "SECS"),
2186-
"MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS"),
2187-
"MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US"),
2188-
"EPOCH"]
2189-
Supported string values of `field` for intervals are:
2190-
["YEAR", ("Y", "YEARS", "YR", "YRS"),
2191-
"MONTH", ("MON", "MONS", "MONTHS"),
2192-
"DAY", ("D", "DAYS"),
2193-
"HOUR", ("H", "HOURS", "HR", "HRS"),
2194-
"MINUTE", ("M", "MIN", "MINS", "MINUTES"),
2195-
"SECOND", ("S", "SEC", "SECONDS", "SECS")]
2167+
* field - selects which part of the source should be extracted, and supported string values are as same as the fields of the equivalent function `EXTRACT`.
21962168
* source - a date/timestamp or interval column from where `field` should be extracted
21972169
""",
21982170
examples = """
@@ -2210,8 +2182,12 @@ object DatePartLike {
22102182
> SELECT _FUNC_('seconds', interval 5 hours 30 seconds 1 milliseconds 1 microseconds);
22112183
30.001001
22122184
""",
2185+
note = """
2186+
The _FUNC_ function is equivalent to the SQL-standard function `EXTRACT(field FROM source)`
2187+
""",
22132188
group = "datetime_funcs",
22142189
since = "3.0.0")
2190+
// scalastyle:on line.size.limit
22152191
case class DatePart(field: Expression, source: Expression, child: Expression)
22162192
extends RuntimeReplaceable {
22172193

@@ -2224,12 +2200,38 @@ case class DatePart(field: Expression, source: Expression, child: Expression)
22242200
override def prettyName: String = "date_part"
22252201
}
22262202

2203+
// scalastyle:off line.size.limit
22272204
@ExpressionDescription(
22282205
usage = "_FUNC_(field FROM source) - Extracts a part of the date/timestamp or interval source.",
22292206
arguments = """
22302207
Arguments:
2231-
* field - selects which part of the source should be extracted and supported string values
2232-
are the same with the `date_part` fields.
2208+
* field - selects which part of the source should be extracted
2209+
- Supported string values of `field` for dates and timestamps are:
2210+
- "MILLENNIUM", ("MILLENNIA", "MIL", "MILS") - the conventional numbering of millennia
2211+
- "CENTURY", ("CENTURIES", "C", "CENT") - the conventional numbering of centuries
2212+
- "DECADE", ("DECADES", "DEC", "DECS") - the year field divided by 10
2213+
- "YEAR", ("Y", "YEARS", "YR", "YRS") - the year field
2214+
- "ISOYEAR" - the ISO 8601 week-numbering year that the datetime falls in
2215+
- "QUARTER", ("QTR") - the quarter (1 - 4) of the year that the datetime falls in
2216+
- "MONTH", ("MON", "MONS", "MONTHS") - the month field (1 - 12)
2217+
- "WEEK", ("W", "WEEKS") - the number of the ISO 8601 week-of-week-based-year. A week is considered to start on a Monday and week 1 is the first week with >3 days. In the ISO week-numbering system, it is possible for early-January dates to be part of the 52nd or 53rd week of the previous year, and for late-December dates to be part of the first week of the next year. For example, 2005-01-02 is part of the 53rd week of year 2004, while 2012-12-31 is part of the first week of 2013
2218+
- "DAY", ("D", "DAYS") - the day of the month field (1 - 31)
2219+
- "DAYOFWEEK",("DOW") - the day of the week for datetime as Sunday(1) to Saturday(7)
2220+
- "ISODOW" - ISO 8601 based day of the week for datetime as Monday(1) to Sunday(7)
2221+
- "DOY" - the day of the year (1 - 365/366)
2222+
- "HOUR", ("H", "HOURS", "HR", "HRS") - The hour field (0 - 23)
2223+
- "MINUTE", ("M", "MIN", "MINS", "MINUTES") - the minutes field (0 - 59)
2224+
- "SECOND", ("S", "SEC", "SECONDS", "SECS") - the seconds field, including fractional parts
2225+
- "MILLISECONDS", ("MSEC", "MSECS", "MILLISECON", "MSECONDS", "MS") - the seconds field, including fractional parts, multiplied by 1000. Note that this includes full seconds
2226+
- "MICROSECONDS", ("USEC", "USECS", "USECONDS", "MICROSECON", "US") - The seconds field, including fractional parts, multiplied by 1000000. Note that this includes full seconds
2227+
- "EPOCH" - the number of seconds with fractional part in microsecond precision since 1970-01-01 00:00:00 local time (can be negative)
2228+
- Supported string values of `field` for interval(which consists of `months`, `days`, `microseconds`) are:
2229+
- "YEAR", ("Y", "YEARS", "YR", "YRS") - the total `months` / 12
2230+
- "MONTH", ("MON", "MONS", "MONTHS") - the total `months` % 12
2231+
- "DAY", ("D", "DAYS") - the `days` part of interval
2232+
- "HOUR", ("H", "HOURS", "HR", "HRS") - how many hours the `microseconds` contains
2233+
- "MINUTE", ("M", "MIN", "MINS", "MINUTES") - how many minutes left after taking hours from `microseconds`
2234+
- "SECOND", ("S", "SEC", "SECONDS", "SECS") - how many second with fractions left after taking hours and minutes from `microseconds`
22332235
* source - a date/timestamp or interval column from where `field` should be extracted
22342236
""",
22352237
examples = """
@@ -2247,7 +2249,11 @@ case class DatePart(field: Expression, source: Expression, child: Expression)
22472249
> SELECT _FUNC_(seconds FROM interval 5 hours 30 seconds 1 milliseconds 1 microseconds);
22482250
30.001001
22492251
""",
2252+
note = """
2253+
The _FUNC_ function is equivalent to `date_part(field, source)`.
2254+
""",
22502255
since = "3.0.0")
2256+
// scalastyle:on line.size.limit
22512257
case class Extract(field: Expression, source: Expression, child: Expression)
22522258
extends RuntimeReplaceable {
22532259

sql/core/src/test/resources/sql-tests/results/date_part.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ select date_part('dow', c) from t
255255
-- !query schema
256256
struct<date_part('dow', t.`c`):int>
257257
-- !query output
258-
5
258+
6
259259

260260

261261
-- !query

sql/core/src/test/resources/sql-tests/results/extract.sql.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ select extract(dow from c) from t
263263
-- !query schema
264264
struct<extract('dow' FROM t.`c`):int>
265265
-- !query output
266-
5
266+
6
267267

268268

269269
-- !query

sql/core/src/test/resources/sql-tests/results/postgreSQL/timestamp.sql.out

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -288,11 +288,11 @@ SELECT '' AS `54`, d1 as `timestamp`,
288288
-- !query schema
289289
struct<54:string,timestamp:timestamp,isoyear:int,week:int,dow:int>
290290
-- !query output
291-
1969-12-31 16:00:00 1970 1 3
292-
1997-01-02 00:00:00 1997 1 4
293-
1997-01-02 03:04:05 1997 1 4
294-
1997-02-10 17:32:01 1997 7 1
295-
2001-09-22 18:19:20 2001 38 6
291+
1969-12-31 16:00:00 1970 1 4
292+
1997-01-02 00:00:00 1997 1 5
293+
1997-01-02 03:04:05 1997 1 5
294+
1997-02-10 17:32:01 1997 7 2
295+
2001-09-22 18:19:20 2001 38 7
296296

297297

298298
-- !query

0 commit comments

Comments
 (0)