Skip to content

Commit 0be5aa2

Browse files
wangyum authored and ueshin committed
[SPARK-23903][SQL] Add support for date extract
## What changes were proposed in this pull request?

Add support for the date `extract` function:

```sql
spark-sql> SELECT EXTRACT(YEAR FROM TIMESTAMP '2000-12-16 12:21:13');
2000
```

The supported fields are the same as in [Hive](https://github.com/apache/hive/blob/rel/release-2.3.3/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g#L308-L316): `YEAR`, `QUARTER`, `MONTH`, `WEEK`, `DAY`, `DAYOFWEEK`, `HOUR`, `MINUTE`, `SECOND`.

## How was this patch tested?

Unit tests.

Author: Yuming Wang <[email protected]>

Closes #21479 from wangyum/SPARK-23903.
1 parent 1d9338b commit 0be5aa2

File tree

5 files changed

+149
-1
lines changed

5 files changed

+149
-1
lines changed

sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,7 @@ primaryExpression
592592
| identifier #columnReference
593593
| base=primaryExpression '.' fieldName=identifier #dereference
594594
| '(' expression ')' #parenthesizedExpression
595+
| EXTRACT '(' field=identifier FROM source=valueExpression ')' #extract
595596
;
596597

597598
constant
@@ -739,6 +740,7 @@ nonReserved
739740
| VIEW | REPLACE
740741
| IF
741742
| POSITION
743+
| EXTRACT
742744
| NO | DATA
743745
| START | TRANSACTION | COMMIT | ROLLBACK | IGNORE
744746
| SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
@@ -878,6 +880,7 @@ TRAILING: 'TRAILING';
878880

879881
IF: 'IF';
880882
POSITION: 'POSITION';
883+
EXTRACT: 'EXTRACT';
881884

882885
EQ : '=' | '==';
883886
NSEQ: '<=>';

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1206,6 +1206,34 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
12061206
new StringLocate(expression(ctx.substr), expression(ctx.str))
12071207
}
12081208

1209+
/**
1210+
* Create an Extract expression for `EXTRACT(field FROM source)`. The field name is upper-cased
* with Locale.ROOT and matched against YEAR, QUARTER, MONTH, WEEK, DAY, DAYOFWEEK, HOUR, MINUTE
* and SECOND; each match wraps the parsed source expression in the corresponding expression
* (Year, Quarter, Month, WeekOfYear, DayOfMonth, DayOfWeek, Hour, Minute, Second). Any other
* field name raises a ParseException.
* NOTE(review): the error text says "Literals of type" although the value is an extract field,
* not a literal; consider rewording it (any expected test output would need regenerating).
1211+
*/
1212+
override def visitExtract(ctx: ExtractContext): Expression = withOrigin(ctx) {
1213+
ctx.field.getText.toUpperCase(Locale.ROOT) match {
1214+
case "YEAR" =>
1215+
Year(expression(ctx.source))
1216+
case "QUARTER" =>
1217+
Quarter(expression(ctx.source))
1218+
case "MONTH" =>
1219+
Month(expression(ctx.source))
1220+
case "WEEK" =>
1221+
WeekOfYear(expression(ctx.source))
1222+
case "DAY" =>
1223+
DayOfMonth(expression(ctx.source))
1224+
case "DAYOFWEEK" =>
1225+
DayOfWeek(expression(ctx.source))
1226+
case "HOUR" =>
1227+
Hour(expression(ctx.source))
1228+
case "MINUTE" =>
1229+
Minute(expression(ctx.source))
1230+
case "SECOND" =>
1231+
Second(expression(ctx.source))
1232+
case other =>
1233+
throw new ParseException(s"Literals of type '$other' are currently not supported.", ctx)
1234+
}
1235+
}
1236+
12091237
/**
12101238
* Create a (windowed) Function expression.
12111239
*/

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class TableIdentifierParserSuite extends SparkFunSuite {
5151
"rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
5252
"true", "truncate", "update", "user", "values", "with", "regexp", "rlike",
5353
"bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
54-
"int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing")
54+
"int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", "extract")
5555

5656
val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
5757
"natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c;
2+
3+
select extract(year from c) from t;
4+
5+
select extract(quarter from c) from t;
6+
7+
select extract(month from c) from t;
8+
9+
select extract(week from c) from t;
10+
11+
select extract(day from c) from t;
12+
13+
select extract(dayofweek from c) from t;
14+
15+
select extract(hour from c) from t;
16+
17+
select extract(minute from c) from t;
18+
19+
select extract(second from c) from t;
20+
21+
select extract(not_supported from c) from t;
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
-- Automatically generated by SQLQueryTestSuite
2+
-- Number of queries: 11
3+
4+
5+
-- !query 0
6+
CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c
7+
-- !query 0 schema
8+
struct<>
9+
-- !query 0 output
10+
11+
12+
13+
-- !query 1
14+
select extract(year from c) from t
15+
-- !query 1 schema
16+
struct<year(CAST(c AS DATE)):int>
17+
-- !query 1 output
18+
2011
19+
20+
21+
-- !query 2
22+
select extract(quarter from c) from t
23+
-- !query 2 schema
24+
struct<quarter(CAST(c AS DATE)):int>
25+
-- !query 2 output
26+
2
27+
28+
29+
-- !query 3
30+
select extract(month from c) from t
31+
-- !query 3 schema
32+
struct<month(CAST(c AS DATE)):int>
33+
-- !query 3 output
34+
5
35+
36+
37+
-- !query 4
38+
select extract(week from c) from t
39+
-- !query 4 schema
40+
struct<weekofyear(CAST(c AS DATE)):int>
41+
-- !query 4 output
42+
18
43+
44+
45+
-- !query 5
46+
select extract(day from c) from t
47+
-- !query 5 schema
48+
struct<dayofmonth(CAST(c AS DATE)):int>
49+
-- !query 5 output
50+
6
51+
52+
53+
-- !query 6
54+
select extract(dayofweek from c) from t
55+
-- !query 6 schema
56+
struct<dayofweek(CAST(c AS DATE)):int>
57+
-- !query 6 output
58+
6
59+
60+
61+
-- !query 7
62+
select extract(hour from c) from t
63+
-- !query 7 schema
64+
struct<hour(CAST(c AS TIMESTAMP)):int>
65+
-- !query 7 output
66+
7
67+
68+
69+
-- !query 8
70+
select extract(minute from c) from t
71+
-- !query 8 schema
72+
struct<minute(CAST(c AS TIMESTAMP)):int>
73+
-- !query 8 output
74+
8
75+
76+
77+
-- !query 9
78+
select extract(second from c) from t
79+
-- !query 9 schema
80+
struct<second(CAST(c AS TIMESTAMP)):int>
81+
-- !query 9 output
82+
9
83+
84+
85+
-- !query 10
86+
select extract(not_supported from c) from t
87+
-- !query 10 schema
88+
struct<>
89+
-- !query 10 output
90+
org.apache.spark.sql.catalyst.parser.ParseException
91+
92+
Literals of type 'NOT_SUPPORTED' are currently not supported.(line 1, pos 7)
93+
94+
== SQL ==
95+
select extract(not_supported from c) from t
96+
-------^^^

0 commit comments

Comments
 (0)