From 933c89fe292b825e6498eecb5e936271725bef1f Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sat, 18 Apr 2020 08:11:36 +0900 Subject: [PATCH 1/2] Fix --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/datetimeExpressions.scala | 85 ++++++++---- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../sql-tests/results/extract.sql.out | 124 +++++++++--------- 4 files changed, 127 insertions(+), 85 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index aba755cec8990..7f879c606c2a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -423,6 +423,7 @@ object FunctionRegistry { expression[MakeTimestamp]("make_timestamp"), expression[MakeInterval]("make_interval"), expression[DatePart]("date_part"), + expression[Extract]("extract"), // collection functions expression[CreateArray]("array"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 98536caef7be6..da1152b535f06 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2105,6 +2105,31 @@ object DatePart { } } +object DatePartLike { + + def toEquivalentExpr(field: Expression, source: Expression): Expression = { + if (!field.foldable) { + throw new AnalysisException("The field parameter needs to be a foldable string value.") + } + val fieldEval = field.eval() + if (fieldEval == null) { + Literal(null, DoubleType) + } else { + val fieldStr = fieldEval.asInstanceOf[UTF8String].toString + val errMsg = s"Literals of type '$fieldStr' are currently not supported " + + s"for the ${source.dataType.catalogString} type." + if (source.dataType == CalendarIntervalType) { + ExtractIntervalPart.parseExtractField( + fieldStr, + source, + throw new AnalysisException(errMsg)) + } else { + DatePart.parseExtractField(fieldStr, source, throw new AnalysisException(errMsg)) + } + } + } +} + @ExpressionDescription( usage = "_FUNC_(field, source) - Extracts a part of the date/timestamp or interval source.", arguments = """ @@ -2158,28 +2183,8 @@ object DatePart { case class DatePart(field: Expression, source: Expression, child: Expression) extends RuntimeReplaceable { - def this(field: Expression, source: Expression) { - this(field, source, { - if (!field.foldable) { - throw new AnalysisException("The field parameter needs to be a foldable string value.") - } - val fieldEval = field.eval() - if (fieldEval == null) { - Literal(null, DoubleType) - } else { - val fieldStr = fieldEval.asInstanceOf[UTF8String].toString - val errMsg = s"Literals of type '$fieldStr' are currently not supported " + - s"for the ${source.dataType.catalogString} type." - if (source.dataType == CalendarIntervalType) { - ExtractIntervalPart.parseExtractField( - fieldStr, - source, - throw new AnalysisException(errMsg)) - } else { - DatePart.parseExtractField(fieldStr, source, throw new AnalysisException(errMsg)) - } - } - }) + def this(field: Expression, source: Expression) = { + this(field, source, DatePartLike.toEquivalentExpr(field, source)) } override def flatArguments: Iterator[Any] = Iterator(field, source) @@ -2187,6 +2192,42 @@ case class DatePart(field: Expression, source: Expression, child: Expression) override def prettyName: String = "date_part" } +@ExpressionDescription( + usage = "_FUNC_(field FROM source) - Extracts a part of the date/timestamp or interval source.", + arguments = """ + Arguments: + * field - selects which part of the source should be extracted and supported string values + are the same with the `date_part` fields. + * source - a date/timestamp or interval column from where `field` should be extracted + """, + examples = """ + Examples: + > SELECT _FUNC_(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'); + 2019 + > SELECT _FUNC_(week FROM timestamp'2019-08-12 01:00:00.123456'); + 33 + > SELECT _FUNC_(doy FROM DATE'2019-08-12'); + 224 + > SELECT _FUNC_(SECONDS FROM timestamp'2019-10-01 00:00:01.000001'); + 1.000001 + > SELECT _FUNC_(days FROM interval 1 year 10 months 5 days); + 5 + > SELECT _FUNC_(seconds FROM interval 5 hours 30 seconds 1 milliseconds 1 microseconds); + 30.001001 + """, + since = "3.0.0") +case class Extract(field: Expression, source: Expression, child: Expression) + extends RuntimeReplaceable { + + def this(field: Expression, source: Expression) = { + this(field, source, DatePartLike.toEquivalentExpr(field, source)) + } + + override def flatArguments: Iterator[Any] = Iterator(field, source) + override def sql: String = s"$prettyName(${field.sql} FROM ${source.sql})" + override def prettyName: String = "extract" +} + /** * Returns the interval from startTimestamp to endTimestamp in which the `months` and `day` field * is set to 0 and the `microseconds` field is initialized to the microsecond difference diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d5c44066d3c46..ff362e7373413 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1550,7 +1550,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging */ override def visitExtract(ctx: ExtractContext): Expression = withOrigin(ctx) { val arguments = Seq(Literal(ctx.field.getText), expression(ctx.source)) - UnresolvedFunction("date_part", arguments, isDistinct = false) + UnresolvedFunction("extract", arguments, isDistinct = false) } /** diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index 1f77c67871ae8..47cd5180ddebd 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -13,7 +13,7 @@ struct<> -- !query select extract(millennium from c) from t -- !query schema -struct +struct -- !query output 3 @@ -21,7 +21,7 @@ struct -- !query select extract(millennia from c) from t -- !query schema -struct +struct -- !query output 3 @@ -29,7 +29,7 @@ struct -- !query select extract(mil from c) from t -- !query schema -struct +struct -- !query output 3 @@ -37,7 +37,7 @@ struct -- !query select extract(mils from c) from t -- !query schema -struct +struct -- !query output 3 @@ -45,7 +45,7 @@ struct -- !query select extract(century from c) from t -- !query schema -struct +struct -- !query output 21 @@ -53,7 +53,7 @@ struct -- !query select extract(centuries from c) from t -- !query schema -struct +struct -- !query output 21 @@ -61,7 +61,7 @@ struct -- !query select extract(c from c) from t -- !query schema -struct +struct -- !query output 21 @@ -69,7 +69,7 @@ struct -- !query select extract(cent from c)from t -- !query schema -struct +struct -- !query output 21 @@ -77,7 +77,7 @@ struct -- !query select extract(decade from c) from t -- !query schema -struct +struct -- !query output 201 @@ -85,7 +85,7 @@ struct -- !query select extract(decades from c) from t -- !query schema -struct +struct -- !query output 201 @@ -93,7 +93,7 @@ struct -- !query select extract(dec from c) from t -- !query schema -struct +struct -- !query output 201 @@ -101,7 +101,7 @@ struct -- !query select extract(decs from c) from t -- !query schema -struct +struct -- !query output 201 @@ -109,7 +109,7 @@ struct -- !query select extract(year from c), extract(year from i) from t -- !query schema -struct +struct -- !query output 2011 11 @@ -117,7 +117,7 @@ struct -- !query select extract(y from c), extract(y from i) from t -- !query schema -struct +struct -- !query output 2011 11 @@ -125,7 +125,7 @@ struct -- !query select extract(years from c), extract(years from i) from t -- !query schema -struct +struct -- !query output 2011 11 @@ -133,7 +133,7 @@ struct -- !query select extract(yr from c), extract(yr from i) from t -- !query schema -struct +struct -- !query output 2011 11 @@ -141,7 +141,7 @@ struct -- !query select extract(yrs from c), extract(yrs from i) from t -- !query schema -struct +struct -- !query output 2011 11 @@ -149,7 +149,7 @@ struct -- !query select extract(isoyear from c) from t -- !query schema -struct +struct -- !query output 2011 @@ -157,7 +157,7 @@ struct -- !query select extract(quarter from c) from t -- !query schema -struct +struct -- !query output 2 @@ -165,7 +165,7 @@ struct -- !query select extract(qtr from c) from t -- !query schema -struct +struct -- !query output 2 @@ -173,7 +173,7 @@ struct -- !query select extract(month from c), extract(month from i) from t -- !query schema -struct +struct -- !query output 5 8 @@ -181,7 +181,7 @@ struct -- !query select extract(mon from c), extract(mon from i) from t -- !query schema -struct +struct -- !query output 5 8 @@ -189,7 +189,7 @@ struct -- !query select extract(mons from c), extract(mons from i) from t -- !query schema -struct +struct -- !query output 5 8 @@ -197,7 +197,7 @@ struct -- !query select extract(months from c), extract(months from i) from t -- !query schema -struct +struct -- !query output 5 8 @@ -205,7 +205,7 @@ struct -- !query select extract(week from c) from t -- !query schema -struct +struct -- !query output 18 @@ -213,7 +213,7 @@ struct -- !query select extract(w from c) from t -- !query schema -struct +struct -- !query output 18 @@ -221,7 +221,7 @@ struct -- !query select extract(weeks from c) from t -- !query schema -struct +struct -- !query output 18 @@ -229,7 +229,7 @@ struct -- !query select extract(day from c), extract(day from i) from t -- !query schema -struct +struct -- !query output 6 30 @@ -237,7 +237,7 @@ struct -- !query select extract(d from c), extract(d from i) from t -- !query schema -struct +struct -- !query output 6 30 @@ -245,7 +245,7 @@ struct -- !query select extract(days from c), extract(days from i) from t -- !query schema -struct +struct -- !query output 6 30 @@ -253,7 +253,7 @@ struct -- !query select extract(dayofweek from c) from t -- !query schema -struct +struct -- !query output 6 @@ -261,7 +261,7 @@ struct -- !query select extract(dow from c) from t -- !query schema -struct +struct -- !query output 5 @@ -269,7 +269,7 @@ struct -- !query select extract(isodow from c) from t -- !query schema -struct +struct -- !query output 5 @@ -277,7 +277,7 @@ struct -- !query select extract(doy from c) from t -- !query schema -struct +struct -- !query output 126 @@ -285,7 +285,7 @@ struct -- !query select extract(hour from c), extract(hour from i) from t -- !query schema -struct +struct -- !query output 7 40 @@ -293,7 +293,7 @@ struct -- !query select extract(h from c), extract(h from i) from t -- !query schema -struct +struct -- !query output 7 40 @@ -301,7 +301,7 @@ struct -- !query select extract(hours from c), extract(hours from i) from t -- !query schema -struct +struct -- !query output 7 40 @@ -309,7 +309,7 @@ struct -- !query select extract(hr from c), extract(hr from i) from t -- !query schema -struct +struct -- !query output 7 40 @@ -317,7 +317,7 @@ struct -- !query select extract(hrs from c), extract(hrs from i) from t -- !query schema -struct +struct -- !query output 7 40 @@ -325,7 +325,7 @@ struct -- !query select extract(minute from c), extract(minute from i) from t -- !query schema -struct +struct -- !query output 8 50 @@ -333,7 +333,7 @@ struct -- !query select extract(m from c), extract(m from i) from t -- !query schema -struct +struct -- !query output 8 50 @@ -341,7 +341,7 @@ struct -- !query select extract(min from c), extract(min from i) from t -- !query schema -struct +struct -- !query output 8 50 @@ -349,7 +349,7 @@ struct -- !query select extract(mins from c), extract(mins from i) from t -- !query schema -struct +struct -- !query output 8 50 @@ -357,7 +357,7 @@ struct -- !query select extract(minutes from c), extract(minutes from i) from t -- !query schema -struct +struct -- !query output 8 50 @@ -365,7 +365,7 @@ struct -- !query select extract(second from c), extract(second from i) from t -- !query schema -struct +struct -- !query output 9.123456 6.789000 @@ -373,7 +373,7 @@ struct +struct -- !query output 9.123456 6.789000 @@ -381,7 +381,7 @@ struct -- !query select extract(sec from c), extract(sec from i) from t -- !query schema -struct +struct -- !query output 9.123456 6.789000 @@ -389,7 +389,7 @@ struct +struct -- !query output 9.123456 6.789000 @@ -397,7 +397,7 @@ struct +struct -- !query output 9.123456 6.789000 @@ -405,7 +405,7 @@ struct +struct -- !query output 9123.456 @@ -413,7 +413,7 @@ struct -- !query select extract(msec from c) from t -- !query schema -struct +struct -- !query output 9123.456 @@ -421,7 +421,7 @@ struct -- !query select extract(msecs from c) from t -- !query schema -struct +struct -- !query output 9123.456 @@ -429,7 +429,7 @@ struct -- !query select extract(millisecon from c) from t -- !query schema -struct +struct -- !query output 9123.456 @@ -437,7 +437,7 @@ struct -- !query select extract(mseconds from c) from t -- !query schema -struct +struct -- !query output 9123.456 @@ -445,7 +445,7 @@ struct -- !query select extract(ms from c) from t -- !query schema -struct +struct -- !query output 9123.456 @@ -453,7 +453,7 @@ struct -- !query select extract(microseconds from c) from t -- !query schema -struct +struct -- !query output 9123456 @@ -461,7 +461,7 @@ struct -- !query select extract(usec from c) from t -- !query schema -struct +struct -- !query output 9123456 @@ -469,7 +469,7 @@ struct -- !query select extract(usecs from c) from t -- !query schema -struct +struct -- !query output 9123456 @@ -477,7 +477,7 @@ struct -- !query select extract(useconds from c) from t -- !query schema -struct +struct -- !query output 9123456 @@ -485,7 +485,7 @@ struct -- !query select extract(microsecon from c) from t -- !query schema -struct +struct -- !query output 9123456 @@ -493,7 +493,7 @@ struct -- !query select extract(us from c) from t -- !query schema -struct +struct -- !query output 9123456 @@ -501,7 +501,7 @@ struct -- !query select extract(epoch from c) from t -- !query schema -struct +struct -- !query output 1304665689.123456 From 557973654f8cb10cfb826bd58c3609a2696cb94b Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sat, 18 Apr 2020 15:34:45 +0900 Subject: [PATCH 2/2] Fix --- .../sql-tests/results/postgreSQL/date.sql.out | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out index e5a7c43c0a8ab..0dac13d9deae0 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -574,7 +574,7 @@ struct -- !query SELECT EXTRACT(EPOCH FROM DATE '1970-01-01') -- !query schema -struct +struct -- !query output 0.000000 @@ -582,7 +582,7 @@ struct -- !query SELECT EXTRACT(EPOCH FROM TIMESTAMP '1970-01-01') -- !query schema -struct +struct -- !query output 0.000000 @@ -590,7 +590,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM TO_DATE('0101-12-31 BC', 'yyyy-MM-dd G')) -- !query schema -struct +struct -- !query output -2 @@ -598,7 +598,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM TO_DATE('0100-12-31 BC', 'yyyy-MM-dd G')) -- !query schema -struct +struct -- !query output -1 @@ -606,7 +606,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) -- !query schema -struct +struct -- !query output -1 @@ -614,7 +614,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM DATE '0001-01-01') -- !query schema -struct +struct -- !query output 1 @@ -622,7 +622,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM DATE '0001-01-01 AD') -- !query schema -struct +struct -- !query output 1 @@ -630,7 +630,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM DATE '1900-12-31') -- !query schema -struct +struct -- !query output 19 @@ -638,7 +638,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM DATE '1901-01-01') -- !query schema -struct +struct -- !query output 20 @@ -646,7 +646,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM DATE '2000-12-31') -- !query schema -struct +struct -- !query output 20 @@ -654,7 +654,7 @@ struct -- !query SELECT EXTRACT(CENTURY FROM DATE '2001-01-01') -- !query schema -struct +struct -- !query output 21 @@ -670,7 +670,7 @@ true -- !query SELECT EXTRACT(MILLENNIUM FROM TO_DATE('0001-12-31 BC', 'yyyy-MM-dd G')) -- !query schema -struct +struct -- !query output -1 @@ -678,7 +678,7 @@ struct -- !query SELECT EXTRACT(MILLENNIUM FROM DATE '0001-01-01 AD') -- !query schema -struct +struct -- !query output 1 @@ -686,7 +686,7 @@ struct -- !query SELECT EXTRACT(MILLENNIUM FROM DATE '1000-12-31') -- !query schema -struct +struct -- !query output 1 @@ -694,7 +694,7 @@ struct -- !query SELECT EXTRACT(MILLENNIUM FROM DATE '1001-01-01') -- !query schema -struct +struct -- !query output 2 @@ -702,7 +702,7 @@ struct -- !query SELECT EXTRACT(MILLENNIUM FROM DATE '2000-12-31') -- !query schema -struct +struct -- !query output 2 @@ -710,7 +710,7 @@ struct -- !query SELECT EXTRACT(MILLENNIUM FROM DATE '2001-01-01') -- !query schema -struct +struct -- !query output 3 @@ -718,7 +718,7 @@ struct -- !query SELECT EXTRACT(MILLENNIUM FROM CURRENT_DATE) -- !query schema -struct +struct -- !query output 3 @@ -726,7 +726,7 @@ struct -- !query SELECT EXTRACT(DECADE FROM DATE '1994-12-25') -- !query schema -struct +struct -- !query output 199 @@ -734,7 +734,7 @@ struct -- !query SELECT EXTRACT(DECADE FROM DATE '0010-01-01') -- !query schema -struct +struct -- !query output 1 @@ -742,7 +742,7 @@ struct -- !query SELECT EXTRACT(DECADE FROM DATE '0009-12-31') -- !query schema -struct +struct -- !query output 0 @@ -750,7 +750,7 @@ struct -- !query SELECT EXTRACT(DECADE FROM TO_DATE('0001-01-01 BC', 'yyyy-MM-dd G')) -- !query schema -struct +struct -- !query output 0 @@ -758,7 +758,7 @@ struct -- !query SELECT EXTRACT(DECADE FROM TO_DATE('0002-12-31 BC', 'yyyy-MM-dd G')) -- !query schema -struct +struct -- !query output -1 @@ -766,7 +766,7 @@ struct -- !query SELECT EXTRACT(DECADE FROM TO_DATE('0011-01-01 BC', 'yyyy-MM-dd G')) -- !query schema -struct +struct -- !query output -1 @@ -774,7 +774,7 @@ struct -- !query SELECT EXTRACT(DECADE FROM TO_DATE('0012-12-31 BC', 'yyyy-MM-dd G')) -- !query schema -struct +struct -- !query output -2 @@ -790,7 +790,7 @@ true -- !query SELECT EXTRACT(CENTURY FROM TIMESTAMP '1970-03-20 04:30:00.00000') -- !query schema -struct +struct -- !query output 20