From 45bf353866991248773ae2e59cc3951f93111189 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 9 May 2017 01:53:53 +0900
Subject: [PATCH 1/7] Add single argument support for to_timestamp in SQL with
 documentation improvement

---
 R/pkg/R/functions.R                           | 10 +++----
 python/pyspark/sql/functions.py               | 11 ++-----
 .../expressions/datetimeExpressions.scala     | 30 ++++++++++---------
 .../org/apache/spark/sql/functions.scala      |  6 ++--
 .../resources/sql-tests/inputs/datetime.sql   |  2 ++
 .../sql-tests/results/datetime.sql.out        | 10 ++++++-
 6 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 5f9d11475c94..e197ac0c40ba 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1752,15 +1752,15 @@ setMethod("toRadians",
 
 #' to_date
 #'
-#' Converts the column into a DateType. You may optionally specify a format
+#' Converts the column into a date column. You may optionally specify a format
 #' according to the rules in:
 #' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}.
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
-#' The default format is 'yyyy-MM-dd'.
+#' By default, it follows casting rules to a date if the format is omitted.
 #'
 #' @param x Column to parse.
-#' @param format string to use to parse x Column to DateType. (optional)
+#' @param format string to use to parse x column to a date column. (optional)
 #'
 #' @rdname to_date
 #' @name to_date
@@ -1827,7 +1827,7 @@ setMethod("to_json", signature(x = "Column"),
 
 #' to_timestamp
 #'
-#' Converts the column into a TimestampType. You may optionally specify a format
+#' Converts the column into a timestamp column. You may optionally specify a format
 #' according to the rules in:
 #' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}.
 #' If the string cannot be parsed according to the specified format (or default),
@@ -1835,7 +1835,7 @@ setMethod("to_json", signature(x = "Column"),
 #' The default format is 'yyyy-MM-dd HH:mm:ss'.
 #'
 #' @param x Column to parse.
-#' @param format string to use to parse x Column to DateType. (optional)
+#' @param format string to use to parse x column to a timestamp column. (optional)
 #'
 #' @rdname to_timestamp
 #' @name to_timestamp
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 8b3487c3f108..302cfaa8dcf0 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -144,12 +144,6 @@ def _():
                'measured in radians.',
 }
 
-_functions_2_2 = {
-    'to_date': 'Converts a string date into a DateType using the (optionally) specified format.',
-    'to_timestamp': 'Converts a string timestamp into a timestamp type using the ' +
-                    '(optionally) specified format.',
-}
-
 # math functions that take two arguments as input
 _binary_mathfunctions = {
     'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
@@ -987,9 +981,10 @@ def months_between(date1, date2):
 def to_date(col, format=None):
     """Converts a :class:`Column` of :class:`pyspark.sql.types.StringType` or
     :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.DateType`
-    using the optionally specified format. Default format is 'yyyy-MM-dd'.
-    Specify formats according to
+    using the optionally specified format. Specify formats according to
     `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
+    By default, it follows casting rules to :class:`pyspark.sql.types.DateType` if the format
+    is omitted.
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
     >>> df.select(to_date(df.t).alias('date')).collect()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index a98cd33f2780..350d0d6a0349 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1148,13 +1148,6 @@ case class ToUTCTimestamp(left: Expression, right: Expression)
 /**
  * Returns the date part of a timestamp or string.
  */
-@ExpressionDescription(
-  usage = "_FUNC_(expr) - Extracts the date part of the date or timestamp expression `expr`.",
-  extended = """
-    Examples:
-      > SELECT _FUNC_('2009-07-30 04:17:52');
-       2009-07-30
-  """)
 case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
 
   // Implicit casting of spark will accept string in both date and timestamp format, as
@@ -1175,15 +1168,19 @@ case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastIn
 /**
  * Parses a column to a date based on the given format.
  */
-// scalastyle:off line.size.limit
 @ExpressionDescription(
-  usage = "_FUNC_(date_str, fmt) - Parses the `left` expression with the `fmt` expression. Returns null with invalid input.",
+  usage = """
+    _FUNC_(date_str[, fmt]) - Parses the `date_str` expression with the `fmt` expression to
+      a date. Returns null with invalid input. By default, it follows casting rules to a date if
+      the `fmt` is omitted.
+  """,
   extended = """
     Examples:
+      > SELECT _FUNC_('2009-07-30 04:17:52');
+       2009-07-30
       > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd');
        2016-12-31
   """)
-// scalastyle:on line.size.limit
 case class ParseToDate(left: Expression, format: Option[Expression], child: Expression)
   extends RuntimeReplaceable {
 
@@ -1212,15 +1209,18 @@ case class ParseToDate(left: Expression, format: Option[Expression], child: Expr
 /**
  * Parses a column to a timestamp based on the supplied format.
  */
-// scalastyle:off line.size.limit
 @ExpressionDescription(
-  usage = "_FUNC_(timestamp, fmt) - Parses the `left` expression with the `format` expression to a timestamp. Returns null with invalid input.",
+  usage = """
+    _FUNC_(timestamp[, fmt]) - Parses the `timestamp` expression with the `format` expression to
+      a timestamp. Returns null with invalid input. Default `fmt` is 'yyyy-MM-dd HH:mm:ss'.
+ """, extended = """ Examples: + > SELECT _FUNC_('2016-12-31 00:12:00'); + 2016-12-31 00:12:00 > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd'); - 2016-12-31 00:00:00.0 + 2016-12-31 00:00:00 """) -// scalastyle:on line.size.limit case class ParseToTimestamp(left: Expression, format: Expression, child: Expression) extends RuntimeReplaceable { @@ -1228,6 +1228,8 @@ case class ParseToTimestamp(left: Expression, format: Expression, child: Express this(left, format, Cast(UnixTimestamp(left, format), TimestampType)) } + def this(left: Expression) = this(left, Literal("yyyy-MM-dd HH:mm:ss")) + override def flatArguments: Iterator[Any] = Iterator(left, format) override def sql: String = s"$prettyName(${left.sql}, ${format.sql})" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 987011edfe1e..7fdeaa415394 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2704,15 +2704,15 @@ object functions { } /** - * Converts the column into DateType. + * Converts the column into `DateType` by casting rules to `DateType`. * * @group datetime_funcs * @since 1.5.0 */ - def to_date(e: Column): Column = withExpr { ToDate(e.expr) } + def to_date(e: Column): Column = withExpr { new ParseToDate(e.expr) } /** - * Converts the column into a DateType with a specified format + * Converts the column into a `DateType` with a specified format * (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html]) * return null if fail. * diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 3fd1c37e7179..075a406b0ca5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -2,3 +2,5 @@ -- [SPARK-16836] current_date and current_timestamp literals select current_date = current_date(), current_timestamp = current_timestamp(); + +select to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd'); diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 032e4258500f..13d6b96bf496 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 1 +-- Number of queries: 2 -- !query 0 @@ -8,3 +8,11 @@ select current_date = current_date(), current_timestamp = current_timestamp() struct<(current_date() = current_date()):boolean,(current_timestamp() = current_timestamp()):boolean> -- !query 0 output true true + + +-- !query 1 +select to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd') +-- !query 1 schema +struct +-- !query 1 output +2016-12-31 00:12:00 2016-12-31 00:00:00 From f8921f4541422f3cd26f0cde6d7c0e2640f29c80 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 9 May 2017 03:25:29 +0900 Subject: [PATCH 2/7] Place the default value in single place to make sure --- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 7fdeaa415394..fbebbe522bb1 100644 --- 
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2689,7 +2689,7 @@ object functions {
    * @since 2.2.0
    */
   def to_timestamp(s: Column): Column = withExpr {
-    new ParseToTimestamp(s.expr, Literal("yyyy-MM-dd HH:mm:ss"))
+    new ParseToTimestamp(s.expr)
   }
 
   /**

From b2d3b0a39594ed6fde664ba0e754779c1278c4f9 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 9 May 2017 10:00:15 +0900
Subject: [PATCH 3/7] Python test in column name with backticks

---
 python/pyspark/sql/tests.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index e3fe01eae243..995142d2c583 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -2518,7 +2518,7 @@ def test_datetime_functions(self):
         from datetime import date, datetime
         df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol")
         parse_result = df.select(functions.to_date(functions.col("dateCol"))).first()
-        self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)'])
+        self.assertEquals(date(2017, 1, 22), parse_result['to_date(`dateCol`)'])
 
     @unittest.skipIf(sys.version_info < (3, 3), "Unittest < 3.3 doesn't support mocking")
     def test_unbounded_frames(self):

From 497a22965af3a74e89c73b60667ab19fecb0af39 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 9 May 2017 15:05:18 +0900
Subject: [PATCH 4/7] Address comments

---
 R/pkg/R/functions.R                           | 15 ++++---
 python/pyspark/sql/functions.py               |  7 ++--
 .../expressions/datetimeExpressions.scala     | 41 +++++++------------
 .../sql/catalyst/util/DateTimeUtils.scala     |  2 +-
 .../expressions/DateExpressionsSuite.scala    |  8 ----
 .../org/apache/spark/sql/functions.scala      |  3 +-
 .../resources/sql-tests/inputs/datetime.sql   |  4 +-
 .../sql-tests/results/datetime.sql.out        | 16 ++++++--
 .../apache/spark/sql/DateFunctionsSuite.scala |  8 ++--
 9 files changed, 48 insertions(+), 56 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index e197ac0c40ba..684d8571f8a0 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1752,15 +1752,17 @@ setMethod("toRadians",
 
 #' to_date
 #'
-#' Converts the column into a date column. You may optionally specify a format
+#' Converts the column into a DateType. You may optionally specify a format
 #' according to the rules in:
 #' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}.
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
-#' By default, it follows casting rules to a date if the format is omitted.
+#' By default, it follows casting rules to a date if the format is omitted
+#' (equivalent with \code{cast(df$x, "date")}).
+#'
 #'
 #' @param x Column to parse.
-#' @param format string to use to parse x column to a date column. (optional)
+#' @param format string to use to parse x Column to DateType. (optional)
 #'
 #' @rdname to_date
 #' @name to_date
@@ -1827,15 +1829,16 @@ setMethod("to_json", signature(x = "Column"),
 
 #' to_timestamp
 #'
-#' Converts the column into a timestamp column. You may optionally specify a format
+#' Converts the column into a TimestampType. You may optionally specify a format
 #' according to the rules in:
 #' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}.
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
-#' The default format is 'yyyy-MM-dd HH:mm:ss'.
+#' By default, it follows casting rules to a timestamp if the format is omitted
+#' (equivalent with \code{cast(df$x, "timestamp")}).
 #'
 #' @param x Column to parse.
-#' @param format string to use to parse x column to a timestamp column. (optional)
+#' @param format string to use to parse x Column to DateType. (optional)
 #'
 #' @rdname to_timestamp
 #' @name to_timestamp
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 302cfaa8dcf0..fd1671c69884 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -984,7 +984,7 @@ def to_date(col, format=None):
     using the optionally specified format. Specify formats according to
     `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
     By default, it follows casting rules to :class:`pyspark.sql.types.DateType` if the format
-    is omitted.
+    is omitted (equivalent with ``col.cast("date")``).
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
     >>> df.select(to_date(df.t).alias('date')).collect()
@@ -1006,9 +1006,10 @@ def to_timestamp(col, format=None):
     """Converts a :class:`Column` of :class:`pyspark.sql.types.StringType` or
     :class:`pyspark.sql.types.TimestampType` into :class:`pyspark.sql.types.DateType`
-    using the optionally specified format. Default format is 'yyyy-MM-dd HH:mm:ss'. Specify
-    formats according to
+    using the optionally specified format. Specify formats according to
     `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
+    By default, it follows casting rules to :class:`pyspark.sql.types.TimestampType` if the format
+    is omitted (equivalent with ``col.cast("timestamp")``).
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
     >>> df.select(to_timestamp(df.t).alias('dt')).collect()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 350d0d6a0349..658d46e763ea 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1145,26 +1145,6 @@ case class ToUTCTimestamp(left: Expression, right: Expression)
   }
 }
 
-/**
- * Returns the date part of a timestamp or string.
- */
-case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
-
-  // Implicit casting of spark will accept string in both date and timestamp format, as
-  // well as TimestampType.
-  override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
-
-  override def dataType: DataType = DateType
-
-  override def eval(input: InternalRow): Any = child.eval(input)
-
-  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    defineCodeGen(ctx, ev, d => d)
-  }
-
-  override def prettyName: String = "to_date"
-}
-
 /**
  * Parses a column to a date based on the given format.
  */
@@ -1171,13 +1171,13 @@ case class ParseToDate(left: Expression, format: Option[Expression], child: Expr
 
   def this(left: Expression) = {
     // backwards compatability
-    this(left, Option(null), ToDate(left))
+    this(left, None, Cast(left, DateType))
   }
 
   override def flatArguments: Iterator[Any] = Iterator(left, format)
   override def sql: String = {
     if (format.isDefined) {
-      s"$prettyName(${left.sql}, ${format.get.sql}"
+      s"$prettyName(${left.sql}, ${format.get.sql})"
     } else {
       s"$prettyName(${left.sql})"
     }
   }
@@ -1212,7 +1192,8 @@ case class ParseToDate(left: Expression, format: Option[Expression], child: Expr
 /**
  * Parses a column to a timestamp based on the supplied format.
  */
 @ExpressionDescription(
   usage = """
-    _FUNC_(timestamp[, fmt]) - Parses the `timestamp` expression with the `format` expression to
-      a timestamp. Returns null with invalid input. Default `fmt` is 'yyyy-MM-dd HH:mm:ss'.
+    _FUNC_(timestamp[, fmt]) - Parses the `timestamp` expression with the `format` expression to
+      a timestamp. Returns null with invalid input. By default, it follows casting rules to
+      a timestamp if the `fmt` is omitted.
   """,
   extended = """
     Examples:
       > SELECT _FUNC_('2016-12-31 00:12:00');
        2016-12-31 00:12:00
       > SELECT _FUNC_('2016-12-31', 'yyyy-MM-dd');
        2016-12-31 00:00:00
   """)
-case class ParseToTimestamp(left: Expression, format: Expression, child: Expression)
+case class ParseToTimestamp(left: Expression, format: Option[Expression], child: Expression)
   extends RuntimeReplaceable {
 
   def this(left: Expression, format: Expression) = {
-    this(left, format, Cast(UnixTimestamp(left, format), TimestampType))
+    this(left, Option(format), Cast(UnixTimestamp(left, format), TimestampType))
   }
 
-  def this(left: Expression) = this(left, Literal("yyyy-MM-dd HH:mm:ss"))
+  def this(left: Expression) = this(left, None, Cast(left, TimestampType))
 
   override def flatArguments: Iterator[Any] = Iterator(left, format)
-  override def sql: String = s"$prettyName(${left.sql}, ${format.sql})"
+  override def sql: String = {
+    if (format.isDefined) {
+      s"$prettyName(${left.sql}, ${format.get.sql})"
+    } else {
+      s"$prettyName(${left.sql})"
+    }
+  }
 
   override def prettyName: String = "to_timestamp"
   override def dataType: DataType = TimestampType
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index bf596fa0a89d..2f262d99539d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -427,7 +427,7 @@ object DateTimeUtils {
   * The return type is [[Option]] in order to distinguish between 0 and null. The following
   * formats are allowed:
   *
-  * `yyyy`,
+  * `yyyy`
   * `yyyy-[m]m`
   * `yyyy-[m]m-[d]d`
   * `yyyy-[m]m-[d]d `
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index ca89bf7db0b4..d3bac0a4d277 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -495,14 +495,6 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)), null)
   }
 
-  test("function to_date") {
-    checkEvaluation(
-      ToDate(Literal(Date.valueOf("2015-07-22"))),
-      DateTimeUtils.fromJavaDate(Date.valueOf("2015-07-22")))
-    checkEvaluation(ToDate(Literal.create(null, DateType)), null)
-    checkConsistencyBetweenInterpretedAndCodegen(ToDate, DateType)
-  }
-
   test("function trunc") {
     def testTrunc(input: Date, fmt: String, expected: Date): Unit = {
       checkEvaluation(TruncDate(Literal.create(input, DateType), Literal.create(fmt, StringType)),
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index fbebbe522bb1..4561b307b32d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2683,8 +2683,7 @@ object functions {
   def unix_timestamp(s: Column, p: String): Column = withExpr { UnixTimestamp(s.expr, Literal(p)) }
 
   /**
-   * Convert time string to a Unix timestamp (in seconds).
-   * Uses the pattern "yyyy-MM-dd HH:mm:ss" and will return null on failure.
+   * Convert time string to a Unix timestamp (in seconds) by casting rules to `TimestampType`.
    * @group datetime_funcs
    * @since 2.2.0
    */
   def to_timestamp(s: Column): Column = withExpr {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
index 075a406b0ca5..e957f693a983 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -3,4 +3,6 @@
 -- [SPARK-16836] current_date and current_timestamp literals
 select current_date = current_date(), current_timestamp = current_timestamp();
 
-select to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd');
+select to_date(null), to_date('2016-12-31'), to_date('2016-12-31', 'yyyy-MM-dd');
+
+select to_timestamp(null), to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd');
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
index 13d6b96bf496..13e1e48b038a 100644
--- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 2
+-- Number of queries: 3
 
 
 -- !query 0
@@ -11,8 +11,16 @@ true true
 
 
 -- !query 1
-select to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd')
+select to_date(null), to_date('2016-12-31'), to_date('2016-12-31', 'yyyy-MM-dd')
 -- !query 1 schema
-struct
+struct
 -- !query 1 output
-2016-12-31 00:12:00	2016-12-31 00:00:00
+NULL	2016-12-31	2016-12-31
+
+
+-- !query 2
+select to_timestamp(null), to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd')
+-- !query 2 schema
+struct
+-- !query 2 output
+NULL	2016-12-31 00:12:00	2016-12-31 00:00:00
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index 2acda3f00732..cded087fa924 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -387,7 +387,7 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
       df.selectExpr("to_date(s)"),
       Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
 
-    // now with format
+    // Now with format
     checkAnswer(
       df.select(to_date(col("t"), "yyyy-MM-dd")),
       Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
@@ -400,12 +400,12 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
       df.select(to_date(col("s"), "yyyy-MM-dd")),
       Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
 
-    // now switch format
+    // Now switch format
     checkAnswer(
       df.select(to_date(col("s"), "yyyy-dd-MM")),
       Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
 
-    // invalid format
+    // Invalid format
     checkAnswer(
       df.select(to_date(col("s"), "yyyy-hh-MM")),
       Seq(Row(null), Row(null), Row(null)))
@@ -413,7 +413,7 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
       df.select(to_date(col("s"), "yyyy-dd-aa")),
       Seq(Row(null), Row(null), Row(null)))
 
-    // february
+    // February
     val x1 = "2016-02-29"
     val x2 = "2017-02-29"
     val df1 = Seq(x1, x2).toDF("x")

From b6f867cd87e46ca2daf74eabce14b735a962c9a4 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 9 May 2017 15:14:17 +0900
Subject: [PATCH 5/7] Fix some typos

---
 R/pkg/R/functions.R | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 684d8571f8a0..575697ba68b7 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1757,10 +1757,9 @@ setMethod("toRadians",
 #' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}.
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
-#' By default, it follows casting rules to a date if the format is omitted
+#' By default, it follows casting rules to a DateType if the format is omitted
 #' (equivalent with \code{cast(df$x, "date")}).
 #'
-#'
 #' @param x Column to parse.
 #' @param format string to use to parse x Column to DateType. (optional)
@@ -1834,11 +1833,11 @@ setMethod("to_json", signature(x = "Column"),
 #' \url{http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html}.
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
-#' By default, it follows casting rules to a timestamp if the format is omitted
+#' By default, it follows casting rules to a TimestampType if the format is omitted
 #' (equivalent with \code{cast(df$x, "timestamp")}).
 #'
 #' @param x Column to parse.
-#' @param format string to use to parse x Column to DateType. (optional)
+#' @param format string to use to parse x Column to TimestampType. (optional)
 #'
 #' @rdname to_timestamp
 #' @name to_timestamp

From fc02460c5d014c573631f3b62cd6b62f5a46c261 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 9 May 2017 15:17:32 +0900
Subject: [PATCH 6/7] Fix one more typo

---
 .../spark/sql/catalyst/expressions/datetimeExpressions.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 658d46e763ea..de4c94d12abd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -1191,7 +1191,7 @@ case class ParseToDate(left: Expression, format: Option[Expression], child: Expr
  */
 @ExpressionDescription(
   usage = """
-    _FUNC_(timestamp[, fmt]) - Parses the `timestamp` expression with the `format` expression to
+    _FUNC_(timestamp[, fmt]) - Parses the `timestamp` expression with the `fmt` expression to
       a timestamp. Returns null with invalid input. By default, it follows casting rules to
       a timestamp if the `fmt` is omitted.
   """,

From b03892780f182e8cd08fe976f3e17ec65e71ab74 Mon Sep 17 00:00:00 2001
From: hyukjinkwon
Date: Tue, 9 May 2017 20:16:58 +0900
Subject: [PATCH 7/7] Fix typos and minimised change

---
 R/pkg/R/functions.R                                     | 4 ++--
 python/pyspark/sql/functions.py                         | 4 ++--
 .../scala/org/apache/spark/sql/DateFunctionsSuite.scala | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 575697ba68b7..2fd2d3675661 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1758,7 +1758,7 @@ setMethod("toRadians",
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
 #' By default, it follows casting rules to a DateType if the format is omitted
-#' (equivalent with \code{cast(df$x, "date")}).
+#' (equivalent to \code{cast(df$x, "date")}).
 #'
 #' @param x Column to parse.
 #' @param format string to use to parse x Column to DateType. (optional)
@@ -1834,7 +1834,7 @@ setMethod("to_json", signature(x = "Column"),
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
 #' By default, it follows casting rules to a TimestampType if the format is omitted
-#' (equivalent with \code{cast(df$x, "timestamp")}).
+#' (equivalent to \code{cast(df$x, "timestamp")}).
 #'
 #' @param x Column to parse.
 #' @param format string to use to parse x Column to TimestampType. (optional)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index fd1671c69884..d9b86aff63fa 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -984,7 +984,7 @@ def to_date(col, format=None):
     using the optionally specified format. Specify formats according to
     `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
     By default, it follows casting rules to :class:`pyspark.sql.types.DateType` if the format
-    is omitted (equivalent with ``col.cast("date")``).
+    is omitted (equivalent to ``col.cast("date")``).
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
     >>> df.select(to_date(df.t).alias('date')).collect()
@@ -1009,7 +1009,7 @@ def to_timestamp(col, format=None):
     using the optionally specified format. Specify formats according to
     `SimpleDateFormats <http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html>`_.
     By default, it follows casting rules to :class:`pyspark.sql.types.TimestampType` if the format
-    is omitted (equivalent with ``col.cast("timestamp")``).
+    is omitted (equivalent to ``col.cast("timestamp")``).
 
     >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
     >>> df.select(to_timestamp(df.t).alias('dt')).collect()
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index cded087fa924..3a8694839bb2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -387,7 +387,7 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
       df.selectExpr("to_date(s)"),
       Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
 
-    // Now with format
+    // now with format
     checkAnswer(
       df.select(to_date(col("t"), "yyyy-MM-dd")),
       Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")),
@@ -400,12 +400,12 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
       df.select(to_date(col("s"), "yyyy-MM-dd")),
       Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
 
-    // Now switch format
+    // now switch format
     checkAnswer(
       df.select(to_date(col("s"), "yyyy-dd-MM")),
       Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
 
-    // Invalid format
+    // invalid format
     checkAnswer(
       df.select(to_date(col("s"), "yyyy-hh-MM")),
       Seq(Row(null), Row(null), Row(null)))
@@ -413,7 +413,7 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext {
       df.select(to_date(col("s"), "yyyy-dd-aa")),
       Seq(Row(null), Row(null), Row(null)))
 
-    // February
+    // february
     val x1 = "2016-02-29"
    val x2 = "2017-02-29"
    val df1 = Seq(x1, x2).toDF("x")
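
Taken together, the series makes the single-argument forms of to_date and
to_timestamp follow cast semantics instead of a fixed default pattern, and lets
to_timestamp be called with a single argument from SQL. A minimal sketch of the
post-series behavior, assuming a Spark 2.2-era SparkSession named `spark` (the
DataFrame and column name below are illustrative, not part of the patches):

    import org.apache.spark.sql.functions.{col, to_timestamp}

    // Single-argument form: follows cast rules, equivalent to col("t").cast("timestamp").
    val df = spark.createDataFrame(Seq(Tuple1("2016-12-31 00:12:00"))).toDF("t")
    df.select(to_timestamp(col("t"))).show(false)                // 2016-12-31 00:12:00

    // Two-argument form: parses with the supplied SimpleDateFormat pattern,
    // so the time-of-day portion beyond the pattern is dropped.
    df.select(to_timestamp(col("t"), "yyyy-MM-dd")).show(false)  // 2016-12-31 00:00:00

    // The one-argument form is now also accepted in SQL, mirroring the queries
    // added to sql-tests/inputs/datetime.sql in the series.
    spark.sql(
      "select to_timestamp('2016-12-31 00:12:00'), " +
      "to_timestamp('2016-12-31', 'yyyy-MM-dd')").show(false)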