From d84fc0ef0d605ba8944de64d3f6e0ac3d0da5a1b Mon Sep 17 00:00:00 2001 From: Youngbin Kim Date: Mon, 18 Dec 2017 17:36:29 -0800 Subject: [PATCH 1/3] date_trunc support --- python/pyspark/sql/functions.py | 18 ++ .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/datetimeExpressions.scala | 163 ++++++++++++++---- .../sql/catalyst/util/DateTimeUtils.scala | 102 +++++++++-- .../expressions/DateExpressionsSuite.scala | 73 +++++++- .../catalyst/util/DateTimeUtilsSuite.scala | 70 ++++++++ .../org/apache/spark/sql/functions.scala | 15 ++ .../apache/spark/sql/DateFunctionsSuite.scala | 46 +++++ 8 files changed, 441 insertions(+), 47 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 4e0faddb1c0df..fae09d5fed911 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1111,6 +1111,24 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) +@since(2.3) +def date_trunc(format, timestamp): + """ + Returns timestamp truncated to the unit specified by the format. + + :param format: 'year', 'YYYY', 'yy', 'month', 'mon', 'mm', + 'DAY', 'DD', 'HOUR', 'MINUTE', 'SECOND', 'WEEK', 'QUARTER' + + >>> df = spark.createDataFrame([('1997-02-28',)], ['d']) + >>> df.select(date_trunc('year', df.d).alias('year')).collect() + [Row(year=datetime.datetime(1997, 1, 1, 0, 0))] + >>> df.select(date_trunc('mon', df.d).alias('month')).collect() + [Row(month=datetime.datetime(1997, 2, 1, 0, 0))] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.date_trunc(format, _to_java_column(timestamp))) + + @since(1.5) def next_day(date, dayOfWeek): """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 11538bd31b4fd..5ddb39822617d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -392,6 +392,7 @@ object FunctionRegistry { expression[ToUnixTimestamp]("to_unix_timestamp"), expression[ToUTCTimestamp]("to_utc_timestamp"), expression[TruncDate]("trunc"), + expression[TruncTimestamp]("date_trunc"), expression[UnixTimestamp]("unix_timestamp"), expression[DayOfWeek]("dayofweek"), expression[WeekOfYear]("weekofyear"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 44d54a20844a3..63da44a203ef4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1295,80 +1295,82 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -/** - * Returns date truncated to the unit specified by the format. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`.", - examples = """ - Examples: - > SELECT _FUNC_('2009-02-12', 'MM'); - 2009-02-01 - > SELECT _FUNC_('2015-10-27', 'YEAR'); - 2015-01-01 - """, - since = "1.5.0") -// scalastyle:on line.size.limit -case class TruncDate(date: Expression, format: Expression) - extends BinaryExpression with ImplicitCastInputTypes { - override def left: Expression = date - override def right: Expression = format - - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) - override def dataType: DataType = DateType +trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { + val time: Expression + val format: Expression override def nullable: Boolean = true - override def prettyName: String = "trunc" private lazy val truncLevel: Int = DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) - override def eval(input: InternalRow): Any = { + /** + * + * @param input + * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) + * @param truncFunc + * @tparam T + * @return + */ + protected def evalHelper[T](input: InternalRow, maxLevel: Int)( + truncFunc: (Any, Int) => T): Any = { val level = if (format.foldable) { truncLevel } else { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) } - if (level == -1) { + if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) { // unknown format null } else { - val d = date.eval(input) + val d = time.eval(input) if (d == null) { null } else { - DateTimeUtils.truncDate(d.asInstanceOf[Int], level) + truncFunc(d, level) } } } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + protected def codeGenHelper[T]( + ctx: CodegenContext, + ev: ExprCode, + maxLevel: Int, + orderReversed: Boolean = false)( + truncFunc: (String, String) => String) + : ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") if (format.foldable) { - if (truncLevel == -1) { + if (truncLevel == DateTimeUtils.TRUNC_INVALID || truncLevel > maxLevel) { ev.copy(code = s""" boolean ${ev.isNull} = true; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""") } else { - val d = date.genCode(ctx) + val d = time.genCode(ctx) + val truncFuncStr = truncFunc(d.value, truncLevel.toString) ev.copy(code = s""" ${d.code} boolean ${ev.isNull} = ${d.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { - ${ev.value} = $dtu.truncDate(${d.value}, $truncLevel); + ${ev.value} = $dtu.$truncFuncStr; }""") } } else { - nullSafeCodeGen(ctx, ev, (dateVal, fmt) => { + nullSafeCodeGen(ctx, ev, (left, right) => { val form = ctx.freshName("form") + val (dateVal, fmt) = if (orderReversed) { + (right, left) + } else { + (left, right) + } + val truncFuncStr = truncFunc(dateVal, form) s""" int $form = $dtu.parseTruncLevel($fmt); - if ($form == -1) { + if ($form == -1 || $form > $maxLevel) { ${ev.isNull} = true; } else { - ${ev.value} = $dtu.truncDate($dateVal, $form); + ${ev.value} = $dtu.$truncFuncStr } """ }) @@ -1376,6 +1378,99 @@ case class TruncDate(date: Expression, format: Expression) } } +/** + * Returns date truncated to the unit specified by the format. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`. + `fmt` should be one of ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM"] + """, + extended = """ + Examples: + > SELECT _FUNC_('2009-02-12', 'MM'); + 2009-02-01 + > SELECT _FUNC_('2015-10-27', 'YEAR'); + 2015-01-01 + """) +// scalastyle:on line.size.limit +case class TruncDate(date: Expression, format: Expression) + extends TruncTime { + override def left: Expression = date + override def right: Expression = format + + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) + override def dataType: DataType = DateType + override def prettyName: String = "trunc" + override val time = date + + override def eval(input: InternalRow): Any = { + evalHelper(input, maxLevel = DateTimeUtils.TRUNC_TO_MONTH) { (d: Any, level: Int) => + DateTimeUtils.truncDate(d.asInstanceOf[Int], level) + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + codeGenHelper(ctx, ev, maxLevel = DateTimeUtils.TRUNC_TO_MONTH) { (date: String, fmt: String) => + s"truncDate($date, $fmt);" + } + } +} + +/** + * Returns timestamp truncated to the unit specified by the format. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(fmt, date) - Returns timestamp `ts` truncated to the unit specified by the format model `fmt`. + `fmt` should be one of ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM", "DAY", "DD", "HOUR", "MINUTE", "SECOND", "WEEK", "QUARTER"] + """, + extended = """ + Examples: + > SELECT _FUNC_('2015-03-05T09:32:05.359', 'YEAR'); + 2015-01-01T00:00:00 + > SELECT _FUNC_('2015-03-05T09:32:05.359', 'MM'); + 2015-03-01T00:00:00 + > SELECT _FUNC_('2015-03-05T09:32:05.359', 'DD'); + 2015-03-05T00:00:00 + > SELECT _FUNC_('2015-03-05T09:32:05.359', 'HOUR'); + 2015-03-05T09:00:00 + """) +// scalastyle:on line.size.limit +case class TruncTimestamp( + format: Expression, + timestamp: Expression, + timeZoneId: Option[String] = None) + extends TruncTime with TimeZoneAwareExpression { + override def left: Expression = format + override def right: Expression = timestamp + + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType) + override def dataType: TimestampType = TimestampType + override def prettyName: String = "date_trunc" + override val time = timestamp + override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = + copy(timeZoneId = Option(timeZoneId)) + + def this(format: Expression, timestamp: Expression) = this(format, timestamp, None) + + override def eval(input: InternalRow): Any = { + evalHelper(input, maxLevel = DateTimeUtils.TRUNC_TO_QUARTER) { (d: Any, level: Int) => + DateTimeUtils.truncTimestamp(d.asInstanceOf[Long], level, timeZone) + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val tz = ctx.addReferenceObj("timeZone", timeZone) + codeGenHelper(ctx, ev, maxLevel = DateTimeUtils.TRUNC_TO_QUARTER, true) { + (date: String, fmt: String) => + s"truncTimestamp($date, $fmt, $tz);" + } + } +} + /** * Returns the number of days from startDate to endDate. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index b1ed25645b36c..cfa796948a22b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -45,7 +45,8 @@ object DateTimeUtils { // it's 2440587.5, rounding up to compatible with Hive final val JULIAN_DAY_OF_EPOCH = 2440588 final val SECONDS_PER_DAY = 60 * 60 * 24L - final val MICROS_PER_SECOND = 1000L * 1000L + final val MICROS_PER_MILLIS = 1000L + final val MICROS_PER_SECOND = MICROS_PER_MILLIS * MILLIS_PER_SECOND final val MILLIS_PER_SECOND = 1000L final val NANOS_PER_SECOND = MICROS_PER_SECOND * 1000L final val MICROS_PER_DAY = MICROS_PER_SECOND * SECONDS_PER_DAY @@ -909,6 +910,15 @@ object DateTimeUtils { math.round(diff * 1e8) / 1e8 } + // Thursday = 0 since 1970/Jan/01 => Thursday + private val SUNDAY = 3 + private val MONDAY = 4 + private val TUESDAY = 5 + private val WEDNESDAY = 6 + private val THURSDAY = 0 + private val FRIDAY = 1 + private val SATURDAY = 2 + /* * Returns day of week from String. Starting from Thursday, marked as 0. * (Because 1970-01-01 is Thursday). @@ -916,13 +926,13 @@ object DateTimeUtils { def getDayOfWeekFromString(string: UTF8String): Int = { val dowString = string.toString.toUpperCase(Locale.ROOT) dowString match { - case "SU" | "SUN" | "SUNDAY" => 3 - case "MO" | "MON" | "MONDAY" => 4 - case "TU" | "TUE" | "TUESDAY" => 5 - case "WE" | "WED" | "WEDNESDAY" => 6 - case "TH" | "THU" | "THURSDAY" => 0 - case "FR" | "FRI" | "FRIDAY" => 1 - case "SA" | "SAT" | "SATURDAY" => 2 + case "SU" | "SUN" | "SUNDAY" => SUNDAY + case "MO" | "MON" | "MONDAY" => MONDAY + case "TU" | "TUE" | "TUESDAY" => TUESDAY + case "WE" | "WED" | "WEDNESDAY" => WEDNESDAY + case "TH" | "THU" | "THURSDAY" => THURSDAY + case "FR" | "FRI" | "FRIDAY" => FRIDAY + case "SA" | "SAT" | "SATURDAY" => SATURDAY case _ => -1 } } @@ -944,9 +954,16 @@ object DateTimeUtils { date + daysToMonthEnd } - private val TRUNC_TO_YEAR = 1 - private val TRUNC_TO_MONTH = 2 - private val TRUNC_INVALID = -1 + // Visible for testing. + val TRUNC_TO_YEAR = 1 + val TRUNC_TO_MONTH = 2 + val TRUNC_TO_DAY = 3 + val TRUNC_TO_HOUR = 4 + val TRUNC_TO_MINUTE = 5 + val TRUNC_TO_SECOND = 6 + val TRUNC_TO_WEEK = 7 + val TRUNC_TO_QUARTER = 8 + val TRUNC_INVALID = -1 /** * Returns the trunc date from original date and trunc level. @@ -964,7 +981,62 @@ object DateTimeUtils { } /** - * Returns the truncate level, could be TRUNC_YEAR, TRUNC_MONTH, or TRUNC_INVALID, + * Returns the trunc date time from original date time and trunc level. + * Trunc level should be generated using `parseTruncLevel()`, should be between 1 and 8 + */ + def truncTimestamp(d: SQLTimestamp, level: Int, timeZone: TimeZone): SQLTimestamp = { + var millis = d / MICROS_PER_MILLIS + val truncated = level match { + case TRUNC_TO_YEAR => + val dDays = millisToDays(millis, timeZone) + daysToMillis(truncDate(dDays, level), timeZone) + case TRUNC_TO_MONTH => + val dDays = millisToDays(millis, timeZone) + daysToMillis(truncDate(dDays, level), timeZone) + case TRUNC_TO_DAY => + val offset = timeZone.getOffset(millis) + millis += offset + millis - millis % (MILLIS_PER_SECOND * SECONDS_PER_DAY) - offset + case TRUNC_TO_HOUR => + val offset = timeZone.getOffset(millis) + millis += offset + millis - millis % (60 * 60 * MILLIS_PER_SECOND) - offset + case TRUNC_TO_MINUTE => + millis - millis % (60 * MILLIS_PER_SECOND) + case TRUNC_TO_SECOND => + millis - millis % MILLIS_PER_SECOND + case TRUNC_TO_WEEK => + val dDays = millisToDays(millis, timeZone) + val prevMonday = getNextDateForDayOfWeek(dDays - 7, MONDAY) + daysToMillis(prevMonday, timeZone) + case TRUNC_TO_QUARTER => + val dDays = millisToDays(millis, timeZone) + millis = daysToMillis(truncDate(dDays, TRUNC_TO_MONTH), timeZone) + val cal = Calendar.getInstance() + cal.setTimeInMillis(millis) + val quarter = getQuarter(dDays) + val month = quarter match { + case 1 => Calendar.JANUARY + case 2 => Calendar.APRIL + case 3 => Calendar.JULY + case 4 => Calendar.OCTOBER + } + cal.set(Calendar.MONTH, month) + cal.getTimeInMillis() + case _ => + // caller make sure that this should never be reached + sys.error(s"Invalid trunc level: $level") + } + truncated * MICROS_PER_MILLIS + } + + def truncTimestamp(d: SQLTimestamp, level: Int): SQLTimestamp = { + truncTimestamp(d, level, defaultTimeZone()) + } + + /** + * Returns the truncate level, could be TRUNC_YEAR, TRUNC_MONTH, TRUNC_TO_DAY, TRUNC_TO_HOUR, + * TRUNC_TO_MINUTE, TRUNC_TO_SECOND, TRUNC_TO_WEEK, TRUNC_TO_QUARTER or TRUNC_INVALID, * TRUNC_INVALID means unsupported truncate level. */ def parseTruncLevel(format: UTF8String): Int = { @@ -974,6 +1046,12 @@ object DateTimeUtils { format.toString.toUpperCase(Locale.ROOT) match { case "YEAR" | "YYYY" | "YY" => TRUNC_TO_YEAR case "MON" | "MONTH" | "MM" => TRUNC_TO_MONTH + case "DAY" | "DD" => TRUNC_TO_DAY + case "HOUR" => TRUNC_TO_HOUR + case "MINUTE" => TRUNC_TO_MINUTE + case "SECOND" => TRUNC_TO_SECOND + case "WEEK" => TRUNC_TO_WEEK + case "QUARTER" => TRUNC_TO_QUARTER case _ => TRUNC_INVALID } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 89d99f9678cda..63f6ceeb21b96 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -527,7 +527,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)), null) } - test("function trunc") { + test("TruncDate") { def testTrunc(input: Date, fmt: String, expected: Date): Unit = { checkEvaluation(TruncDate(Literal.create(input, DateType), Literal.create(fmt, StringType)), expected) @@ -543,11 +543,82 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testTrunc(date, fmt, Date.valueOf("2015-07-01")) } testTrunc(date, "DD", null) + testTrunc(date, "SECOND", null) + testTrunc(date, "HOUR", null) testTrunc(date, null, null) testTrunc(null, "MON", null) testTrunc(null, null, null) } + test("TruncTimestamp") { + def testTrunc(input: Timestamp, fmt: String, expected: Timestamp): Unit = { + checkEvaluation( + TruncTimestamp(Literal.create(fmt, StringType), Literal.create(input, TimestampType)), + expected) + checkEvaluation( + TruncTimestamp( + NonFoldableLiteral.create(fmt, StringType), Literal.create(input, TimestampType)), + expected) + } + + withDefaultTimeZone(TimeZoneGMT) { + val inputDate = Timestamp.valueOf("2015-07-22 05:30:06") + + Seq("yyyy", "YYYY", "year", "YEAR", "yy", "YY").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-01-01 00:00:00")) + } + + Seq("month", "MONTH", "mon", "MON", "mm", "MM").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-01 00:00:00")) + } + + Seq("DAY", "day", "DD", "dd").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 00:00:00")) + } + + Seq("HOUR", "hour").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:00:00")) + } + + Seq("MINUTE", "minute").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:30:00")) + } + + Seq("SECOND", "second").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-22 05:30:06")) + } + + Seq("WEEK", "week").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-20 00:00:00")) + } + + Seq("QUARTER", "quarter").foreach { fmt => + testTrunc( + inputDate, fmt, + Timestamp.valueOf("2015-07-01 00:00:00")) + } + + testTrunc(inputDate, "INVALID", null) + testTrunc(inputDate, null, null) + testTrunc(null, "MON", null) + testTrunc(null, null, null) + } + } + test("from_unixtime") { val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index c8cf16d937352..416fbfdd01d05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -563,6 +563,76 @@ class DateTimeUtilsSuite extends SparkFunSuite { } } + test("truncTimestamp") { + def test( + level: Int, + expected: String, + inputTS: SQLTimestamp, + timezone: TimeZone = DateTimeUtils.defaultTimeZone()): Unit = { + val truncated = + DateTimeUtils.truncTimestamp(inputTS, level, timezone) + val expectedTS = + DateTimeUtils.stringToTimestamp(UTF8String.fromString(expected)) + assert(truncated === expectedTS.get) + } + + val defaultInputTS = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-05T09:32:05.359")) + val defaultInputTS1 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-31T20:32:05.359")) + val defaultInputTS2 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-04-01T02:32:05.359")) + val defaultInputTS3 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-30T02:32:05.359")) + val defaultInputTS4 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-29T02:32:05.359")) + + test(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", defaultInputTS.get) + test(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", defaultInputTS.get) + test(DateTimeUtils.TRUNC_TO_DAY, "2015-03-05T00:00:00", defaultInputTS.get) + test(DateTimeUtils.TRUNC_TO_HOUR, "2015-03-05T09:00:00", defaultInputTS.get) + test(DateTimeUtils.TRUNC_TO_MINUTE, "2015-03-05T09:32:00", defaultInputTS.get) + test(DateTimeUtils.TRUNC_TO_SECOND, "2015-03-05T09:32:05", defaultInputTS.get) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-02T00:00:00", defaultInputTS.get) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS1.get) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS2.get) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS3.get) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-23T00:00:00", defaultInputTS4.get) + test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", defaultInputTS.get) + test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", defaultInputTS1.get) + test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", defaultInputTS2.get) + + for (tz <- DateTimeTestUtils.ALL_TIMEZONES) { + DateTimeTestUtils.withDefaultTimeZone(tz) { + val inputTS = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-05T09:32:05.359")) + val inputTS1 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-31T20:32:05.359")) + val inputTS2 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-04-01T02:32:05.359")) + val inputTS3 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-30T02:32:05.359")) + val inputTS4 = + DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-29T02:32:05.359")) + + test(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", inputTS.get, tz) + test(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", inputTS.get, tz) + test(DateTimeUtils.TRUNC_TO_DAY, "2015-03-05T00:00:00", inputTS.get, tz) + test(DateTimeUtils.TRUNC_TO_HOUR, "2015-03-05T09:00:00", inputTS.get, tz) + test(DateTimeUtils.TRUNC_TO_MINUTE, "2015-03-05T09:32:00", inputTS.get, tz) + test(DateTimeUtils.TRUNC_TO_SECOND, "2015-03-05T09:32:05", inputTS.get, tz) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-02T00:00:00", inputTS.get, tz) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS1.get, tz) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS2.get, tz) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS3.get, tz) + test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-23T00:00:00", inputTS4.get, tz) + test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS.get, tz) + test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS1.get, tz) + test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", inputTS2.get, tz) + } + } + } + test("daysToMillis and millisToDays") { val c = Calendar.getInstance(TimeZonePST) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 3e4659b9eae60..b6e359edf0142 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2797,6 +2797,21 @@ object functions { TruncDate(date.expr, Literal(format)) } + /** + * Returns timestamp truncated to the unit specified by the format. + * + * @param format: 'year', 'yyyy', 'yy' for truncate by year, + * 'month', 'mon', 'mm' for truncate by month, + * 'day', 'dd' for truncate by day, + * Other options are: second, minute, hour, week, month, quarter + * + * @group datetime_funcs + * @since 2.3.0 + */ + def date_trunc(format: String, timestamp: Column): Column = withExpr { + TruncTimestamp(Literal(format), timestamp.expr) + } + /** * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders * that time as a timestamp in the given time zone. For example, 'GMT+1' would yield diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 3a8694839bb24..6bbf38516cdf6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -435,6 +435,52 @@ class DateFunctionsSuite extends QueryTest with SharedSQLContext { Seq(Row(Date.valueOf("2015-07-01")), Row(Date.valueOf("2014-12-01")))) } + test("function date_trunc") { + val df = Seq( + (1, Timestamp.valueOf("2015-07-22 10:01:40.523")), + (2, Timestamp.valueOf("2014-12-31 05:29:06.876"))).toDF("i", "t") + + checkAnswer( + df.select(date_trunc("YY", col("t"))), + Seq(Row(Timestamp.valueOf("2015-01-01 00:00:00")), + Row(Timestamp.valueOf("2014-01-01 00:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('MONTH', t)"), + Seq(Row(Timestamp.valueOf("2015-07-01 00:00:00")), + Row(Timestamp.valueOf("2014-12-01 00:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('DAY', t)"), + Seq(Row(Timestamp.valueOf("2015-07-22 00:00:00")), + Row(Timestamp.valueOf("2014-12-31 00:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('HOUR', t)"), + Seq(Row(Timestamp.valueOf("2015-07-22 10:00:00")), + Row(Timestamp.valueOf("2014-12-31 05:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('MINUTE', t)"), + Seq(Row(Timestamp.valueOf("2015-07-22 10:01:00")), + Row(Timestamp.valueOf("2014-12-31 05:29:00")))) + + checkAnswer( + df.selectExpr("date_trunc('SECOND', t)"), + Seq(Row(Timestamp.valueOf("2015-07-22 10:01:40")), + Row(Timestamp.valueOf("2014-12-31 05:29:06")))) + + checkAnswer( + df.selectExpr("date_trunc('WEEK', t)"), + Seq(Row(Timestamp.valueOf("2015-07-20 00:00:00")), + Row(Timestamp.valueOf("2014-12-29 00:00:00")))) + + checkAnswer( + df.selectExpr("date_trunc('QUARTER', t)"), + Seq(Row(Timestamp.valueOf("2015-07-01 00:00:00")), + Row(Timestamp.valueOf("2014-10-01 00:00:00")))) + } + test("from_unixtime") { val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.US) val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS" From f94f401bcfd765b21c3fb466041b42a605d6a814 Mon Sep 17 00:00:00 2001 From: Youngbin Kim Date: Mon, 18 Dec 2017 17:53:43 -0800 Subject: [PATCH 2/3] minor edits: version --- .../sql/catalyst/expressions/datetimeExpressions.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 63da44a203ef4..fdd0d16e5498f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1387,13 +1387,14 @@ trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { _FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`. `fmt` should be one of ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM"] """, - extended = """ + examples = """ Examples: > SELECT _FUNC_('2009-02-12', 'MM'); 2009-02-01 > SELECT _FUNC_('2015-10-27', 'YEAR'); 2015-01-01 - """) + """, + since = "1.5.0") // scalastyle:on line.size.limit case class TruncDate(date: Expression, format: Expression) extends TruncTime { @@ -1427,7 +1428,7 @@ case class TruncDate(date: Expression, format: Expression) _FUNC_(fmt, date) - Returns timestamp `ts` truncated to the unit specified by the format model `fmt`. `fmt` should be one of ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM", "DAY", "DD", "HOUR", "MINUTE", "SECOND", "WEEK", "QUARTER"] """, - extended = """ + examples = """ Examples: > SELECT _FUNC_('2015-03-05T09:32:05.359', 'YEAR'); 2015-01-01T00:00:00 @@ -1437,7 +1438,8 @@ case class TruncDate(date: Expression, format: Expression) 2015-03-05T00:00:00 > SELECT _FUNC_('2015-03-05T09:32:05.359', 'HOUR'); 2015-03-05T09:00:00 - """) + """, + since = "2.3.0") // scalastyle:on line.size.limit case class TruncTimestamp( format: Expression, From 238d7d470c583c910bccbca8bbcaa681b67d6025 Mon Sep 17 00:00:00 2001 From: Youngbin Kim Date: Tue, 19 Dec 2017 11:53:21 -0800 Subject: [PATCH 3/3] style fixes --- python/pyspark/sql/functions.py | 12 ++-- .../expressions/datetimeExpressions.scala | 51 ++++++++-------- .../sql/catalyst/util/DateTimeUtils.scala | 22 +++---- .../catalyst/util/DateTimeUtilsSuite.scala | 58 +++++++++---------- .../org/apache/spark/sql/functions.scala | 6 +- 5 files changed, 73 insertions(+), 76 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index fae09d5fed911..54530055dfa85 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1099,7 +1099,7 @@ def trunc(date, format): """ Returns date truncated to the unit specified by the format. - :param format: 'year', 'YYYY', 'yy' or 'month', 'mon', 'mm' + :param format: 'year', 'yyyy', 'yy' or 'month', 'mon', 'mm' >>> df = spark.createDataFrame([('1997-02-28',)], ['d']) >>> df.select(trunc(df.d, 'year').alias('year')).collect() @@ -1116,13 +1116,13 @@ def date_trunc(format, timestamp): """ Returns timestamp truncated to the unit specified by the format. - :param format: 'year', 'YYYY', 'yy', 'month', 'mon', 'mm', - 'DAY', 'DD', 'HOUR', 'MINUTE', 'SECOND', 'WEEK', 'QUARTER' + :param format: 'year', 'yyyy', 'yy', 'month', 'mon', 'mm', + 'day', 'dd', 'hour', 'minute', 'second', 'week', 'quarter' - >>> df = spark.createDataFrame([('1997-02-28',)], ['d']) - >>> df.select(date_trunc('year', df.d).alias('year')).collect() + >>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['t']) + >>> df.select(date_trunc('year', df.t).alias('year')).collect() [Row(year=datetime.datetime(1997, 1, 1, 0, 0))] - >>> df.select(date_trunc('mon', df.d).alias('month')).collect() + >>> df.select(date_trunc('mon', df.t).alias('month')).collect() [Row(month=datetime.datetime(1997, 2, 1, 0, 0))] """ sc = SparkContext._active_spark_context diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index fdd0d16e5498f..2da940a44eca3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1295,8 +1295,8 @@ case class ParseToTimestamp(left: Expression, format: Option[Expression], child: override def dataType: DataType = TimestampType } -trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { - val time: Expression +trait TruncInstant extends BinaryExpression with ImplicitCastInputTypes { + val instant: Expression val format: Expression override def nullable: Boolean = true @@ -1304,34 +1304,31 @@ trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) /** - * - * @param input + * @param input internalRow (time) * @param maxLevel Maximum level that can be used for truncation (e.g MONTH for Date input) - * @param truncFunc - * @tparam T - * @return + * @param truncFunc function: (time, level) => time */ - protected def evalHelper[T](input: InternalRow, maxLevel: Int)( - truncFunc: (Any, Int) => T): Any = { + protected def evalHelper(input: InternalRow, maxLevel: Int)( + truncFunc: (Any, Int) => Any): Any = { val level = if (format.foldable) { truncLevel } else { DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) } if (level == DateTimeUtils.TRUNC_INVALID || level > maxLevel) { - // unknown format + // unknown format or too large level null } else { - val d = time.eval(input) - if (d == null) { + val t = instant.eval(input) + if (t == null) { null } else { - truncFunc(d, level) + truncFunc(t, level) } } } - protected def codeGenHelper[T]( + protected def codeGenHelper( ctx: CodegenContext, ev: ExprCode, maxLevel: Int, @@ -1346,11 +1343,11 @@ trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { boolean ${ev.isNull} = true; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};""") } else { - val d = time.genCode(ctx) - val truncFuncStr = truncFunc(d.value, truncLevel.toString) + val t = instant.genCode(ctx) + val truncFuncStr = truncFunc(t.value, truncLevel.toString) ev.copy(code = s""" - ${d.code} - boolean ${ev.isNull} = ${d.isNull}; + ${t.code} + boolean ${ev.isNull} = ${t.isNull}; ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { ${ev.value} = $dtu.$truncFuncStr; @@ -1385,7 +1382,7 @@ trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { @ExpressionDescription( usage = """ _FUNC_(date, fmt) - Returns `date` with the time portion of the day truncated to the unit specified by the format model `fmt`. - `fmt` should be one of ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM"] + `fmt` should be one of ["year", "yyyy", "yy", "mon", "month", "mm"] """, examples = """ Examples: @@ -1397,14 +1394,14 @@ trait TruncTime extends BinaryExpression with ImplicitCastInputTypes { since = "1.5.0") // scalastyle:on line.size.limit case class TruncDate(date: Expression, format: Expression) - extends TruncTime { + extends TruncInstant { override def left: Expression = date override def right: Expression = format override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) override def dataType: DataType = DateType override def prettyName: String = "trunc" - override val time = date + override val instant = date override def eval(input: InternalRow): Any = { evalHelper(input, maxLevel = DateTimeUtils.TRUNC_TO_MONTH) { (d: Any, level: Int) => @@ -1425,7 +1422,7 @@ case class TruncDate(date: Expression, format: Expression) // scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_(fmt, date) - Returns timestamp `ts` truncated to the unit specified by the format model `fmt`. + _FUNC_(fmt, ts) - Returns timestamp `ts` truncated to the unit specified by the format model `fmt`. `fmt` should be one of ["YEAR", "YYYY", "YY", "MON", "MONTH", "MM", "DAY", "DD", "HOUR", "MINUTE", "SECOND", "WEEK", "QUARTER"] """, examples = """ @@ -1445,28 +1442,28 @@ case class TruncTimestamp( format: Expression, timestamp: Expression, timeZoneId: Option[String] = None) - extends TruncTime with TimeZoneAwareExpression { + extends TruncInstant with TimeZoneAwareExpression { override def left: Expression = format override def right: Expression = timestamp override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType) override def dataType: TimestampType = TimestampType override def prettyName: String = "date_trunc" - override val time = timestamp + override val instant = timestamp override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) def this(format: Expression, timestamp: Expression) = this(format, timestamp, None) override def eval(input: InternalRow): Any = { - evalHelper(input, maxLevel = DateTimeUtils.TRUNC_TO_QUARTER) { (d: Any, level: Int) => - DateTimeUtils.truncTimestamp(d.asInstanceOf[Long], level, timeZone) + evalHelper(input, maxLevel = DateTimeUtils.TRUNC_TO_SECOND) { (t: Any, level: Int) => + DateTimeUtils.truncTimestamp(t.asInstanceOf[Long], level, timeZone) } } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val tz = ctx.addReferenceObj("timeZone", timeZone) - codeGenHelper(ctx, ev, maxLevel = DateTimeUtils.TRUNC_TO_QUARTER, true) { + codeGenHelper(ctx, ev, maxLevel = DateTimeUtils.TRUNC_TO_SECOND, true) { (date: String, fmt: String) => s"truncTimestamp($date, $fmt, $tz);" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index cfa796948a22b..fa69b8af62c85 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -955,15 +955,15 @@ object DateTimeUtils { } // Visible for testing. - val TRUNC_TO_YEAR = 1 - val TRUNC_TO_MONTH = 2 - val TRUNC_TO_DAY = 3 - val TRUNC_TO_HOUR = 4 - val TRUNC_TO_MINUTE = 5 - val TRUNC_TO_SECOND = 6 - val TRUNC_TO_WEEK = 7 - val TRUNC_TO_QUARTER = 8 - val TRUNC_INVALID = -1 + private[sql] val TRUNC_TO_YEAR = 1 + private[sql] val TRUNC_TO_MONTH = 2 + private[sql] val TRUNC_TO_QUARTER = 3 + private[sql] val TRUNC_TO_WEEK = 4 + private[sql] val TRUNC_TO_DAY = 5 + private[sql] val TRUNC_TO_HOUR = 6 + private[sql] val TRUNC_TO_MINUTE = 7 + private[sql] val TRUNC_TO_SECOND = 8 + private[sql] val TRUNC_INVALID = -1 /** * Returns the trunc date from original date and trunc level. @@ -984,8 +984,8 @@ object DateTimeUtils { * Returns the trunc date time from original date time and trunc level. * Trunc level should be generated using `parseTruncLevel()`, should be between 1 and 8 */ - def truncTimestamp(d: SQLTimestamp, level: Int, timeZone: TimeZone): SQLTimestamp = { - var millis = d / MICROS_PER_MILLIS + def truncTimestamp(t: SQLTimestamp, level: Int, timeZone: TimeZone): SQLTimestamp = { + var millis = t / MICROS_PER_MILLIS val truncated = level match { case TRUNC_TO_YEAR => val dDays = millisToDays(millis, timeZone) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 416fbfdd01d05..625ff38943fa3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -564,7 +564,7 @@ class DateTimeUtilsSuite extends SparkFunSuite { } test("truncTimestamp") { - def test( + def testTrunc( level: Int, expected: String, inputTS: SQLTimestamp, @@ -587,20 +587,20 @@ class DateTimeUtilsSuite extends SparkFunSuite { val defaultInputTS4 = DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-29T02:32:05.359")) - test(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", defaultInputTS.get) - test(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", defaultInputTS.get) - test(DateTimeUtils.TRUNC_TO_DAY, "2015-03-05T00:00:00", defaultInputTS.get) - test(DateTimeUtils.TRUNC_TO_HOUR, "2015-03-05T09:00:00", defaultInputTS.get) - test(DateTimeUtils.TRUNC_TO_MINUTE, "2015-03-05T09:32:00", defaultInputTS.get) - test(DateTimeUtils.TRUNC_TO_SECOND, "2015-03-05T09:32:05", defaultInputTS.get) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-02T00:00:00", defaultInputTS.get) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS1.get) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS2.get) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS3.get) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-23T00:00:00", defaultInputTS4.get) - test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", defaultInputTS.get) - test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", defaultInputTS1.get) - test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", defaultInputTS2.get) + testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_DAY, "2015-03-05T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_HOUR, "2015-03-05T09:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_MINUTE, "2015-03-05T09:32:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_SECOND, "2015-03-05T09:32:05", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-02T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS1.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS2.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", defaultInputTS3.get) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-23T00:00:00", defaultInputTS4.get) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", defaultInputTS.get) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", defaultInputTS1.get) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", defaultInputTS2.get) for (tz <- DateTimeTestUtils.ALL_TIMEZONES) { DateTimeTestUtils.withDefaultTimeZone(tz) { @@ -615,20 +615,20 @@ class DateTimeUtilsSuite extends SparkFunSuite { val inputTS4 = DateTimeUtils.stringToTimestamp(UTF8String.fromString("2015-03-29T02:32:05.359")) - test(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", inputTS.get, tz) - test(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", inputTS.get, tz) - test(DateTimeUtils.TRUNC_TO_DAY, "2015-03-05T00:00:00", inputTS.get, tz) - test(DateTimeUtils.TRUNC_TO_HOUR, "2015-03-05T09:00:00", inputTS.get, tz) - test(DateTimeUtils.TRUNC_TO_MINUTE, "2015-03-05T09:32:00", inputTS.get, tz) - test(DateTimeUtils.TRUNC_TO_SECOND, "2015-03-05T09:32:05", inputTS.get, tz) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-02T00:00:00", inputTS.get, tz) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS1.get, tz) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS2.get, tz) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS3.get, tz) - test(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-23T00:00:00", inputTS4.get, tz) - test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS.get, tz) - test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS1.get, tz) - test(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", inputTS2.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_YEAR, "2015-01-01T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_MONTH, "2015-03-01T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_DAY, "2015-03-05T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_HOUR, "2015-03-05T09:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_MINUTE, "2015-03-05T09:32:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_SECOND, "2015-03-05T09:32:05", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-02T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS1.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS2.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-30T00:00:00", inputTS3.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_WEEK, "2015-03-23T00:00:00", inputTS4.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-01-01T00:00:00", inputTS1.get, tz) + testTrunc(DateTimeUtils.TRUNC_TO_QUARTER, "2015-04-01T00:00:00", inputTS2.get, tz) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index b6e359edf0142..052a3f533da5a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2801,9 +2801,9 @@ object functions { * Returns timestamp truncated to the unit specified by the format. * * @param format: 'year', 'yyyy', 'yy' for truncate by year, - * 'month', 'mon', 'mm' for truncate by month, - * 'day', 'dd' for truncate by day, - * Other options are: second, minute, hour, week, month, quarter + * 'month', 'mon', 'mm' for truncate by month, + * 'day', 'dd' for truncate by day, + * Other options are: 'second', 'minute', 'hour', 'week', 'month', 'quarter' * * @group datetime_funcs * @since 2.3.0