Skip to content

Commit 6d94bf6

Browse files
adrian-wangdavies
authored andcommitted
[SPARK-8174] [SPARK-8175] [SQL] function unix_timestamp, from_unixtime
unix_timestamp(): long Gets current Unix timestamp in seconds. unix_timestamp(string|date): long Converts time string in format yyyy-MM-dd HH:mm:ss to Unix timestamp (in seconds), using the default timezone and the default locale, return null if fail: unix_timestamp('2009-03-20 11:30:01') = 1237573801 unix_timestamp(string date, string pattern): long Convert time string with given pattern (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html]) to Unix time stamp (in seconds), return null if fail: unix_timestamp('2009-03-20', 'yyyy-MM-dd') = 1237532400. from_unixtime(bigint unixtime[, string format]): string Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string representing the timestamp of that moment in the current system time zone in the format of "1970-01-01 00:00:00". Jira: https://issues.apache.org/jira/browse/SPARK-8174 https://issues.apache.org/jira/browse/SPARK-8175 Author: Daoyuan Wang <[email protected]> Closes apache#7644 from adrian-wang/udfunixtime and squashes the following commits: 2fe20c4 [Daoyuan Wang] util.Date ea2ec16 [Daoyuan Wang] use util.Date for better performance a2cf929 [Daoyuan Wang] doc return null instead of 0 f6f070a [Daoyuan Wang] address comments from davies 6a4cbb3 [Daoyuan Wang] temp 56ded53 [Daoyuan Wang] rebase and address comments 14a8b37 [Daoyuan Wang] function unix_timestamp, from_unixtime
1 parent 06b6a07 commit 6d94bf6

File tree

5 files changed

+374
-4
lines changed

5 files changed

+374
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,13 +211,15 @@ object FunctionRegistry {
211211
expression[DayOfMonth]("day"),
212212
expression[DayOfYear]("dayofyear"),
213213
expression[DayOfMonth]("dayofmonth"),
214+
expression[FromUnixTime]("from_unixtime"),
214215
expression[Hour]("hour"),
215216
expression[LastDay]("last_day"),
216217
expression[Minute]("minute"),
217218
expression[Month]("month"),
218219
expression[NextDay]("next_day"),
219220
expression[Quarter]("quarter"),
220221
expression[Second]("second"),
222+
expression[UnixTimestamp]("unix_timestamp"),
221223
expression[WeekOfYear]("weekofyear"),
222224
expression[Year]("year"),
223225

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeFunctions.scala

Lines changed: 216 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
package org.apache.spark.sql.catalyst.expressions
1919

20-
import java.sql.Date
2120
import java.text.SimpleDateFormat
2221
import java.util.{Calendar, TimeZone}
2322

@@ -28,6 +27,8 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils
2827
import org.apache.spark.sql.types._
2928
import org.apache.spark.unsafe.types.UTF8String
3029

30+
import scala.util.Try
31+
3132
/**
3233
* Returns the current date at the start of query evaluation.
3334
* All calls of current_date within the same query return the same value.
@@ -236,20 +237,232 @@ case class DateFormatClass(left: Expression, right: Expression) extends BinaryEx
236237

237238
override protected def nullSafeEval(timestamp: Any, format: Any): Any = {
238239
val sdf = new SimpleDateFormat(format.toString)
239-
UTF8String.fromString(sdf.format(new Date(timestamp.asInstanceOf[Long] / 1000)))
240+
UTF8String.fromString(sdf.format(new java.util.Date(timestamp.asInstanceOf[Long] / 1000)))
240241
}
241242

242243
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
243244
val sdf = classOf[SimpleDateFormat].getName
244245
defineCodeGen(ctx, ev, (timestamp, format) => {
245246
s"""UTF8String.fromString((new $sdf($format.toString()))
246-
.format(new java.sql.Date($timestamp / 1000)))"""
247+
.format(new java.util.Date($timestamp / 1000)))"""
247248
})
248249
}
249250

250251
override def prettyName: String = "date_format"
251252
}
252253

254+
/**
255+
* Converts time string with given pattern
256+
* (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html])
257+
* to Unix time stamp (in seconds), returns null if fail.
258+
* Note that hive Language Manual says it returns 0 if fail, but in fact it returns null.
259+
* If the second parameter is missing, use "yyyy-MM-dd HH:mm:ss".
260+
* If no parameters provided, the first parameter will be current_timestamp.
261+
* If the first parameter is a Date or Timestamp instead of String, we will ignore the
262+
* second parameter.
263+
*/
264+
case class UnixTimestamp(timeExp: Expression, format: Expression)
265+
extends BinaryExpression with ExpectsInputTypes {
266+
267+
override def left: Expression = timeExp
268+
override def right: Expression = format
269+
270+
def this(time: Expression) = {
271+
this(time, Literal("yyyy-MM-dd HH:mm:ss"))
272+
}
273+
274+
def this() = {
275+
this(CurrentTimestamp())
276+
}
277+
278+
override def inputTypes: Seq[AbstractDataType] =
279+
Seq(TypeCollection(StringType, DateType, TimestampType), StringType)
280+
281+
override def dataType: DataType = LongType
282+
283+
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
284+
285+
override def eval(input: InternalRow): Any = {
286+
val t = left.eval(input)
287+
if (t == null) {
288+
null
289+
} else {
290+
left.dataType match {
291+
case DateType =>
292+
DateTimeUtils.daysToMillis(t.asInstanceOf[Int]) / 1000L
293+
case TimestampType =>
294+
t.asInstanceOf[Long] / 1000000L
295+
case StringType if right.foldable =>
296+
if (constFormat != null) {
297+
Try(new SimpleDateFormat(constFormat.toString).parse(
298+
t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
299+
} else {
300+
null
301+
}
302+
case StringType =>
303+
val f = format.eval(input)
304+
if (f == null) {
305+
null
306+
} else {
307+
val formatString = f.asInstanceOf[UTF8String].toString
308+
Try(new SimpleDateFormat(formatString).parse(
309+
t.asInstanceOf[UTF8String].toString).getTime / 1000L).getOrElse(null)
310+
}
311+
}
312+
}
313+
}
314+
315+
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
316+
left.dataType match {
317+
case StringType if right.foldable =>
318+
val sdf = classOf[SimpleDateFormat].getName
319+
val fString = if (constFormat == null) null else constFormat.toString
320+
val formatter = ctx.freshName("formatter")
321+
if (fString == null) {
322+
s"""
323+
boolean ${ev.isNull} = true;
324+
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
325+
"""
326+
} else {
327+
val eval1 = left.gen(ctx)
328+
s"""
329+
${eval1.code}
330+
boolean ${ev.isNull} = ${eval1.isNull};
331+
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
332+
if (!${ev.isNull}) {
333+
try {
334+
$sdf $formatter = new $sdf("$fString");
335+
${ev.primitive} =
336+
$formatter.parse(${eval1.primitive}.toString()).getTime() / 1000L;
337+
} catch (java.lang.Throwable e) {
338+
${ev.isNull} = true;
339+
}
340+
}
341+
"""
342+
}
343+
case StringType =>
344+
val sdf = classOf[SimpleDateFormat].getName
345+
nullSafeCodeGen(ctx, ev, (string, format) => {
346+
s"""
347+
try {
348+
${ev.primitive} =
349+
(new $sdf($format.toString())).parse($string.toString()).getTime() / 1000L;
350+
} catch (java.lang.Throwable e) {
351+
${ev.isNull} = true;
352+
}
353+
"""
354+
})
355+
case TimestampType =>
356+
val eval1 = left.gen(ctx)
357+
s"""
358+
${eval1.code}
359+
boolean ${ev.isNull} = ${eval1.isNull};
360+
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
361+
if (!${ev.isNull}) {
362+
${ev.primitive} = ${eval1.primitive} / 1000000L;
363+
}
364+
"""
365+
case DateType =>
366+
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
367+
val eval1 = left.gen(ctx)
368+
s"""
369+
${eval1.code}
370+
boolean ${ev.isNull} = ${eval1.isNull};
371+
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
372+
if (!${ev.isNull}) {
373+
${ev.primitive} = $dtu.daysToMillis(${eval1.primitive}) / 1000L;
374+
}
375+
"""
376+
}
377+
}
378+
}
379+
380+
/**
381+
* Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
382+
* representing the timestamp of that moment in the current system time zone in the given
383+
* format. If the format is missing, using format like "1970-01-01 00:00:00".
384+
* Note that hive Language Manual says it returns 0 if fail, but in fact it returns null.
385+
*/
386+
case class FromUnixTime(sec: Expression, format: Expression)
387+
extends BinaryExpression with ImplicitCastInputTypes {
388+
389+
override def left: Expression = sec
390+
override def right: Expression = format
391+
392+
def this(unix: Expression) = {
393+
this(unix, Literal("yyyy-MM-dd HH:mm:ss"))
394+
}
395+
396+
override def dataType: DataType = StringType
397+
398+
override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType)
399+
400+
private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String]
401+
402+
override def eval(input: InternalRow): Any = {
403+
val time = left.eval(input)
404+
if (time == null) {
405+
null
406+
} else {
407+
if (format.foldable) {
408+
if (constFormat == null) {
409+
null
410+
} else {
411+
Try(UTF8String.fromString(new SimpleDateFormat(constFormat.toString).format(
412+
new java.util.Date(time.asInstanceOf[Long] * 1000L)))).getOrElse(null)
413+
}
414+
} else {
415+
val f = format.eval(input)
416+
if (f == null) {
417+
null
418+
} else {
419+
Try(UTF8String.fromString(new SimpleDateFormat(
420+
f.asInstanceOf[UTF8String].toString).format(new java.util.Date(
421+
time.asInstanceOf[Long] * 1000L)))).getOrElse(null)
422+
}
423+
}
424+
}
425+
}
426+
427+
override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
428+
val sdf = classOf[SimpleDateFormat].getName
429+
if (format.foldable) {
430+
if (constFormat == null) {
431+
s"""
432+
boolean ${ev.isNull} = true;
433+
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
434+
"""
435+
} else {
436+
val t = left.gen(ctx)
437+
s"""
438+
${t.code}
439+
boolean ${ev.isNull} = ${t.isNull};
440+
${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)};
441+
if (!${ev.isNull}) {
442+
try {
443+
${ev.primitive} = UTF8String.fromString(new $sdf("${constFormat.toString}").format(
444+
new java.util.Date(${t.primitive} * 1000L)));
445+
} catch (java.lang.Throwable e) {
446+
${ev.isNull} = true;
447+
}
448+
}
449+
"""
450+
}
451+
} else {
452+
nullSafeCodeGen(ctx, ev, (seconds, f) => {
453+
s"""
454+
try {
455+
${ev.primitive} = UTF8String.fromString((new $sdf($f.toString())).format(
456+
new java.util.Date($seconds * 1000L)));
457+
} catch (java.lang.Throwable e) {
458+
${ev.isNull} = true;
459+
}""".stripMargin
460+
})
461+
}
462+
}
463+
464+
}
465+
253466
/**
254467
* Returns the last day of the month which the date belongs to.
255468
*/

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ import java.text.SimpleDateFormat
2222
import java.util.Calendar
2323

2424
import org.apache.spark.SparkFunSuite
25+
import org.apache.spark.sql.catalyst.InternalRow
2526
import org.apache.spark.sql.catalyst.util.DateTimeUtils
26-
import org.apache.spark.sql.types.{StringType, TimestampType, DateType}
27+
import org.apache.spark.sql.types._
2728

2829
class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
2930

@@ -303,4 +304,60 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
303304
checkEvaluation(
304305
NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)), null)
305306
}
307+
308+
test("from_unixtime") {
309+
val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
310+
val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS"
311+
val sdf2 = new SimpleDateFormat(fmt2)
312+
checkEvaluation(
313+
FromUnixTime(Literal(0L), Literal("yyyy-MM-dd HH:mm:ss")), sdf1.format(new Timestamp(0)))
314+
checkEvaluation(FromUnixTime(
315+
Literal(1000L), Literal("yyyy-MM-dd HH:mm:ss")), sdf1.format(new Timestamp(1000000)))
316+
checkEvaluation(
317+
FromUnixTime(Literal(-1000L), Literal(fmt2)), sdf2.format(new Timestamp(-1000000)))
318+
checkEvaluation(
319+
FromUnixTime(Literal.create(null, LongType), Literal.create(null, StringType)), null)
320+
checkEvaluation(
321+
FromUnixTime(Literal.create(null, LongType), Literal("yyyy-MM-dd HH:mm:ss")), null)
322+
checkEvaluation(FromUnixTime(Literal(1000L), Literal.create(null, StringType)), null)
323+
checkEvaluation(
324+
FromUnixTime(Literal(0L), Literal("not a valid format")), null)
325+
}
326+
327+
test("unix_timestamp") {
328+
val sdf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
329+
val fmt2 = "yyyy-MM-dd HH:mm:ss.SSS"
330+
val sdf2 = new SimpleDateFormat(fmt2)
331+
val fmt3 = "yy-MM-dd"
332+
val sdf3 = new SimpleDateFormat(fmt3)
333+
val date1 = Date.valueOf("2015-07-24")
334+
checkEvaluation(
335+
UnixTimestamp(Literal(sdf1.format(new Timestamp(0))), Literal("yyyy-MM-dd HH:mm:ss")), 0L)
336+
checkEvaluation(UnixTimestamp(
337+
Literal(sdf1.format(new Timestamp(1000000))), Literal("yyyy-MM-dd HH:mm:ss")), 1000L)
338+
checkEvaluation(
339+
UnixTimestamp(Literal(new Timestamp(1000000)), Literal("yyyy-MM-dd HH:mm:ss")), 1000L)
340+
checkEvaluation(
341+
UnixTimestamp(Literal(date1), Literal("yyyy-MM-dd HH:mm:ss")),
342+
DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(date1)) / 1000L)
343+
checkEvaluation(
344+
UnixTimestamp(Literal(sdf2.format(new Timestamp(-1000000))), Literal(fmt2)), -1000L)
345+
checkEvaluation(UnixTimestamp(
346+
Literal(sdf3.format(Date.valueOf("2015-07-24"))), Literal(fmt3)),
347+
DateTimeUtils.daysToMillis(DateTimeUtils.fromJavaDate(Date.valueOf("2015-07-24"))) / 1000L)
348+
val t1 = UnixTimestamp(
349+
CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss")).eval().asInstanceOf[Long]
350+
val t2 = UnixTimestamp(
351+
CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss")).eval().asInstanceOf[Long]
352+
assert(t2 - t1 <= 1)
353+
checkEvaluation(
354+
UnixTimestamp(Literal.create(null, DateType), Literal.create(null, StringType)), null)
355+
checkEvaluation(
356+
UnixTimestamp(Literal.create(null, DateType), Literal("yyyy-MM-dd HH:mm:ss")), null)
357+
checkEvaluation(UnixTimestamp(
358+
Literal(date1), Literal.create(null, StringType)), date1.getTime / 1000L)
359+
checkEvaluation(
360+
UnixTimestamp(Literal("2015-07-24"), Literal("not a valid format")), null)
361+
}
362+
306363
}

sql/core/src/main/scala/org/apache/spark/sql/functions.scala

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2110,6 +2110,48 @@ object functions {
21102110
*/
21112111
def weekofyear(columnName: String): Column = weekofyear(Column(columnName))
21122112

2113+
/**
2114+
* Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
2115+
* representing the timestamp of that moment in the current system time zone in the given
2116+
* format.
2117+
* @group datetime_funcs
2118+
* @since 1.5.0
2119+
*/
2120+
def from_unixtime(ut: Column): Column = FromUnixTime(ut.expr, Literal("yyyy-MM-dd HH:mm:ss"))
2121+
2122+
/**
2123+
* Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
2124+
* representing the timestamp of that moment in the current system time zone in the given
2125+
* format.
2126+
* @group datetime_funcs
2127+
* @since 1.5.0
2128+
*/
2129+
def from_unixtime(ut: Column, f: String): Column = FromUnixTime(ut.expr, Literal(f))
2130+
2131+
/**
2132+
* Gets current Unix timestamp in seconds.
2133+
* @group datetime_funcs
2134+
* @since 1.5.0
2135+
*/
2136+
def unix_timestamp(): Column = UnixTimestamp(CurrentTimestamp(), Literal("yyyy-MM-dd HH:mm:ss"))
2137+
2138+
/**
2139+
* Converts time string in format yyyy-MM-dd HH:mm:ss to Unix timestamp (in seconds),
2140+
* using the default timezone and the default locale, return null if fail.
2141+
* @group datetime_funcs
2142+
* @since 1.5.0
2143+
*/
2144+
def unix_timestamp(s: Column): Column = UnixTimestamp(s.expr, Literal("yyyy-MM-dd HH:mm:ss"))
2145+
2146+
/**
2147+
* Convert time string with given pattern
2148+
* (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html])
2149+
* to Unix time stamp (in seconds), return null if fail.
2150+
* @group datetime_funcs
2151+
* @since 1.5.0
2152+
*/
2153+
def unix_timestamp(s: Column, p: String): Column = UnixTimestamp(s.expr, Literal(p))
2154+
21132155
//////////////////////////////////////////////////////////////////////////////////////////////
21142156
// Collection functions
21152157
//////////////////////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)