-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-8176] [SPARK-8197] [SQL] function to_date/ trunc #7805
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d44ea5f
a476c5a
980b092
310dd55
2c7beba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -507,7 +507,6 @@ case class FromUnixTime(sec: Expression, format: Expression) | |
| }) | ||
| } | ||
| } | ||
|
|
||
| } | ||
|
|
||
| /** | ||
|
|
@@ -696,3 +695,90 @@ case class MonthsBetween(date1: Expression, date2: Expression) | |
| }) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Extracts the date portion of a timestamp or string input. | ||
| */ | ||
| case class ToDate(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { | ||
|
|
||
| override def dataType: DataType = DateType | ||
|
|
||
| // Declaring DateType as the sole input type relies on Spark's implicit casting, which | ||
| // accepts TimestampType as well as strings in either date or timestamp format. | ||
| override def inputTypes: Seq[AbstractDataType] = Seq(DateType) | ||
|
|
||
| // No extra work here: the child's value is returned unchanged. | ||
| override def eval(input: InternalRow): Any = child.eval(input) | ||
|
|
||
| // Generated code likewise forwards the child's value as-is. | ||
| override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = | ||
| defineCodeGen(ctx, ev, childValue => childValue) | ||
| } | ||
|
|
||
| /** | ||
| * Returns date truncated to the unit specified by the format. | ||
| * | ||
| * Evaluates to null when the date is null or when the format is not a truncation unit | ||
| * recognised by `DateTimeUtils.parseTruncLevel` (which reports such formats as -1). | ||
| */ | ||
| case class TruncDate(date: Expression, format: Expression) | ||
| extends BinaryExpression with ImplicitCastInputTypes { | ||
| override def left: Expression = date | ||
| override def right: Expression = format | ||
|
|
||
| override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) | ||
| override def dataType: DataType = DateType | ||
| override def prettyName: String = "trunc" | ||
|
|
||
| // Truncation level of a foldable format, parsed at most once. Only read when | ||
| // `format.foldable` holds, so evaluating the format without an input row is safe here. | ||
| lazy val minItemConst: Int = | ||
| DateTimeUtils.parseTruncLevel(format.eval().asInstanceOf[UTF8String]) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. mark this as private, and maybe rename it to truncationLevel? |
||
|
|
||
| override def eval(input: InternalRow): Any = { | ||
| val minItem = if (format.foldable) { | ||
| minItemConst | ||
| } else { | ||
| // A non-foldable format may depend on the current row, so it must be evaluated | ||
| // against `input` rather than against no row at all. | ||
| DateTimeUtils.parseTruncLevel(format.eval(input).asInstanceOf[UTF8String]) | ||
| } | ||
| if (minItem == -1) { | ||
| // unknown format | ||
| null | ||
| } else { | ||
| val d = date.eval(input) | ||
| if (d == null) { | ||
| null | ||
| } else { | ||
| DateTimeUtils.truncDate(d.asInstanceOf[Int], minItem) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { | ||
| val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") | ||
|
|
||
| if (format.foldable) { | ||
| if (minItemConst == -1) { | ||
| // Constant but unknown format: the result is always null, so the date is not evaluated. | ||
| s""" | ||
| boolean ${ev.isNull} = true; | ||
| ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; | ||
| """ | ||
| } else { | ||
| // Constant, valid format: embed the pre-parsed level directly in the generated code. | ||
| val d = date.gen(ctx) | ||
| s""" | ||
| ${d.code} | ||
| boolean ${ev.isNull} = ${d.isNull}; | ||
| ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; | ||
| if (!${ev.isNull}) { | ||
| ${ev.primitive} = $dtu.truncDate(${d.primitive}, $minItemConst); | ||
| } | ||
| """ | ||
| } | ||
| } else { | ||
| // Non-constant format: parse the level in the generated code for every row. | ||
| nullSafeCodeGen(ctx, ev, (dateVal, fmt) => { | ||
| val form = ctx.freshName("form") | ||
| s""" | ||
| int $form = $dtu.parseTruncLevel($fmt); | ||
| if ($form == -1) { | ||
| ${ev.isNull} = true; | ||
| } else { | ||
| ${ev.primitive} = $dtu.truncDate($dateVal, $form); | ||
| } | ||
| """ | ||
| }) | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -779,4 +779,38 @@ object DateTimeUtils { | |
| } | ||
| date + (lastDayOfMonthInYear - dayInYear) | ||
| } | ||
|
|
||
| private val TRUNC_TO_YEAR = 1 | ||
| private val TRUNC_TO_MONTH = 2 | ||
| private val TRUNC_INVALID = -1 | ||
|
|
||
| /** | ||
| * Returns the trunc date from original date and trunc level. | ||
| * Trunc level should be generated using `parseTruncLevel()`, should only be 1 or 2. | ||
| * | ||
| * @param d the date to truncate, as the Int date value used by DateTimeUtils | ||
| * @param level TRUNC_TO_YEAR or TRUNC_TO_MONTH | ||
| * @return the truncated date, in the same Int representation as `d` | ||
| * @throws RuntimeException if `level` is neither TRUNC_TO_YEAR nor TRUNC_TO_MONTH | ||
| */ | ||
| def truncDate(d: Int, level: Int): Int = { | ||
| level match { | ||
| case TRUNC_TO_YEAR => | ||
| d - DateTimeUtils.getDayInYear(d) + 1 | ||
| case TRUNC_TO_MONTH => | ||
| d - DateTimeUtils.getDayOfMonth(d) + 1 | ||
| case _ => | ||
| // Should never be hit: the trunc level is generated internally by parseTruncLevel() | ||
| // and callers are expected to handle TRUNC_INVALID before calling this method. | ||
| sys.error(s"Invalid trunc level: $level") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sys.error("...") and add a comment explaining this should never be hit because trunc level is internally generated. |
||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Maps a truncation format string to its trunc level. | ||
| * | ||
| * The comparison ignores case. "YEAR", "YYYY" and "YY" give TRUNC_TO_YEAR; "MON", "MONTH" | ||
| * and "MM" give TRUNC_TO_MONTH. A null or otherwise unsupported format gives TRUNC_INVALID. | ||
| */ | ||
| def parseTruncLevel(format: UTF8String): Int = { | ||
| format match { | ||
| case null => TRUNC_INVALID | ||
| case unit => | ||
| unit.toString.toUpperCase match { | ||
| case "YEAR" | "YYYY" | "YY" => TRUNC_TO_YEAR | ||
| case "MON" | "MONTH" | "MM" => TRUNC_TO_MONTH | ||
| case _ => TRUNC_INVALID | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,4 +47,8 @@ object NonFoldableLiteral { | |
| val lit = Literal(value) | ||
| NonFoldableLiteral(lit.value, lit.dataType) | ||
| } | ||
| /** | ||
| * Builds a NonFoldableLiteral for `value` with an explicit `dataType`. | ||
| * The value is passed through `Literal.create` first and the resulting literal's value | ||
| * and data type are used (per the author's note, so the value is cast to a Catalyst type). | ||
| */ | ||
| def create(value: Any, dataType: DataType): NonFoldableLiteral = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is not needed, is it? NonFoldableLiteral already has this if you don't define it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the value should be casted into Catalyst type, so it's needed. |
||
| val converted = Literal.create(value, dataType) | ||
| NonFoldableLiteral(converted.value, converted.dataType) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2181,6 +2181,22 @@ object functions { | |
| */ | ||
| def unix_timestamp(s: Column, p: String): Column = UnixTimestamp(s.expr, Literal(p)) | ||
|
|
||
| /** | ||
| * Converts the column into DateType. | ||
| * | ||
| * @group datetime_funcs | ||
| * @since 1.5.0 | ||
| */ | ||
| def to_date(e: Column): Column = ToDate(e.expr) | ||
|
|
||
| /** | ||
| * Returns date truncated to the unit specified by the format. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. here we should document what the accepted values are for format, and give an example. Otherwise it is very hard for users to know what this function actually does. |
||
| * | ||
| * Accepted values for `format` (case-insensitive): | ||
| * - "year", "yyyy", "yy" to truncate to the year | ||
| * - "mon", "month", "mm" to truncate to the month | ||
| * Any other format makes the result null. | ||
| * | ||
| * Example: `trunc(col("d"), "month")` is expected to turn 2015-07-22 into 2015-07-01. | ||
| * | ||
| * @param date the date column to truncate | ||
| * @param format the truncation unit, one of the values listed above | ||
| * | ||
| * @group datetime_funcs | ||
| * @since 1.5.0 | ||
| */ | ||
| def trunc(date: Column, format: String): Column = TruncDate(date.expr, Literal(format)) | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////////////////////// | ||
| // Collection functions | ||
| ////////////////////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
tiny optimization - this can just call child.genCode
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
genCode is protected.