Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,18 @@ object FunctionRegistry {
expression[Upper]("upper"),

// datetime functions
expression[AddMonths]("add_months"),
expression[CurrentDate]("current_date"),
expression[CurrentTimestamp]("current_timestamp"),
expression[DateAdd]("date_add"),
expression[DateFormatClass]("date_format"),
expression[DateSub]("date_sub"),
expression[DayOfMonth]("day"),
expression[DayOfYear]("dayofyear"),
expression[DayOfMonth]("dayofmonth"),
expression[Hour]("hour"),
expression[Month]("month"),
expression[MonthsBetween]("months_between"),
expression[Minute]("minute"),
expression[Quarter]("quarter"),
expression[Second]("second"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.unsafe.types.{Interval, UTF8String}

/**
* Returns the current date at the start of query evaluation.
Expand Down Expand Up @@ -62,6 +62,53 @@ case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
}
}

/**
* Adds a number of days to startdate.
*/
case class DateAdd(startDate: Expression, days: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

override def left: Expression = startDate
override def right: Expression = days

override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think we want to support:

timestamp + interval returns timestamp
date + interval returns date
date + int returns date ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, will update today. BTW, How do we tell the difference if we are doing
string + interval/int,
should we just take strings as date?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

string can get implicitly cast to date or timestamp, can't it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but then we have to decide it is date or timestamp, and that need to be done at run time.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think we can just put timestamp first, and as a result implicitly cast string to timestamp.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think date_add('2015-07-22', 1) return '2015-07-23 00:00:00' is not so natural...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just checked with hive 1.2.
hive> select date_add("2015-01-01 00:11:22", 2);
2015-01-03
hive> select date_add(cast("2015-01-01 00:11:22" as timestamp), 2);
2015-01-03

shall we use the same pattern?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

date_add is used for adding days, where does hive support timestamp + interval? in + or another special operator?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hive and oracle use + sign
select birthday + interval 3 days from xxx;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK that makes sense. Let's use date for string then.


override def dataType: DataType = DateType

override def nullSafeEval(start: Any, d: Any): Any = {
start.asInstanceOf[Int] + d.asInstanceOf[Int]
}

override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
nullSafeCodeGen(ctx, ev, (sd, d) => {
s"""${ev.primitive} = $sd + $d;"""
})
}
}

/**
* Subtracts a number of days to startdate.
*/
case class DateSub(startDate: Expression, days: Expression)
extends BinaryExpression with ImplicitCastInputTypes {
override def left: Expression = startDate
override def right: Expression = days

override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType)

override def dataType: DataType = DateType

override def nullSafeEval(start: Any, d: Any): Any = {
start.asInstanceOf[Int] - d.asInstanceOf[Int]
}

override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
nullSafeCodeGen(ctx, ev, (sd, d) => {
s"""${ev.primitive} = $sd - $d;"""
})
}
}

case class Hour(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType)
Expand Down Expand Up @@ -258,3 +305,125 @@ case class DateFormatClass(left: Expression, right: Expression) extends BinaryEx
})
}
}

/**
* Time Adds Interval.
*/
case class TimeAdd(left: Expression, right: Expression)
extends BinaryExpression with ExpectsInputTypes {

override def toString: String = s"$left + $right"
override def inputTypes: Seq[AbstractDataType] =
Seq(TypeCollection(DateType, TimestampType), IntervalType)

override def dataType: DataType = TimestampType

override def nullSafeEval(start: Any, interval: Any): Any = {
val itvl = interval.asInstanceOf[Interval]
left.dataType match {
case DateType =>
DateTimeUtils.dateAddFullInterval(
start.asInstanceOf[Int], itvl.months, itvl.microseconds)
case TimestampType =>
DateTimeUtils.timestampAddFullInterval(
start.asInstanceOf[Long], itvl.months, itvl.microseconds)
}
}

override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
left.dataType match {
case DateType =>
defineCodeGen(ctx, ev, (sd, i) => {
s"""$dtu.dateAddFullInterval($sd, $i.months, $i.microseconds)"""
})
case TimestampType => // TimestampType
defineCodeGen(ctx, ev, (sd, i) => {
s"""$dtu.timestampAddFullInterval($sd, $i.months, $i.microseconds)"""
})
}
}
}

/**
* Time Subtracts Interval.
*/
case class TimeSub(left: Expression, right: Expression)
extends BinaryExpression with ExpectsInputTypes {

override def toString: String = s"$left - $right"
override def inputTypes: Seq[AbstractDataType] =
Seq(TypeCollection(DateType, TimestampType), IntervalType)

override def dataType: DataType = TimestampType

override def nullSafeEval(start: Any, interval: Any): Any = {
val itvl = interval.asInstanceOf[Interval]
left.dataType match {
case DateType =>
DateTimeUtils.dateAddFullInterval(
start.asInstanceOf[Int], 0 - itvl.months, 0 - itvl.microseconds)
case TimestampType =>
DateTimeUtils.timestampAddFullInterval(
start.asInstanceOf[Long], 0 - itvl.months, 0 - itvl.microseconds)
}
}

override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
left.dataType match {
case DateType =>
defineCodeGen(ctx, ev, (sd, i) => {
s"""$dtu.dateAddFullInterval($sd, 0 - $i.months, 0 - $i.microseconds)"""
})
case TimestampType => // TimestampType
defineCodeGen(ctx, ev, (sd, i) => {
s"""$dtu.timestampAddFullInterval($sd, 0 - $i.months, 0 - $i.microseconds)"""
})
}
}
}

/**
* Returns the date that is num_months after start_date.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shall we make argument names consistent with comment?

*/
case class AddMonths(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(DateType, IntegerType)

override def dataType: DataType = DateType

override def nullSafeEval(start: Any, months: Any): Any = {
DateTimeUtils.dateAddYearMonthInterval(start.asInstanceOf[Int], months.asInstanceOf[Int])
}

override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, (sd, m) => {
s"""$dtu.dateAddYearMonthInterval($sd, $m)"""
})
}
}

/**
* Returns number of months between dates date1 and date2.
*/
case class MonthsBetween(left: Expression, right: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, TimestampType)

override def dataType: DataType = DoubleType

override def nullSafeEval(t1: Any, t2: Any): Any = {
DateTimeUtils.monthsBetween(t1.asInstanceOf[Long], t2.asInstanceOf[Long])
}

override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
defineCodeGen(ctx, ev, (l, r) => {
s"""$dtu.monthsBetween($l, $r)"""
})
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -573,4 +573,109 @@ object DateTimeUtils {
dayInYear - 334
}
}

/**
* Returns the date value for the first day of the given month.
* The month is expressed in months since 1.1.1970, starting from 0.
*/
def getDaysFromMonths(months: Int): Int = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

def firstDayOfMonth?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

val yearSinceEpoch = if (months < 0) months / 12 - 1 else months / 12
val monthInYearFromZero = months - yearSinceEpoch * 12
val daysFromYears = getDaysFromYears(yearSinceEpoch)
val febDays = if (isLeapYear(1970 + yearSinceEpoch)) 29 else 28
val daysForMonths = Seq(31, febDays, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use 28 for febDays, then make daysForMonths static.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is a common part from #6986 , have changed there.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when you are updating this pr, you could reuse
private[this] val daysInNormalYear in this object.

daysForMonths.slice(0, monthInYearFromZero).sum + daysFromYears
}

/**
* Returns the date value for January 1 of the given year.
* The year is expressed in years since 1.1.1970, starting from 0.
*/
def getDaysFromYears(years: Int): Int = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

def firstDayOfYear ?

val remainYear = (years % 400 + 400) % 400
val cycles = (years - remainYear) / 400
val numLeaps = if (remainYear < 130) {
remainYear / 4
} else if (remainYear < 230) {
remainYear / 4 - 1
} else if (remainYear < 330) {
remainYear / 4 - 2
} else {
remainYear / 4 - 3
}
cycles * (365 * 400 + 397) + remainYear * 365 + numLeaps
}

/**
* Add date and year-month interval.
* Returns a date value, expressed in days since 1.1.1970.
*/
def dateAddYearMonthInterval(days: Int, months: Int): Int = {
val currentMonth = (getYear(days) - 1970) * 12 + getMonth(days) - 1 + months
val currentMonthInYear = if (currentMonth < 0) {
((currentMonth % 12) + 12) % 12
} else {
currentMonth % 12
}
val currentYear = if (currentMonth < 0) (currentMonth / 12) - 1 else currentMonth / 12
val febDays = if (isLeapYear(1970 + currentYear)) 29 else 28
val daysForMonths = Seq(31, febDays, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
getDaysFromMonths(currentMonth) + math.min(
getDayOfMonth(days), daysForMonths(currentMonthInYear)) - 1
}

/**
* Add date and full interval.
* Returns a timestamp value, expressed in microseconds since 1.1.1970 00:00:00.
*/
def dateAddFullInterval(days: Int, months: Int, microseconds: Long): Long = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not just use Interval as the second argument?

daysToMillis(dateAddYearMonthInterval(days, months)) * 1000L + microseconds
}

/**
* Add timestamp and full interval.
* Returns a timestamp value, expressed in microseconds since 1.1.1970 00:00:00.
*/
def timestampAddFullInterval(micros: Long, months: Int, microseconds: Long): Long = {
val days = millisToDays(micros / 1000L)
dateAddFullInterval(days, months, microseconds) + micros - daysToMillis(days) * 1000L
}

/**
* Returns the last dayInMonth in the month it belongs to. The date is expressed
* in days since 1.1.1970. the return value starts from 1.
*/
def getLastDayInMonthOfMonth(date: Int): Int = {
val month = getMonth(date)

val febDay = if (isLeapYear(getYear(date))) 29 else 28
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only do this if month == 2

val days = Seq(31, febDay, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make it static

days(month - 1)
}

/**
* Returns number of months between time1 and time2. time1 and time2 are expressed in
* microseconds since 1.1.1970
*/
def monthsBetween(time1: Long, time2: Long): Double = {
val millis1 = time1.asInstanceOf[Long] / 1000L
val millis2 = time2.asInstanceOf[Long] / 1000L
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need to cast here

val date1 = millisToDays(millis1)
val date2 = millisToDays(millis2)
val dayInMonth1 = getDayOfMonth(date1)
val dayInMonth2 = getDayOfMonth(date2)
val lastDayMonth1 = getLastDayInMonthOfMonth(date1)
val lastDayMonth2 = getLastDayInMonthOfMonth(date2)
val months1 = getYear(date1) * 12 + getMonth(date1) - 1
val months2 = getYear(date2) * 12 + getMonth(date2) - 1
val timeInDay1 = time1 - daysToMillis(date1) * 1000L
val timeInDay2 = time2 - daysToMillis(date2) * 1000L
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's a little wired here, can't we just use?

final val MICROS_PER_DAY = MILLIS_PER_DAY * 1000L
...
val timeInDay2 = time1 % MICROS_PER_DAY

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

daysToMillis will also consider timezone offset.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we really need to consider timezone here? time1 and time2 are in same timezone, isn't it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we are calculating month, so we need to use date to see if they are of same day in month or both last day of month, and to get day info we need to consider offset.

if (dayInMonth1 == dayInMonth2 || (lastDayMonth1 == dayInMonth1 &&
lastDayMonth2 == dayInMonth2)) {
(months1 - months2).toDouble
} else {
val timesBetween = (timeInDay1 - timeInDay2).toDouble / (MILLIS_PER_DAY * 1000)
(months1 - months2).toDouble + (dayInMonth1 - dayInMonth2 + timesBetween) / 31.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we round this to 8 digits?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds reasonable.

}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.types.{StringType, TimestampType, DateType}
import org.apache.spark.unsafe.types.Interval

class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {

Expand Down Expand Up @@ -246,4 +248,57 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
}
}

test("date_add") {
checkEvaluation(
DateAdd(Literal(Date.valueOf("2016-02-28")), Literal(1)),
DateTimeUtils.fromJavaDate(Date.valueOf("2016-02-29")))
checkEvaluation(
DateAdd(Literal(Date.valueOf("2016-02-28")), Literal(-365)),
DateTimeUtils.fromJavaDate(Date.valueOf("2015-02-28")))
}

test("date_sub") {
checkEvaluation(
DateSub(Literal(Date.valueOf("2015-01-01")), Literal(1)),
DateTimeUtils.fromJavaDate(Date.valueOf("2014-12-31")))
checkEvaluation(
DateSub(Literal(Date.valueOf("2015-01-01")), Literal(-1)),
DateTimeUtils.fromJavaDate(Date.valueOf("2015-01-02")))
}

test("time_add") {
checkEvaluation(
TimeAdd(Literal(Date.valueOf("2016-01-29")), Literal(new Interval(1, 0))),
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2016-02-29 00:00:00")))
checkEvaluation(
TimeAdd(Literal(Date.valueOf("2016-01-31")), Literal(new Interval(1, 2000000.toLong))),
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2016-02-29 00:00:02")))
}

test("time_sub") {
checkEvaluation(
TimeSub(Literal(Timestamp.valueOf("2016-03-31 10:00:00")), Literal(new Interval(1, 0))),
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2016-02-29 10:00:00")))
checkEvaluation(
TimeSub(
Literal(Timestamp.valueOf("2016-03-30 00:00:01")),
Literal(new Interval(1, 2000000.toLong))),
DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf("2016-02-28 23:59:59")))
}

test("add_months") {
checkEvaluation(AddMonths(Literal(Date.valueOf(
"2015-01-30")), Literal(1)), DateTimeUtils.fromJavaDate(Date.valueOf("2015-02-28")))
checkEvaluation(AddMonths(Literal(Date.valueOf(
"2016-03-30")), Literal(-1)), DateTimeUtils.fromJavaDate(Date.valueOf("2016-02-29")))
}

test("months_between") {
checkEvaluation(MonthsBetween(Literal(Timestamp.valueOf(
"2015-01-30 11:52:00")), Literal(Timestamp.valueOf("2015-01-30 11:50:00"))), 0.0)
checkEvaluation(MonthsBetween(Literal(Timestamp.valueOf(
"2015-01-31 00:00:00")), Literal(Timestamp.valueOf("2015-03-31 22:00:00"))), -2.0)
checkEvaluation(MonthsBetween(Literal(Timestamp.valueOf(
"2015-03-31 22:00:00")), Literal(Timestamp.valueOf("2015-02-28 00:00:00"))), 1.0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More test cast here to guarantee the behaviour of months_between. It's complicated and many conner cases exist I think

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

}
}
Loading