
Commit 9d7ea7e

address comments
1 parent 77e75a7 commit 9d7ea7e

9 files changed, +95 -47 lines changed


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

Lines changed: 9 additions & 9 deletions

@@ -246,7 +246,7 @@ class Analyzer(
       ResolveLambdaVariables(conf) ::
       ResolveTimeZone(conf) ::
       ResolveRandomSeed ::
-      ResolveBinaryArithmetic(conf) ::
+      ResolveBinaryArithmetic ::
       TypeCoercion.typeCoercionRules(conf) ++
       extendedResolutionRules : _*),
     Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*),
@@ -268,16 +268,16 @@ class Analyzer(
   /**
    * For [[Add]]:
    * 1. if both side are interval, stays the same;
-   * 2. else if ansi is on and one side is date and the other is interval,
-   *    turns it to [[ANSIDateAdd]];
+   * 2. else if one side is date and the other is interval,
+   *    turns it to [[DateAddInterval]];
    * 3. else if one side is interval, turns it to [[TimeAdd]];
    * 4. else if one side is date, turns it to [[DateAdd]] ;
    * 5. else stays the same.
    *
    * For [[Subtract]]:
    * 1. if both side are interval, stays the same;
-   * 2. else if ansi is on, the left side is date and the right side is interval,
-   *    turns it to [[ANSIDateAdd(l, -r)]];
+   * 2. else if the left side is date and the right side is interval,
+   *    turns it to [[DateAddInterval(l, -r)]];
    * 3. else if the right side is an interval, turns it to [[TimeSub]];
    * 4. else if one side is timestamp, turns it to [[SubtractTimestamps]];
    * 5. else if the right side is date, turns it to [[DateDiff]]/[[SubtractDates]];
@@ -292,22 +292,22 @@ class Analyzer(
    * 1. If the left side is interval, turns it to [[DivideInterval]];
    * 2. otherwise, stays the same.
    */
-  case class ResolveBinaryArithmetic(conf: SQLConf) extends Rule[LogicalPlan] {
+  object ResolveBinaryArithmetic extends Rule[LogicalPlan] {
     override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
       case p: LogicalPlan => p.transformExpressionsUp {
         case a @ Add(l, r) if a.childrenResolved => (l.dataType, r.dataType) match {
           case (CalendarIntervalType, CalendarIntervalType) => a
-          case (DateType, CalendarIntervalType) if conf.ansiEnabled => ANSIDateAdd(l, r)
+          case (DateType, CalendarIntervalType) => DateAddInterval(l, r)
           case (_, CalendarIntervalType) => Cast(TimeAdd(l, r), l.dataType)
-          case (CalendarIntervalType, DateType) if conf.ansiEnabled => ANSIDateAdd(r, l)
+          case (CalendarIntervalType, DateType) => DateAddInterval(r, l)
           case (CalendarIntervalType, _) => Cast(TimeAdd(r, l), r.dataType)
           case (DateType, dt) if dt != StringType => DateAdd(l, r)
           case (dt, DateType) if dt != StringType => DateAdd(r, l)
           case _ => a
         }
         case s @ Subtract(l, r) if s.childrenResolved => (l.dataType, r.dataType) match {
           case (CalendarIntervalType, CalendarIntervalType) => s
-          case (DateType, CalendarIntervalType) if conf.ansiEnabled => ANSIDateAdd(l, UnaryMinus(r))
+          case (DateType, CalendarIntervalType) => DateAddInterval(l, UnaryMinus(r))
           case (_, CalendarIntervalType) => Cast(TimeSub(l, r), l.dataType)
           case (TimestampType, _) => SubtractTimestamps(l, r)
           case (_, TimestampType) => SubtractTimestamps(l, r)
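The net effect of the rule change: date ± interval now always resolves to DateAddInterval, in both ANSI and non-ANSI mode, and the ANSI check moves into the expression itself. A minimal sketch of how the new resolution can be observed (hypothetical spark-shell session against this branch; plan output omitted rather than guessed):

// Sketch only: resolution behavior implied by ResolveBinaryArithmetic above.
// date + interval  -> DateAddInterval(date, interval), regardless of ANSI mode
spark.sql("SELECT date'2016-02-28' + interval '1' month").explain(true)
// date - interval  -> DateAddInterval(date, -interval)
spark.sql("SELECT date'2016-02-28' - interval '1' month").explain(true)
// timestamp + interval is unchanged: Cast(TimeAdd(ts, interval), TimestampType)
spark.sql("SELECT timestamp'2016-02-28 10:00:00' + interval '2' second").explain(true)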

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala

Lines changed: 42 additions & 7 deletions

@@ -35,6 +35,7 @@ import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, Tim
 import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils._
 import org.apache.spark.sql.catalyst.util.LegacyDateFormats.SIMPLE_DATE_FORMAT
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

@@ -1158,10 +1159,20 @@ case class TimeAdd(start: Expression, interval: Expression, timeZoneId: Option[S
 }

 /**
- * Adds an interval with day precision to date.
+ * Adds date and an interval.
+ *
+ * When ansi mode is on, the microseconds part of interval needs to be 0, otherwise a runtime
+ * [[IllegalArgumentException]] will be raised.
+ * When ansi mode is off, if the microseconds part of interval is 0, we perform date + interval
+ * for better performance. If the microseconds part is not 0, then the date will be converted to a
+ * timestamp to add with the whole interval parts.
  */
-case class ANSIDateAdd(start: Expression, interval: Expression)
-  extends BinaryExpression with ExpectsInputTypes {
+case class DateAddInterval(
+    start: Expression,
+    interval: Expression,
+    timeZoneId: Option[String] = None,
+    ansiEnabled: Boolean = SQLConf.get.ansiEnabled)
+  extends BinaryExpression with ExpectsInputTypes with TimeZoneAwareExpression {

   override def left: Expression = start
   override def right: Expression = interval
@@ -1173,16 +1184,40 @@ case class ANSIDateAdd(start: Expression, interval: Expression)
   override def dataType: DataType = DateType

   override def nullSafeEval(start: Any, interval: Any): Any = {
-    DateTimeUtils.dateAddInterval(
-      start.asInstanceOf[Int], interval.asInstanceOf[CalendarInterval])
+    val itvl = interval.asInstanceOf[CalendarInterval]
+    if (ansiEnabled || itvl.microseconds == 0) {
+      DateTimeUtils.dateAddInterval(start.asInstanceOf[Int], itvl)
+    } else {
+      val startTs = DateTimeUtils.epochDaysToMicros(start.asInstanceOf[Int], zoneId)
+      val resultTs = DateTimeUtils.timestampAddInterval(
+        startTs, itvl.months, itvl.days, itvl.microseconds, zoneId)
+      DateTimeUtils.microsToEpochDays(resultTs, zoneId)
+    }
   }

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val dtu = DateTimeUtils.getClass.getName.stripSuffix("$")
-    defineCodeGen(ctx, ev, (sd, i) => {
-      s"""$dtu.dateAddInterval($sd, $i)"""
+    nullSafeCodeGen(ctx, ev, (sd, i) => if (ansiEnabled) {
+      s"""${ev.value} = $dtu.dateAddInterval($sd, $i);"""
+    } else {
+      val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName)
+      val startTs = ctx.freshName("startTs")
+      val resultTs = ctx.freshName("resultTs")
+      s"""
+         |if ($i.microseconds == 0) {
+         |  ${ev.value} = $dtu.dateAddInterval($sd, $i);
+         |} else {
+         |  long $startTs = $dtu.epochDaysToMicros($sd, $zid);
+         |  long $resultTs =
+         |    $dtu.timestampAddInterval($startTs, $i.months, $i.days, $i.microseconds, $zid);
+         |  ${ev.value} = $dtu.microsToEpochDays($resultTs, $zid);
+         |}
+         |""".stripMargin
     })
   }
+
+  override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression =
+    copy(timeZoneId = Option(timeZoneId))
 }

 /**
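To summarize the two evaluation paths above: ANSI mode (and any interval with microseconds == 0) takes the day-precision DateTimeUtils.dateAddInterval path, while non-ANSI mode with a sub-day interval round-trips through timestamp arithmetic in the session time zone. A standalone sketch that mirrors nullSafeEval's non-ANSI branch, using the same DateTimeUtils helpers the diff calls (illustrative, not the expression code itself):

import java.time.ZoneId
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.unsafe.types.CalendarInterval

// Mirrors DateAddInterval.nullSafeEval with ansiEnabled = false.
def addIntervalToDate(epochDays: Int, itvl: CalendarInterval, zoneId: ZoneId): Int = {
  if (itvl.microseconds == 0) {
    // Fast path: pure local-date arithmetic on months and days.
    DateTimeUtils.dateAddInterval(epochDays, itvl)
  } else {
    // Slow path: date -> microseconds since the epoch, add the full interval
    // (months, days, microseconds), then truncate back to days in the same zone.
    val startTs = DateTimeUtils.epochDaysToMicros(epochDays, zoneId)
    val resultTs = DateTimeUtils.timestampAddInterval(
      startTs, itvl.months, itvl.days, itvl.microseconds, zoneId)
    DateTimeUtils.microsToEpochDays(resultTs, zoneId)
  }
}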

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 4 additions & 7 deletions

@@ -623,18 +623,15 @@ object DateTimeUtils {
    * Returns a date value, expressed in days since 1.1.1970.
    *
    * @throws DateTimeException if the result exceeds the supported date range
-   * @throws IllegalArgumentException if the interval has
+   * @throws IllegalArgumentException if the interval has `microseconds` part in ansi mode
    */
   def dateAddInterval(
       start: SQLDate,
       interval: CalendarInterval): SQLDate = {
     require(interval.microseconds == 0,
-      "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date")
-    LocalDate.ofEpochDay(start)
-      .plusMonths(interval.months)
-      .plusDays(interval.days)
-      .toEpochDay
-      .toInt
+      "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date in ansi mode")
+    val ld = LocalDate.ofEpochDay(start).plusMonths(interval.months).plusDays(interval.days)
+    localDateToDays(ld)
   }

   /**
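A quick usage sketch of the reworked helper (the epoch-day value is worked out by hand: 2016-02-28 is day 16859 since 1970-01-01):

import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.unsafe.types.CalendarInterval

// 1 month + 1 day from 2016-02-28 lands on 2016-03-29, matching the test suite below.
val days = DateTimeUtils.dateAddInterval(16859, new CalendarInterval(1, 1, 0))

// Any non-zero microseconds part still trips the require(), whoever the caller is:
// DateTimeUtils.dateAddInterval(16859, new CalendarInterval(0, 0, 1))
//   => IllegalArgumentException("requirement failed: Cannot add hours, ... in ansi mode")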

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala

Lines changed: 32 additions & 16 deletions

@@ -27,7 +27,7 @@ import org.apache.spark.{SparkFunSuite, SparkUpgradeException}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
 import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils, TimestampFormatter}
-import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND
+import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
@@ -359,22 +359,38 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkConsistencyBetweenInterpretedAndCodegen(DateAdd, DateType, IntegerType)
   }

-  test("ansi_date_add") {
+  test("date add interval") {
     val d = Date.valueOf("2016-02-28")
-    checkEvaluation(
-      ANSIDateAdd(Literal(d), Literal(new CalendarInterval(0, 1, 0))),
-      DateTimeUtils.fromJavaDate(Date.valueOf("2016-02-29")))
-    checkEvaluation(
-      ANSIDateAdd(Literal(d), Literal(new CalendarInterval(1, 1, 0))),
-      DateTimeUtils.fromJavaDate(Date.valueOf("2016-03-29")))
-    checkEvaluation(ANSIDateAdd(Literal(d), Literal.create(null, CalendarIntervalType)),
-      null)
-    checkEvaluation(ANSIDateAdd(Literal.create(null, DateType),
-      Literal(new CalendarInterval(1, 1, 0))),
-      null)
-    checkExceptionInExpression[IllegalArgumentException](
-      ANSIDateAdd(Literal(d), Literal(new CalendarInterval(1, 1, 1))),
-      "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date")
+    Seq("true", "false") foreach { flag =>
+      withSQLConf((SQLConf.ANSI_ENABLED.key, flag)) {
+        checkEvaluation(
+          DateAddInterval(Literal(d), Literal(new CalendarInterval(0, 1, 0))),
+          DateTimeUtils.fromJavaDate(Date.valueOf("2016-02-29")))
+        checkEvaluation(
+          DateAddInterval(Literal(d), Literal(new CalendarInterval(1, 1, 0))),
+          DateTimeUtils.fromJavaDate(Date.valueOf("2016-03-29")))
+        checkEvaluation(DateAddInterval(Literal(d), Literal.create(null, CalendarIntervalType)),
+          null)
+        checkEvaluation(DateAddInterval(Literal.create(null, DateType),
+          Literal(new CalendarInterval(1, 1, 0))),
+          null)
+      }
+    }
+
+    withSQLConf((SQLConf.ANSI_ENABLED.key, "true")) {
+      checkExceptionInExpression[IllegalArgumentException](
+        DateAddInterval(Literal(d), Literal(new CalendarInterval(1, 1, 25 * MICROS_PER_HOUR))),
+        "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date")
+    }
+
+    withSQLConf((SQLConf.ANSI_ENABLED.key, "false")) {
+      checkEvaluation(
+        DateAddInterval(Literal(d), Literal(new CalendarInterval(1, 1, 25))),
+        DateTimeUtils.fromJavaDate(Date.valueOf("2016-03-29")))
+      checkEvaluation(
+        DateAddInterval(Literal(d), Literal(new CalendarInterval(1, 1, 25 * MICROS_PER_HOUR))),
+        DateTimeUtils.fromJavaDate(Date.valueOf("2016-03-30")))
+    }
   }

   test("date_sub") {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSQLBuilderSuite.scala

Lines changed: 1 addition & 1 deletion

@@ -177,7 +177,7 @@ class ExpressionSQLBuilderSuite extends SparkFunSuite {
       "`a` - INTERVAL '1 hours'"
     )
     checkSQL(
-      ANSIDateAdd('a, interval),
+      DateAddInterval('a, interval),
       "`a` + INTERVAL '1 hours'"
     )
   }

sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out

Lines changed: 2 additions & 2 deletions

@@ -163,7 +163,7 @@ select date'2011-11-11 11:11:11' + interval '2' second
 struct<>
 -- !query output
 java.lang.IllegalArgumentException
-requirement failed: Cannot add hours, minutes or seconds, milliseconds, microseconds to a date
+requirement failed: Cannot add hours, minutes or seconds, milliseconds, microseconds to a date in ansi mode


 -- !query
@@ -172,7 +172,7 @@ select date'2011-11-11 11:11:11' - interval '2' second
 struct<>
 -- !query output
 java.lang.IllegalArgumentException
-requirement failed: Cannot add hours, minutes or seconds, milliseconds, microseconds to a date
+requirement failed: Cannot add hours, minutes or seconds, milliseconds, microseconds to a date in ansi mode


 -- !query

sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out

Lines changed: 1 addition & 1 deletion

@@ -735,7 +735,7 @@ from interval_arithmetic
 struct<>
 -- !query output
 java.lang.IllegalArgumentException
-requirement failed: Cannot add hours, minutes or seconds, milliseconds, microseconds to a date
+requirement failed: Cannot add hours, minutes or seconds, milliseconds, microseconds to a date in ansi mode


 -- !query

sql/core/src/test/resources/sql-tests/results/datetime.sql.out

Lines changed: 2 additions & 2 deletions

@@ -134,15 +134,15 @@ struct<CAST(TIMESTAMP '2011-11-11 11:11:11' - INTERVAL '2 days' AS TIMESTAMP):ti
 -- !query
 select date'2011-11-11 11:11:11' + interval '2' second
 -- !query schema
-struct<CAST(CAST(DATE '2011-11-11' AS TIMESTAMP) + INTERVAL '2 seconds' AS DATE):date>
+struct<DATE '2011-11-11' + INTERVAL '2 seconds':date>
 -- !query output
 2011-11-11


 -- !query
 select date'2011-11-11 11:11:11' - interval '2' second
 -- !query schema
-struct<CAST(CAST(DATE '2011-11-11' AS TIMESTAMP) - INTERVAL '2 seconds' AS DATE):date>
+struct<DATE '2011-11-11' + (- INTERVAL '2 seconds'):date>
 -- !query output
 2011-11-10
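Taken together with the ANSI golden file above, these results capture the user-visible split: with spark.sql.ansi.enabled=false a sub-day interval is applied via the timestamp fallback and the result truncated back to a date, while ANSI mode rejects it. A hedged spark-shell sketch of the same contrast (expected output paraphrased from the golden files, not captured from a run):

spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("select date'2011-11-11' + interval '2' second").show()
// expected: 2011-11-11 (seconds applied via the timestamp fallback, then truncated)

spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("select date'2011-11-11' + interval '2' second").show()
// expected: java.lang.IllegalArgumentException: requirement failed:
//   Cannot add hours, minutes or seconds, milliseconds, microseconds to a date in ansi mode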

sql/core/src/test/resources/sql-tests/results/interval.sql.out

Lines changed: 2 additions & 2 deletions

@@ -668,7 +668,7 @@ select
   interval '2-2' year to month + dateval
 from interval_arithmetic
 -- !query schema
-struct<dateval:date,CAST(CAST(dateval AS TIMESTAMP) - INTERVAL '2 years 2 months' AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) - INTERVAL '-2 years -2 months' AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) + INTERVAL '2 years 2 months' AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) + INTERVAL '-2 years -2 months' AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) + (- INTERVAL '2 years 2 months') AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) + INTERVAL '2 years 2 months' AS DATE):date>
+struct<dateval:date,dateval + (- INTERVAL '2 years 2 months'):date,dateval + (- INTERVAL '-2 years -2 months'):date,dateval + INTERVAL '2 years 2 months':date,dateval + INTERVAL '-2 years -2 months':date,dateval + (- INTERVAL '2 years 2 months'):date,dateval + INTERVAL '2 years 2 months':date>
 -- !query output
 2012-01-01	2009-11-01	2014-03-01	2014-03-01	2009-11-01	2009-11-01	2014-03-01

@@ -711,7 +711,7 @@ select
   interval '99 11:22:33.123456789' day to second + dateval
 from interval_arithmetic
 -- !query schema
-struct<dateval:date,CAST(CAST(dateval AS TIMESTAMP) - INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) - INTERVAL '-99 days -11 hours -22 minutes -33.123456 seconds' AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) + INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) + INTERVAL '-99 days -11 hours -22 minutes -33.123456 seconds' AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) + (- INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds') AS DATE):date,CAST(CAST(dateval AS TIMESTAMP) + INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' AS DATE):date>
+struct<dateval:date,dateval + (- INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds'):date,dateval + (- INTERVAL '-99 days -11 hours -22 minutes -33.123456 seconds'):date,dateval + INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds':date,dateval + INTERVAL '-99 days -11 hours -22 minutes -33.123456 seconds':date,dateval + (- INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds'):date,dateval + INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds':date>
 -- !query output
 2012-01-01	2011-09-23	2012-04-09	2012-04-09	2011-09-23	2011-09-23	2012-04-09