Skip to content

Commit 086eeb5

Browse files
committed
allow missing year/hour when parsing date/timestamp
1 parent 5bb1a09 commit 086eeb5

File tree

7 files changed

+207
-26
lines changed

7 files changed

+207
-26
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class Iso8601DateFormatter(
5050
val specialDate = convertSpecialDate(s.trim, zoneId)
5151
specialDate.getOrElse {
5252
try {
53-
val localDate = LocalDate.parse(s, formatter)
53+
val localDate = toLocalDate(formatter.parse(s))
5454
localDateToDays(localDate)
5555
} catch checkDiffResult(s, legacyFormatter.parse)
5656
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala

Lines changed: 35 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ package org.apache.spark.sql.catalyst.util
1919

2020
import java.time._
2121
import java.time.chrono.IsoChronology
22-
import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, DateTimeParseException, ResolverStyle}
23-
import java.time.temporal.{ChronoField, TemporalAccessor, TemporalQueries}
22+
import java.time.format.{DateTimeFormatter, DateTimeFormatterBuilder, ResolverStyle}
23+
import java.time.temporal.{ChronoField, TemporalAccessor}
2424
import java.util.Locale
2525

2626
import com.google.common.cache.CacheBuilder
@@ -31,17 +31,39 @@ import org.apache.spark.sql.internal.SQLConf
3131
import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy._
3232

3333
trait DateTimeFormatterHelper {
34+
private def getOrDefault(accessor: TemporalAccessor, field: ChronoField, default: Int): Int = {
35+
if (accessor.isSupported(field)) {
36+
accessor.get(field)
37+
} else {
38+
default
39+
}
40+
}
41+
42+
protected def toLocalDate(temporalAccessor: TemporalAccessor): LocalDate = {
43+
val year = getOrDefault(temporalAccessor, ChronoField.YEAR, 1970)
44+
val month = getOrDefault(temporalAccessor, ChronoField.MONTH_OF_YEAR, 1)
45+
val day = getOrDefault(temporalAccessor, ChronoField.DAY_OF_MONTH, 1)
46+
LocalDate.of(year, month, day)
47+
}
48+
3449
// Converts the parsed temporal object to ZonedDateTime. It sets time components to zeros
3550
// if they do not exist in the parsed object.
3651
protected def toZonedDateTime(
3752
temporalAccessor: TemporalAccessor,
3853
zoneId: ZoneId): ZonedDateTime = {
39-
// Parsed input might not have time related part. In that case, time component is set to zeros.
40-
val parsedLocalTime = temporalAccessor.query(TemporalQueries.localTime)
41-
val localTime = if (parsedLocalTime == null) LocalTime.MIDNIGHT else parsedLocalTime
42-
// Parsed input must have date component. At least, year must present in temporalAccessor.
43-
val localDate = temporalAccessor.query(TemporalQueries.localDate)
44-
54+
val hour = if (temporalAccessor.isSupported(ChronoField.HOUR_OF_DAY)) {
55+
temporalAccessor.get(ChronoField.HOUR_OF_DAY)
56+
} else if (temporalAccessor.isSupported(ChronoField.HOUR_OF_AMPM)) {
57+
// When we reach here, it means am/pm is not specified. Here we assume it's am.
58+
temporalAccessor.get(ChronoField.HOUR_OF_AMPM)
59+
} else {
60+
0
61+
}
62+
val minute = getOrDefault(temporalAccessor, ChronoField.MINUTE_OF_HOUR, 0)
63+
val second = getOrDefault(temporalAccessor, ChronoField.SECOND_OF_MINUTE, 0)
64+
val nanoSecond = getOrDefault(temporalAccessor, ChronoField.NANO_OF_SECOND, 0)
65+
val localTime = LocalTime.of(hour, minute, second, nanoSecond)
66+
val localDate = toLocalDate(temporalAccessor)
4567
ZonedDateTime.of(localDate, localTime, zoneId)
4668
}
4769

@@ -72,18 +94,14 @@ trait DateTimeFormatterHelper {
7294
// DateTimeParseException will be addressed by the caller side.
7395
/**
 * Builds the `catch` handler used when the new (java.time based) parser fails on `s`.
 *
 * The handler is only defined for a [[DateTimeException]] raised while
 * `SQLConf.get.legacyTimeParserPolicy` is `EXCEPTION`. In that case the legacy parser is
 * tried on the same input:
 *  - if the legacy parser also fails, the original exception `e` is rethrown unchanged;
 *  - if the legacy parser succeeds, the two parsers disagree, so a `SparkUpgradeException`
 *    (with `e` as its cause) is thrown to tell the user how to choose a behavior.
 *
 * The handler never returns a value; its result type is `T` only so that it can be used
 * as the `catch` clause of an expression of type `T`.
 *
 * @param s the input string that the new parser failed to parse
 * @param legacyParseFunc parses `s` with the legacy parser; only its success or failure is
 *                        used, the parsed value is discarded
 */
protected def checkDiffResult[T](
    s: String, legacyParseFunc: String => T): PartialFunction[Throwable, T] = {
  case e: DateTimeException if SQLConf.get.legacyTimeParserPolicy == EXCEPTION =>
    // Probe the legacy parser. Any failure here means the input is invalid for both
    // parsers, so the new parser's original error is surfaced as-is.
    try {
      legacyParseFunc(s)
    } catch {
      case _: Throwable => throw e
    }
    // This throw must stay outside the try/catch above: inside the `try` it would be
    // caught by the catch-all handler and replaced by `e`, so the upgrade error would
    // never reach the caller.
    throw new SparkUpgradeException("3.0", s"Fail to parse '$s' in the new parser. You can " +
      s"set ${SQLConf.LEGACY_TIME_PARSER_POLICY.key} to LEGACY to restore the behavior " +
      s"before Spark 3.0, or set to CORRECTED and treat it as an invalid datetime string.", e)
}
89107
}
@@ -101,10 +119,6 @@ private object DateTimeFormatterHelper {
101119

102120
def toFormatter(builder: DateTimeFormatterBuilder, locale: Locale): DateTimeFormatter = {
103121
builder
104-
.parseDefaulting(ChronoField.MONTH_OF_YEAR, 1)
105-
.parseDefaulting(ChronoField.DAY_OF_MONTH, 1)
106-
.parseDefaulting(ChronoField.MINUTE_OF_HOUR, 0)
107-
.parseDefaulting(ChronoField.SECOND_OF_MINUTE, 0)
108122
.toFormatter(locale)
109123
.withChronology(IsoChronology.INSTANCE)
110124
.withResolverStyle(ResolverStyle.STRICT)

sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,4 +176,10 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
176176
}
177177
}
178178
}
179+
180+
test("missing date fields") {
181+
val formatter = DateFormatter("HH", ZoneOffset.UTC)
182+
val daysSinceEpoch = formatter.parse("20")
183+
assert(daysSinceEpoch === LocalDate.of(1970, 1, 1).toEpochDay)
184+
}
179185
}

sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,4 +291,95 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
291291
}
292292
}
293293
}
294+
295+
test("parsing hour with various patterns") {
296+
def createFormatter(pattern: String): TimestampFormatter = {
297+
// Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid value range.
298+
TimestampFormatter(pattern, ZoneOffset.UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
299+
}
300+
301+
withClue("HH") {
302+
val formatter = createFormatter("yyyy-MM-dd HH")
303+
304+
val micros1 = formatter.parse("2009-12-12 00")
305+
assert(micros1 === TimeUnit.SECONDS.toMicros(
306+
LocalDateTime.of(2009, 12, 12, 0, 0, 0).toEpochSecond(ZoneOffset.UTC)))
307+
308+
val micros2 = formatter.parse("2009-12-12 15")
309+
assert(micros2 === TimeUnit.SECONDS.toMicros(
310+
LocalDateTime.of(2009, 12, 12, 15, 0, 0).toEpochSecond(ZoneOffset.UTC)))
311+
312+
intercept[DateTimeException](formatter.parse("2009-12-12 24"))
313+
}
314+
315+
withClue("kk") {
316+
val formatter = createFormatter("yyyy-MM-dd kk")
317+
318+
intercept[DateTimeException](formatter.parse("2009-12-12 00"))
319+
320+
val micros1 = formatter.parse("2009-12-12 15")
321+
assert(micros1 === TimeUnit.SECONDS.toMicros(
322+
LocalDateTime.of(2009, 12, 12, 15, 0, 0).toEpochSecond(ZoneOffset.UTC)))
323+
324+
val micros2 = formatter.parse("2009-12-12 24")
325+
assert(micros2 === TimeUnit.SECONDS.toMicros(
326+
LocalDateTime.of(2009, 12, 12, 0, 0, 0).toEpochSecond(ZoneOffset.UTC)))
327+
}
328+
329+
withClue("KK") {
330+
val formatter = createFormatter("yyyy-MM-dd KK a")
331+
332+
val micros1 = formatter.parse("2009-12-12 00 am")
333+
assert(micros1 === TimeUnit.SECONDS.toMicros(
334+
LocalDateTime.of(2009, 12, 12, 0, 0, 0).toEpochSecond(ZoneOffset.UTC)))
335+
336+
// For `KK`, "12:00:00 am" is the same as "00:00:00 pm".
337+
val micros2 = formatter.parse("2009-12-12 12 am")
338+
assert(micros2 === TimeUnit.SECONDS.toMicros(
339+
LocalDateTime.of(2009, 12, 12, 12, 0, 0).toEpochSecond(ZoneOffset.UTC)))
340+
341+
val micros3 = formatter.parse("2009-12-12 00 pm")
342+
assert(micros3 === TimeUnit.SECONDS.toMicros(
343+
LocalDateTime.of(2009, 12, 12, 12, 0, 0).toEpochSecond(ZoneOffset.UTC)))
344+
345+
intercept[DateTimeException](formatter.parse("2009-12-12 12 pm"))
346+
}
347+
348+
withClue("hh") {
349+
val formatter = createFormatter("yyyy-MM-dd hh a")
350+
351+
intercept[DateTimeException](formatter.parse("2009-12-12 00 am"))
352+
353+
val micros1 = formatter.parse("2009-12-12 12 am")
354+
assert(micros1 === TimeUnit.SECONDS.toMicros(
355+
LocalDateTime.of(2009, 12, 12, 0, 0, 0).toEpochSecond(ZoneOffset.UTC)))
356+
357+
intercept[DateTimeException](formatter.parse("2009-12-12 00 pm"))
358+
359+
val micros2 = formatter.parse("2009-12-12 12 pm")
360+
assert(micros2 === TimeUnit.SECONDS.toMicros(
361+
LocalDateTime.of(2009, 12, 12, 12, 0, 0).toEpochSecond(ZoneOffset.UTC)))
362+
}
363+
}
364+
365+
test("missing date fields") {
366+
val formatter = TimestampFormatter("HH:mm:ss", ZoneOffset.UTC)
367+
val micros = formatter.parse("11:30:01")
368+
assert(micros === TimeUnit.SECONDS.toMicros(
369+
LocalDateTime.of(1970, 1, 1, 11, 30, 1).toEpochSecond(ZoneOffset.UTC)))
370+
}
371+
372+
test("missing am/pm field") {
373+
val formatter = TimestampFormatter("yyyy hh:mm:ss", ZoneOffset.UTC)
374+
val micros = formatter.parse("2009 11:30:01")
375+
assert(micros === TimeUnit.SECONDS.toMicros(
376+
LocalDateTime.of(2009, 1, 1, 11, 30, 1).toEpochSecond(ZoneOffset.UTC)))
377+
}
378+
379+
test("missing time fields") {
380+
val formatter = TimestampFormatter("yyyy HH", ZoneOffset.UTC)
381+
val micros = formatter.parse("2009 11")
382+
assert(micros === TimeUnit.SECONDS.toMicros(
383+
LocalDateTime.of(2009, 1, 1, 11, 0, 0).toEpochSecond(ZoneOffset.UTC)))
384+
}
294385
}

sql/core/src/test/resources/sql-tests/inputs/datetime.sql

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ select date_sub('2011-11-11', str) from v;
8686
select null - date '2019-10-06';
8787
select date '2001-10-01' - date '2001-09-28';
8888

89-
-- variable-length tests
89+
-- variable-length second fraction tests
9090
select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
9191
select to_timestamp('2019-10-06 10:11:12.0', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
9292
select to_timestamp('2019-10-06 10:11:12.1', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
@@ -95,7 +95,7 @@ select to_timestamp('2019-10-06 10:11:12.123UTC', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zz
9595
select to_timestamp('2019-10-06 10:11:12.1234', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
9696
select to_timestamp('2019-10-06 10:11:12.12345CST', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
9797
select to_timestamp('2019-10-06 10:11:12.123456PST', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
98-
-- exceeded max variable length
98+
-- second fraction exceeded max variable length
9999
select to_timestamp('2019-10-06 10:11:12.1234567PST', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
100100
-- special cases
101101
select to_timestamp('123456 2019-10-06 10:11:12.123456PST', 'SSSSSS yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]');
@@ -122,3 +122,9 @@ select to_timestamp("2019-10-06T10:11:12'12", "yyyy-MM-dd'T'HH:mm:ss''SSSS"); --
122122
select to_timestamp("2019-10-06T10:11:12'", "yyyy-MM-dd'T'HH:mm:ss''"); -- tail
123123
select to_timestamp("'2019-10-06T10:11:12", "''yyyy-MM-dd'T'HH:mm:ss"); -- head
124124
select to_timestamp("P2019-10-06T10:11:12", "'P'yyyy-MM-dd'T'HH:mm:ss"); -- head but as single quote
125+
126+
-- missing fields
127+
select to_timestamp("16", "dd");
128+
select to_date("16", "dd");
129+
select to_timestamp("2019 40", "yyyy mm");
130+
select to_timestamp("2019 10:10:10", "yyyy hh:mm:ss");

sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 85
2+
-- Number of queries: 89
33

44

55
-- !query
@@ -730,3 +730,35 @@ select to_timestamp("P2019-10-06T10:11:12", "'P'yyyy-MM-dd'T'HH:mm:ss")
730730
struct<to_timestamp(P2019-10-06T10:11:12, 'P'yyyy-MM-dd'T'HH:mm:ss):timestamp>
731731
-- !query output
732732
2019-10-06 10:11:12
733+
734+
735+
-- !query
736+
select to_timestamp("16", "dd")
737+
-- !query schema
738+
struct<to_timestamp(16, dd):timestamp>
739+
-- !query output
740+
1970-01-16 00:00:00
741+
742+
743+
-- !query
744+
select to_date("16", "dd")
745+
-- !query schema
746+
struct<to_date(16, dd):date>
747+
-- !query output
748+
1970-01-16
749+
750+
751+
-- !query
752+
select to_timestamp("2019 40", "yyyy mm")
753+
-- !query schema
754+
struct<to_timestamp(2019 40, yyyy mm):timestamp>
755+
-- !query output
756+
2019-01-01 00:40:00
757+
758+
759+
-- !query
760+
select to_timestamp("2019 10:10:10", "yyyy hh:mm:ss")
761+
-- !query schema
762+
struct<to_timestamp(2019 10:10:10, yyyy hh:mm:ss):timestamp>
763+
-- !query output
764+
2019-01-01 10:10:10

sql/core/src/test/resources/sql-tests/results/datetime.sql.out

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
-- Automatically generated by SQLQueryTestSuite
2-
-- Number of queries: 85
2+
-- Number of queries: 89
33

44

55
-- !query
@@ -702,3 +702,35 @@ select to_timestamp("P2019-10-06T10:11:12", "'P'yyyy-MM-dd'T'HH:mm:ss")
702702
struct<to_timestamp(P2019-10-06T10:11:12, 'P'yyyy-MM-dd'T'HH:mm:ss):timestamp>
703703
-- !query output
704704
2019-10-06 10:11:12
705+
706+
707+
-- !query
708+
select to_timestamp("16", "dd")
709+
-- !query schema
710+
struct<to_timestamp(16, dd):timestamp>
711+
-- !query output
712+
1970-01-16 00:00:00
713+
714+
715+
-- !query
716+
select to_date("16", "dd")
717+
-- !query schema
718+
struct<to_date(16, dd):date>
719+
-- !query output
720+
1970-01-16
721+
722+
723+
-- !query
724+
select to_timestamp("2019 40", "yyyy mm")
725+
-- !query schema
726+
struct<to_timestamp(2019 40, yyyy mm):timestamp>
727+
-- !query output
728+
2019-01-01 00:40:00
729+
730+
731+
-- !query
732+
select to_timestamp("2019 10:10:10", "yyyy hh:mm:ss")
733+
-- !query schema
734+
struct<to_timestamp(2019 10:10:10, yyyy hh:mm:ss):timestamp>
735+
-- !query output
736+
2019-01-01 10:10:10

0 commit comments

Comments
 (0)