Skip to content

Commit fa521c1

Browse files
linhongliu-db authored and cloud-fan committed
[SPARK-36286][SQL] Block some invalid datetime string
### What changes were proposed in this pull request?

In PR #32959, we found some weird datetime strings that could still be parsed (see the comment thread on #32959 for details). This PR blocks those invalid datetime strings.

### Why are the changes needed?

Bug fix.

### Does this PR introduce _any_ user-facing change?

Yes, the strings below will produce different results when cast to datetime.

```sql
select cast('12::' as timestamp); -- Before: 2021-07-07 12:00:00, After: NULL
select cast('T' as timestamp); -- Before: 2021-07-07 00:00:00, After: NULL
```

### How was this patch tested?

Some new test cases.

Closes #33490 from linhongliu-db/SPARK-35780-block-invalid-format.

Authored-by: Linhong Liu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit ed0e351)
Signed-off-by: Wenchen Fan <[email protected]>
1 parent 999cf81 commit fa521c1

File tree

4 files changed

+16
-4
lines changed

4 files changed

+16
-4
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,9 @@ object DateTimeUtils {
254254
val maxDigitsYear = 6
255255
// For the nanosecond part, more than 6 digits is allowed, but will be truncated.
256256
segment == 6 || (segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
257-
(segment != 0 && segment != 6 && digits <= 2)
257+
// For the zoneId segment (7), there can be zero digits when it is a region-based zone ID
258+
(segment == 7 && digits <= 2) ||
259+
(segment != 0 && segment != 6 && segment != 7 && digits > 0 && digits <= 2)
258260
}
259261
if (s == null || s.trimAll().numBytes() == 0) {
260262
return (Array.empty, None, false)
@@ -527,7 +529,8 @@ object DateTimeUtils {
527529
def isValidDigits(segment: Int, digits: Int): Boolean = {
528530
// An integer is able to represent a date within [+-]5 million years.
529531
var maxDigitsYear = 7
530-
(segment == 0 && digits >= 4 && digits <= maxDigitsYear) || (segment != 0 && digits <= 2)
532+
(segment == 0 && digits >= 4 && digits <= maxDigitsYear) ||
533+
(segment != 0 && digits > 0 && digits <= 2)
531534
}
532535
if (s == null || s.trimAll().numBytes() == 0) {
533536
return None

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,4 +576,8 @@ class CastSuite extends CastSuiteBase {
576576
checkEvaluation(cast(invalidInput, TimestampNTZType), null)
577577
}
578578
}
579+
580+
test("SPARK-36286: invalid string cast to timestamp") {
581+
checkEvaluation(cast(Literal("2015-03-18T"), TimestampType), null)
582+
}
579583
}

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
150150
c.set(Calendar.MILLISECOND, 0)
151151
checkCastStringToTimestamp("2015-03-18", new Timestamp(c.getTimeInMillis))
152152
checkCastStringToTimestamp("2015-03-18 ", new Timestamp(c.getTimeInMillis))
153-
checkCastStringToTimestamp("2015-03-18T", new Timestamp(c.getTimeInMillis))
154153

155154
c = Calendar.getInstance(tz)
156155
c.set(2015, 2, 18, 12, 3, 17)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
147147
assert(toDate("1999 08 01").isEmpty)
148148
assert(toDate("1999-08 01").isEmpty)
149149
assert(toDate("1999 08").isEmpty)
150+
assert(toDate("1999-08-").isEmpty)
150151
assert(toDate("").isEmpty)
151152
assert(toDate(" ").isEmpty)
152153
}
@@ -182,7 +183,7 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
182183
checkStringToTimestamp("1969-12-31 16:00:00", Option(date(1969, 12, 31, 16, zid = zid)))
183184
checkStringToTimestamp("0001", Option(date(1, 1, 1, 0, zid = zid)))
184185
checkStringToTimestamp("2015-03", Option(date(2015, 3, 1, zid = zid)))
185-
Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ", "2015-03-18T").foreach { s =>
186+
Seq("2015-03-18", "2015-03-18 ", " 2015-03-18", " 2015-03-18 ").foreach { s =>
186187
checkStringToTimestamp(s, Option(date(2015, 3, 18, zid = zid)))
187188
}
188189

@@ -289,6 +290,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
289290
checkStringToTimestamp("", None)
290291
checkStringToTimestamp(" ", None)
291292
checkStringToTimestamp("+", None)
293+
checkStringToTimestamp("T", None)
294+
checkStringToTimestamp("2015-03-18T", None)
295+
checkStringToTimestamp("12::", None)
296+
checkStringToTimestamp("2015-03-18T12:03:17-8:", None)
297+
checkStringToTimestamp("2015-03-18T12:03:17-8:30:", None)
292298

293299
// Truncating the fractional seconds
294300
expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = UTC))

0 commit comments

Comments
 (0)