Skip to content

Commit a27f021

Browse files
dchvnfishcus
authored and committed
[SPARK-36076][SQL][3.1] ArrayIndexOutOfBounds in Cast string to timestamp

### What changes were proposed in this pull request?

Casting a string to timestamp might throw ArrayIndexOutOfBoundsException in certain cases.

This error only occurs in branches 3.0, 3.1 and earlier; it is not present in 3.2 or master.

Code to reproduce:

```
val df = Seq(":8:434421+ 98:38").toDF("c0")
val df2 = df.withColumn("c1", col("c0").cast(DataTypes.TimestampType))
df2.show()
```

Error:

```
java.lang.ArrayIndexOutOfBoundsException: 9
  at org.apache.spark.sql.catalyst.util.DateTimeUtils$.stringToTimestamp(DateTimeUtils.scala:328)
  at org.apache.spark.sql.catalyst.expressions.CastBase.$anonfun$castToTimestamp$2(Cast.scala:455)
  at org.apache.spark.sql.catalyst.expressions.CastBase.buildCast(Cast.scala:295)
  at org.apache.spark.sql.catalyst.expressions.CastBase.$anonfun$castToTimestamp$1(Cast.scala:451)
  at org.apache.spark.sql.catalyst.expressions.CastBase.nullSafeEval(Cast.scala:840)
  at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:476)
```

### Why are the changes needed?

Casting a string to timestamp shouldn't throw an error; it should return null instead.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

Added a test in DateTimeUtilsSuite.

Closes apache#33293 from dgd-contributor/SPARK-36076_CastStringToTimeStampThrowArrayIndexOutOfBoundsException.

Authored-by: dgd-contributor <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
1 parent ccfbc17 commit a27f021

File tree

2 files changed

+23
-21
lines changed

2 files changed

+23
-21
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ object DateTimeUtils {
242242
* - +|-hhmmss
243243
* - Region-based zone IDs in the form `area/city`, such as `Europe/Paris`
244244
*/
245-
def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[Long] = {
245+
def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[Long] = try {
246246
if (s == null) {
247247
return None
248248
}
@@ -350,27 +350,25 @@ object DateTimeUtils {
350350
segments(6) /= 10
351351
digitsMilli -= 1
352352
}
353-
try {
354-
val zoneId = tz match {
355-
case None => timeZoneId
356-
case Some("+") => ZoneOffset.ofHoursMinutes(segments(7), segments(8))
357-
case Some("-") => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8))
358-
case Some(zoneName: String) => getZoneId(zoneName.trim)
359-
}
360-
val nanoseconds = MICROSECONDS.toNanos(segments(6))
361-
val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
362-
val localDate = if (justTime) {
363-
LocalDate.now(zoneId)
364-
} else {
365-
LocalDate.of(segments(0), segments(1), segments(2))
366-
}
367-
val localDateTime = LocalDateTime.of(localDate, localTime)
368-
val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
369-
val instant = Instant.from(zonedDateTime)
370-
Some(instantToMicros(instant))
371-
} catch {
372-
case NonFatal(_) => None
353+
val zoneId = tz match {
354+
case None => timeZoneId
355+
case Some("+") => ZoneOffset.ofHoursMinutes(segments(7), segments(8))
356+
case Some("-") => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8))
357+
case Some(zoneName: String) => getZoneId(zoneName.trim)
358+
}
359+
val nanoseconds = MICROSECONDS.toNanos(segments(6))
360+
val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt)
361+
val localDate = if (justTime) {
362+
LocalDate.now(zoneId)
363+
} else {
364+
LocalDate.of(segments(0), segments(1), segments(2))
373365
}
366+
val localDateTime = LocalDateTime.of(localDate, localTime)
367+
val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
368+
val instant = Instant.from(zonedDateTime)
369+
Some(instantToMicros(instant))
370+
} catch {
371+
case NonFatal(_) => None
374372
}
375373

376374
def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId): Long = {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,10 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper {
283283
}
284284
}
285285

286+
test("SPARK-36076: Cast string to timestamp throw ArrayIndexOutOfBounds") {
287+
assert(toTimestamp(":8:434421+ 98:38", UTC) === None)
288+
}
289+
286290
test("SPARK-15379: special invalid date string") {
287291
// Test stringToDate
288292
assert(toDate("2015-02-29 00:00:00").isEmpty)

0 commit comments

Comments
 (0)