Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/sql-migration-guide-upgrade.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ displayTitle: Spark SQL Upgrading Guide

- the JDBC options `lowerBound` and `upperBound` are converted to TimestampType/DateType values in the same way as casting strings to TimestampType/DateType values. The conversion is based on Proleptic Gregorian calendar, and time zone defined by the SQL config `spark.sql.session.timeZone`. In Spark version 2.4 and earlier, the conversion is based on the hybrid calendar (Julian + Gregorian) and on default system time zone.

- In Spark version 2.4 and earlier, invalid time zone ids are silently ignored and replaced by the GMT time zone, for example, in the `from_utc_timestamp` function. Since Spark 3.0, such time zone ids are rejected, and Spark throws `java.time.DateTimeException`.

## Upgrading From Spark SQL 2.3 to 2.4

- In Spark version 2.3 and earlier, the second parameter to the `array_contains` function is implicitly promoted to the element type of the first array type parameter. This type promotion can be lossy and may cause the `array_contains` function to return a wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some changes in behavior, which are illustrated in the table below.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ import java.time._
import java.time.Year.isLeap
import java.time.temporal.IsoFields
import java.util.{Locale, TimeZone}
import java.util.concurrent.{ConcurrentHashMap, TimeUnit}
import java.util.function.{Function => JFunction}
import java.util.concurrent.TimeUnit

import scala.util.control.NonFatal

Expand Down Expand Up @@ -67,13 +66,9 @@ object DateTimeUtils {

// Returns the result of `TimeZone.getDefault()`.
def defaultTimeZone(): TimeZone = TimeZone.getDefault()

private val computedTimeZones = new ConcurrentHashMap[String, TimeZone]
private val computeTimeZone = new JFunction[String, TimeZone] {
override def apply(timeZoneId: String): TimeZone = TimeZone.getTimeZone(timeZoneId)
}

def getTimeZone(timeZoneId: String): TimeZone = {
computedTimeZones.computeIfAbsent(timeZoneId, computeTimeZone)
val zoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @MaxGekk, after upgrading from Spark 2.3 to Spark 3.0, we found that this behaviour change rejects some valid time zone ids, for example:

// GMT+8:00 is a valid time zone when parsed by TimeZone.getTimeZone("GMT+8:00")
// However, ZoneId.of("GMT+8:00", ZoneId.SHORT_IDS) is rejected with an exception
from_unix_time("2020-01-01 10:00:00", "GMT+8:00")

What do you think about supporting this kind of time zone id, such as GMT+8:00?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TimeZone.getTimeZone(zoneId)
}

// we should use the exact day as Int, for example, (year, month, day) -> day
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -819,9 +819,17 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
test(null, "UTC", null)
test("2015-07-24 00:00:00", null, null)
test(null, null, null)
// Test escaping of timezone
GenerateUnsafeProjection.generate(
ToUTCTimestamp(Literal(Timestamp.valueOf("2015-07-24 00:00:00")), Literal("\"quote")) :: Nil)
}

// For each malformed zone id, building a ToUTCTimestamp expression and passing it to
// GenerateUnsafeProjection.generate must throw java.time.DateTimeException, and the
// exception message must contain the offending id.
test("to_utc_timestamp - invalid time zone id") {
  for (badZone <- Seq("Invalid time zone", "\"quote", "UTC*42")) {
    val thrown = intercept[java.time.DateTimeException] {
      val timestamp = Literal(Timestamp.valueOf("2015-07-24 00:00:00"))
      val expression = ToUTCTimestamp(timestamp, Literal(badZone))
      GenerateUnsafeProjection.generate(expression :: Nil)
    }
    assert(thrown.getMessage.contains(badZone))
  }
}

test("from_utc_timestamp") {
Expand All @@ -842,7 +850,14 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
test(null, "UTC", null)
test("2015-07-24 00:00:00", null, null)
test(null, null, null)
// Test escaping of timezone
GenerateUnsafeProjection.generate(FromUTCTimestamp(Literal(0), Literal("\"quote")) :: Nil)
}

// For each malformed zone id, building a FromUTCTimestamp expression and passing it to
// GenerateUnsafeProjection.generate must throw java.time.DateTimeException, and the
// exception message must contain the offending id.
test("from_utc_timestamp - invalid time zone id") {
  for (badZone <- Seq("Invalid time zone", "\"quote", "UTC*42")) {
    val thrown = intercept[java.time.DateTimeException] {
      val expression = FromUTCTimestamp(Literal(0), Literal(badZone))
      GenerateUnsafeProjection.generate(expression :: Nil)
    }
    assert(thrown.getMessage.contains(badZone))
  }
}
}
Loading