Commit f977d0b

Adding sync between Python and Java default timezones
1 parent 3585520 commit f977d0b

File tree

3 files changed: +72 -48 lines changed

python/pyspark/sql/tests.py

Lines changed: 15 additions & 0 deletions
@@ -3000,6 +3000,14 @@ class ArrowTests(ReusedPySparkTestCase):
     def setUpClass(cls):
         from datetime import datetime
         ReusedPySparkTestCase.setUpClass()
+
+        # Synchronize default timezone between Python and Java
+        tz = "America/Los_Angeles"
+        os.environ["TZ"] = tz
+        time.tzset()
+        cls.old_tz = cls.sc._jvm.org.apache.spark.sql.catalyst.util.DateTimeTestUtils\
+            .setDefaultTimeZone(tz)
+
         cls.spark = SparkSession(cls.sc)
         cls.spark.conf.set("spark.sql.execution.arrow.enable", "true")
         cls.schema = StructType([
@@ -3014,6 +3022,13 @@ def setUpClass(cls):
             ("b", 2, 20, 0.4, 4.0, datetime(2012, 2, 2), datetime(2012, 2, 2, 2, 2, 2)),
             ("c", 3, 30, 0.8, 6.0, datetime(2100, 3, 3), datetime(2100, 3, 3, 3, 3, 3))]

+    @classmethod
+    def tearDownClass(cls):
+        del os.environ["TZ"]
+        time.tzset()
+        cls.sc._jvm.org.apache.spark.sql.catalyst.util.DateTimeTestUtils\
+            .setDefaultTimeZone(cls.old_tz)
+
     def assertFramesEqual(self, df_with_arrow, df_without):
         msg = ("DataFrame from Arrow is not equal" +
                ("\n\nWith Arrow:\n%s\n%s" % (df_with_arrow, df_with_arrow.dtypes)) +

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala

Lines changed: 7 additions & 0 deletions
@@ -37,4 +37,11 @@ object DateTimeTestUtils {
       DateTimeUtils.resetThreadLocals()
     }
   }
+
+  def setDefaultTimeZone(id: String): String = {
+    val originalDefaultTimeZone = DateTimeUtils.defaultTimeZone().getID
+    DateTimeUtils.resetThreadLocals()
+    TimeZone.setDefault(TimeZone.getTimeZone(id))
+    originalDefaultTimeZone
+  }
 }
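A minimal sketch of how a PySpark test can drive this new helper through the py4j gateway, assuming an active SparkContext bound to the name sc (as ReusedPySparkTestCase provides) and Spark's catalyst test classes on the classpath; the try/finally wrapper is illustrative and mirrors what setUpClass/tearDownClass do in the test changes above.

# Sketch only: assumes a live SparkContext `sc` with DateTimeTestUtils available on the JVM side.
jvm_utils = sc._jvm.org.apache.spark.sql.catalyst.util.DateTimeTestUtils

# Switch the JVM default timezone; the helper returns the previous zone ID.
old_tz = jvm_utils.setDefaultTimeZone("America/Los_Angeles")
try:
    pass  # run timezone-sensitive assertions here
finally:
    # Restore the original JVM default so later suites are unaffected.
    jvm_utils.setDefaultTimeZone(old_tz)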

sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala

Lines changed: 50 additions & 48 deletions
@@ -20,7 +20,7 @@ import java.io.File
 import java.nio.charset.StandardCharsets
 import java.sql.{Date, Timestamp}
 import java.text.SimpleDateFormat
-import java.util.Locale
+import java.util.{Locale, TimeZone}

 import com.google.common.io.Files
 import org.apache.arrow.memory.RootAllocator
@@ -31,7 +31,7 @@ import org.scalatest.BeforeAndAfterAll

 import org.apache.spark.SparkException
 import org.apache.spark.sql.{DataFrame, Row}
-import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils}
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types.{BinaryType, StructField, StructType}
 import org.apache.spark.util.Utils
@@ -841,52 +841,54 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll {
   }

   test("timestamp type conversion") {
-    val json =
-      s"""
-        |{
-        |  "schema" : {
-        |    "fields" : [ {
-        |      "name" : "timestamp",
-        |      "type" : {
-        |        "name" : "timestamp",
-        |        "unit" : "MICROSECOND",
-        |        "timezone" : "${DateTimeUtils.defaultTimeZone().getID}"
-        |      },
-        |      "nullable" : true,
-        |      "children" : [ ],
-        |      "typeLayout" : {
-        |        "vectors" : [ {
-        |          "type" : "VALIDITY",
-        |          "typeBitWidth" : 1
-        |        }, {
-        |          "type" : "DATA",
-        |          "typeBitWidth" : 64
-        |        } ]
-        |      }
-        |    } ]
-        |  },
-        |  "batches" : [ {
-        |    "count" : 4,
-        |    "columns" : [ {
-        |      "name" : "timestamp",
-        |      "count" : 4,
-        |      "VALIDITY" : [ 1, 1, 1, 1 ],
-        |      "DATA" : [ -1234, 0, 1365383415567000, 33057298500000000 ]
-        |    } ]
-        |  } ]
-        |}
-      """.stripMargin
-
-    val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS z", Locale.US)
-    val ts1 = DateTimeUtils.toJavaTimestamp(-1234L)
-    val ts2 = DateTimeUtils.toJavaTimestamp(0L)
-    val ts3 = new Timestamp(sdf.parse("2013-04-08 01:10:15.567 UTC").getTime)
-    val ts4 = new Timestamp(sdf.parse("3017-07-18 14:55:00.000 UTC").getTime)
-    val data = Seq(ts1, ts2, ts3, ts4)
-
-    val df = data.toDF("timestamp")
-
-    collectAndValidate(df, json, "timestampData.json")
+    DateTimeTestUtils.withDefaultTimeZone(TimeZone.getTimeZone("America/Los_Angeles")) {
+      val json =
+        s"""
+          |{
+          |  "schema" : {
+          |    "fields" : [ {
+          |      "name" : "timestamp",
+          |      "type" : {
+          |        "name" : "timestamp",
+          |        "unit" : "MICROSECOND",
+          |        "timezone" : "${DateTimeUtils.defaultTimeZone().getID}"
+          |      },
+          |      "nullable" : true,
+          |      "children" : [ ],
+          |      "typeLayout" : {
+          |        "vectors" : [ {
+          |          "type" : "VALIDITY",
+          |          "typeBitWidth" : 1
+          |        }, {
+          |          "type" : "DATA",
+          |          "typeBitWidth" : 64
+          |        } ]
+          |      }
+          |    } ]
+          |  },
+          |  "batches" : [ {
+          |    "count" : 4,
+          |    "columns" : [ {
+          |      "name" : "timestamp",
+          |      "count" : 4,
+          |      "VALIDITY" : [ 1, 1, 1, 1 ],
+          |      "DATA" : [ -1234, 0, 1365383415567000, 33057298500000000 ]
+          |    } ]
+          |  } ]
+          |}
+        """.stripMargin
+
+      val sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS z", Locale.US)
+      val ts1 = DateTimeUtils.toJavaTimestamp(-1234L)
+      val ts2 = DateTimeUtils.toJavaTimestamp(0L)
+      val ts3 = new Timestamp(sdf.parse("2013-04-08 01:10:15.567 UTC").getTime)
+      val ts4 = new Timestamp(sdf.parse("3017-07-18 14:55:00.000 UTC").getTime)
+      val data = Seq(ts1, ts2, ts3, ts4)
+
+      val df = data.toDF("timestamp")
+
+      collectAndValidate(df, json, "timestampData.json")
+    }
   }

   test("floating-point NaN") {
