From 238b5563e444b6b936f2e2771ec7876f648af1e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Bry=C5=84ski?= Date: Thu, 14 Sep 2017 16:56:52 +0200 Subject: [PATCH 1/6] Change internal Timestamp conversion --- python/pyspark/sql/types.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 920cf009f599..7d51f2d8d356 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -19,6 +19,7 @@ import decimal import time import datetime +import dateutil import calendar import json import re @@ -178,6 +179,9 @@ def fromInternal(self, v): return datetime.date.fromordinal(v + self.EPOCH_ORDINAL) +_is_utc = datetime.datetime.now(dateutil.tz.tzlocal()).tzname() == "UTC" + + class TimestampType(AtomicType): """Timestamp (datetime.datetime) data type. """ @@ -196,7 +200,8 @@ def toInternal(self, dt): def fromInternal(self, ts): if ts is not None: # using int to avoid precision loss in float - return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000) + y, m, d, hh, mm, ss, _, _, _ = time.gmtime(ts // 1000000) if _is_utc else time.localtime(ts // 1000000) + datetime.datetime(y, m, d, hh, mm, ss, ts % 1000000) class DecimalType(FractionalType): From 0cb2a482a41711531a9367b88bf1558f5c87ac4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Bry=C5=84ski?= Date: Thu, 14 Sep 2017 16:58:50 +0200 Subject: [PATCH 2/6] Typo fix --- python/pyspark/sql/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 7d51f2d8d356..6b8b54ddf243 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -201,7 +201,7 @@ def fromInternal(self, ts): if ts is not None: # using int to avoid precision loss in float y, m, d, hh, mm, ss, _, _, _ = time.gmtime(ts // 1000000) if _is_utc else time.localtime(ts // 1000000) - datetime.datetime(y, m, d, hh, mm, ss, ts % 1000000) + return datetime.datetime(y, m, d, hh, mm, ss, ts % 1000000) class DecimalType(FractionalType): From 02301eb4aa8686fcafdeba3b13ec772be8938ed6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Bry=C5=84ski?= Date: Thu, 14 Sep 2017 17:07:22 +0200 Subject: [PATCH 3/6] Import fix --- python/pyspark/sql/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 6b8b54ddf243..9ccfe44f07f3 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -19,7 +19,7 @@ import decimal import time import datetime -import dateutil +import dateutil.tz import calendar import json import re From 25bf50d33a403e7d8770ddd79e077d26ee808ff0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Bry=C5=84ski?= Date: Thu, 14 Sep 2017 17:19:32 +0200 Subject: [PATCH 4/6] PEP8 compliance --- python/pyspark/sql/types.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 9ccfe44f07f3..f705556afb9b 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -200,7 +200,8 @@ def toInternal(self, dt): def fromInternal(self, ts): if ts is not None: # using int to avoid precision loss in float - y, m, d, hh, mm, ss, _, _, _ = time.gmtime(ts // 1000000) if _is_utc else time.localtime(ts // 1000000) + y, m, d, hh, mm, ss, _, _, _ = (time.gmtime(ts // 1000000) if _is_utc + else time.localtime(ts // 1000000)) return datetime.datetime(y, m, d, hh, mm, ss, ts % 1000000) From f18f4c81dab4c0d597fe77eed04a2a62e26ef1ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Bry=C5=84ski?= Date: Thu, 14 Sep 2017 17:32:24 +0200 Subject: [PATCH 5/6] Change way to check if timezone equals UTC --- python/pyspark/sql/types.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index f705556afb9b..a063515401a0 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -19,7 +19,6 @@ import decimal import time import datetime -import dateutil.tz import calendar import json import re @@ -179,7 +178,7 @@ def fromInternal(self, v): return datetime.date.fromordinal(v + self.EPOCH_ORDINAL) -_is_utc = datetime.datetime.now(dateutil.tz.tzlocal()).tzname() == "UTC" +_is_utc = time.tzname[time.daylight] == "UTC" class TimestampType(AtomicType): From bed6193ad1d57a0f7873d1a7dccd6257e15a7dab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20Bry=C5=84ski?= Date: Sat, 16 Sep 2017 18:23:27 +0200 Subject: [PATCH 6/6] Add support for leap seconds --- python/pyspark/sql/types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index a063515401a0..94dd385138e6 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -201,6 +201,7 @@ def fromInternal(self, ts): # using int to avoid precision loss in float y, m, d, hh, mm, ss, _, _, _ = (time.gmtime(ts // 1000000) if _is_utc else time.localtime(ts // 1000000)) + ss = min(ss, 59) # leap seconds support return datetime.datetime(y, m, d, hh, mm, ss, ts % 1000000)