From 20904c4323c5f908a42ea1b4cea6701626cebe28 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Mon, 13 Jun 2016 15:34:55 -0700 Subject: [PATCH 1/5] fix toUTCTime and daysToMillis --- .../sql/catalyst/util/DateTimeUtils.scala | 20 +++++++++++--- .../org/apache/spark/sql/types/DateType.scala | 2 +- .../catalyst/util/DateTimeUtilsSuite.scala | 27 +++++++++++++++++++ 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index e08328a32079e..8e17260832a1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -100,8 +100,8 @@ object DateTimeUtils { // reverse of millisToDays def daysToMillis(days: SQLDate): Long = { - val millisUtc = days.toLong * MILLIS_PER_DAY - millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc) + val millisLocal = days.toLong * MILLIS_PER_DAY + millisLocal - getOffsetFromLocalMillis(millisLocal, threadLocalLocalTimeZone.get()) } def dateToString(days: SQLDate): String = @@ -850,6 +850,20 @@ object DateTimeUtils { } } + /** + * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in a timezone. + */ + private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { + val rawOffset = tz.getRawOffset + // the actual offset should be calculated based on milliseconds in UTC + var offset = tz.getOffset(millisLocal - rawOffset) + if (offset != rawOffset) { + // Could be in DST, try with best effort + offset = tz.getOffset(millisLocal - offset) + } + offset + } + /** * Returns a timestamp of given timezone from utc timestamp, with the same string * representation in their timezone. @@ -866,7 +880,7 @@ object DateTimeUtils { */ def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { val tz = TimeZone.getTimeZone(timeZone) - val offset = tz.getOffset(time / 1000L) + val offset = getOffsetFromLocalMillis(time / 1000L, tz) time - offset * 1000L } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala index 1d73e40ffcd36..2c966230e447e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.ScalaReflectionLock * * Please use the singleton [[DataTypes.DateType]]. * - * Internally, this is represented as the number of days from epoch (1970-01-01 00:00:00 UTC). + * Internally, this is represented as the number of days from 1970-01-01. */ @DeveloperApi class DateType private() extends AtomicType { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 28e30c2219e3f..39c7683df5790 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -492,6 +492,13 @@ class DateTimeUtilsSuite extends SparkFunSuite { test("2011-12-25 09:00:00.123456", "JST", "2011-12-25 18:00:00.123456") test("2011-12-25 09:00:00.123456", "PST", "2011-12-25 01:00:00.123456") test("2011-12-25 09:00:00.123456", "Asia/Shanghai", "2011-12-25 17:00:00.123456") + + // Daylight Saving Time + test("2016-03-13 09:59:59.0", "PST", "2016-03-13 01:59:59.0") + test("2016-03-13 10:00:00.0", "PST", "2016-03-13 03:00:00.0") + test("2016-11-06 08:59:59.0", "PST", "2016-11-06 01:59:59.0") + test("2016-11-06 09:00:00.0", "PST", "2016-11-06 01:00:00.0") + test("2016-11-06 10:00:00.0", "PST", "2016-11-06 02:00:00.0") } test("to UTC timestamp") { @@ -503,5 +510,25 @@ class DateTimeUtilsSuite extends SparkFunSuite { test("2011-12-25 18:00:00.123456", "JST", "2011-12-25 09:00:00.123456") test("2011-12-25 01:00:00.123456", "PST", "2011-12-25 09:00:00.123456") test("2011-12-25 17:00:00.123456", "Asia/Shanghai", "2011-12-25 09:00:00.123456") + + // Daylight Saving Time + test("2016-03-13 01:59:59", "PST", "2016-03-13 09:59:59.0") + // 2016-03-13 02:00:00 PST does not exists + test("2016-03-13 02:00:00", "PST", "2016-03-13 10:00:00.0") + test("2016-03-13 03:00:00", "PST", "2016-03-13 10:00:00.0") + test("2016-11-06 00:59:59", "PST", "2016-11-06 07:59:59.0") + // 2016-11-06 01:00:00 PST could be 2016-11-06 08:00:00 UTC or 2016-11-06 09:00:00 UTC + test("2016-11-06 01:00:00", "PST", "2016-11-06 09:00:00.0") + test("2016-11-06 01:59:59", "PST", "2016-11-06 09:59:59.0") + test("2016-11-06 02:00:00", "PST", "2016-11-06 10:00:00.0") + } + + test("daysToMillis and millisToDays") { + (-1001 to 2000).foreach { d => + assert(millisToDays(daysToMillis(d)) === d) + val date = toJavaTimestamp(daysToMillis(d) * 1000L) + assert(date.getHours === 0) + assert(date.getMinutes === 0) + } } } From ac3d2b56c27dab6f406771f8e146cca6bf5e843e Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Tue, 14 Jun 2016 12:49:14 -0700 Subject: [PATCH 2/5] cover all timezone --- .../sql/catalyst/util/DateTimeUtils.scala | 26 +++++++++--- .../sql/catalyst/util/DateTimeTestUtils.scala | 40 +++++++++++++++++++ .../catalyst/util/DateTimeUtilsSuite.scala | 11 ++--- 3 files changed, 66 insertions(+), 11 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 8e17260832a1a..93178646337b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -854,14 +854,18 @@ object DateTimeUtils { * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in a timezone. */ private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { - val rawOffset = tz.getRawOffset + var guess = tz.getRawOffset // the actual offset should be calculated based on milliseconds in UTC - var offset = tz.getOffset(millisLocal - rawOffset) - if (offset != rawOffset) { - // Could be in DST, try with best effort - offset = tz.getOffset(millisLocal - offset) + var actual = tz.getOffset(millisLocal - guess) + // At the start of DST, the local time is forwarded by an hour, so it's OK to get the + // local time bigger than current one after an round trip (local -> UTC -> local). + // At the end of DST, the local time is backwarded by an hour, actual offset will be + // less than guess, we should decrease the guess and try again. + while (actual < guess) { + guess = actual + actual = tz.getOffset(millisLocal - guess) } - offset + guess } /** @@ -883,4 +887,14 @@ object DateTimeUtils { val offset = getOffsetFromLocalMillis(time / 1000L, tz) time - offset * 1000L } + + /** + * Re-initialize the current thread's thread locals. Exposed for testing. + */ + private[util] def resetThreadLocals(): Unit = { + threadLocalGmtCalendar.remove() + threadLocalLocalTimeZone.remove() + threadLocalTimestampFormat.remove() + threadLocalDateFormat.remove() + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala new file mode 100644 index 0000000000000..0c1feb3aa0882 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import java.util.TimeZone + +/** + * Helper functions for testing date and time functionality. + */ +object DateTimeTestUtils { + + val ALL_TIMEZONES: Seq[TimeZone] = TimeZone.getAvailableIDs.toSeq.map(TimeZone.getTimeZone) + + def withDefaultTimeZone[T](newDefaultTimeZone: TimeZone)(block: => T): T = { + val originalDefaultTimeZone = TimeZone.getDefault + try { + DateTimeUtils.resetThreadLocals() + TimeZone.setDefault(newDefaultTimeZone) + block + } finally { + TimeZone.setDefault(originalDefaultTimeZone) + DateTimeUtils.resetThreadLocals() + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 39c7683df5790..8f61e678ac9c4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -524,11 +524,12 @@ class DateTimeUtilsSuite extends SparkFunSuite { } test("daysToMillis and millisToDays") { - (-1001 to 2000).foreach { d => - assert(millisToDays(daysToMillis(d)) === d) - val date = toJavaTimestamp(daysToMillis(d) * 1000L) - assert(date.getHours === 0) - assert(date.getMinutes === 0) + for (tz <- DateTimeTestUtils.ALL_TIMEZONES) { + DateTimeTestUtils.withDefaultTimeZone(tz) { + (-2000 to 2000).foreach { d => + assert(millisToDays(daysToMillis(d)) === d) + } + } } } } From f23b54563f488c7654b6cb8911daf03a28d1bb95 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Wed, 15 Jun 2016 14:35:52 -0700 Subject: [PATCH 3/5] more acurate --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 93178646337b8..34f0dc923997e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -857,13 +857,18 @@ object DateTimeUtils { var guess = tz.getRawOffset // the actual offset should be calculated based on milliseconds in UTC var actual = tz.getOffset(millisLocal - guess) - // At the start of DST, the local time is forwarded by an hour, so it's OK to get the - // local time bigger than current one after an round trip (local -> UTC -> local). - // At the end of DST, the local time is backwarded by an hour, actual offset will be - // less than guess, we should decrease the guess and try again. - while (actual < guess) { + if (actual != guess) { + // try with the result as guess guess = actual actual = tz.getOffset(millisLocal - guess) + // At the start of DST, the local time is forwarded by an hour, so it's OK to get the + // local time bigger than current one after an round trip (local -> UTC -> local). + // At the end of DST, the local time is backwarded by an hour, actual offset will be + // less than guess, we should decrease the guess and try again. + while (actual < guess) { + guess = actual + actual = tz.getOffset(millisLocal - guess) + } } guess } From 9df8c84d6589d3e1609b8ae6c39ef8c9462b7cad Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Thu, 16 Jun 2016 10:17:43 -0700 Subject: [PATCH 4/5] skip some days in tests --- .../sql/catalyst/util/DateTimeUtils.scala | 2 +- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 34f0dc923997e..57c4b5074870d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -851,7 +851,7 @@ object DateTimeUtils { } /** - * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in a timezone. + * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in given timezone. */ private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { var guess = tz.getRawOffset diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 8f61e678ac9c4..f9cb97629fcf3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -524,10 +524,22 @@ class DateTimeUtilsSuite extends SparkFunSuite { } test("daysToMillis and millisToDays") { - for (tz <- DateTimeTestUtils.ALL_TIMEZONES) { + // There are some days are skipped entirely in some timezone, skip them here. + val skipped_days = Map[String, Int]( + "Kwajalein" -> 8632, + "Pacific/Apia" -> 15338, + "Pacific/Enderbury" -> 9131, + "Pacific/Fakaofo" -> 15338, + "Pacific/Kiritimati" -> 9131, + "Pacific/Kwajalein" -> 8632, + "MIT" -> 15338) + for (tz <- DateTimeTestUtils.ALL_TIMEZONES) { DateTimeTestUtils.withDefaultTimeZone(tz) { - (-2000 to 2000).foreach { d => - assert(millisToDays(daysToMillis(d)) === d) + val skipped = skipped_days.getOrElse(tz.getID, Int.MinValue) + (-20000 to 20000).foreach { d => + if (d != skipped) { + assert(millisToDays(daysToMillis(d)) === d) + } } } } From 88df1762cdc3fa13cb05234edeb3848420f00566 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Sat, 18 Jun 2016 00:47:35 -0700 Subject: [PATCH 5/5] fallback to java.util.Date --- .../sql/catalyst/util/DateTimeUtils.scala | 36 ++++++++++++------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 57c4b5074870d..56bf9a7863e61 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -856,18 +856,30 @@ object DateTimeUtils { private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { var guess = tz.getRawOffset // the actual offset should be calculated based on milliseconds in UTC - var actual = tz.getOffset(millisLocal - guess) - if (actual != guess) { - // try with the result as guess - guess = actual - actual = tz.getOffset(millisLocal - guess) - // At the start of DST, the local time is forwarded by an hour, so it's OK to get the - // local time bigger than current one after an round trip (local -> UTC -> local). - // At the end of DST, the local time is backwarded by an hour, actual offset will be - // less than guess, we should decrease the guess and try again. - while (actual < guess) { - guess = actual - actual = tz.getOffset(millisLocal - guess) + val offset = tz.getOffset(millisLocal - guess) + if (offset != guess) { + guess = tz.getOffset(millisLocal - offset) + if (guess != offset) { + // fallback to do the reverse lookup using java.sql.Timestamp + // this should only happen near the start or end of DST + val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt + val year = getYear(days) + val month = getMonth(days) + val day = getDayOfMonth(days) + + var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt + if (millisOfDay < 0) { + millisOfDay += MILLIS_PER_DAY.toInt + } + val seconds = (millisOfDay / 1000L).toInt + val hh = seconds / 3600 + val mm = seconds / 60 % 60 + val ss = seconds % 60 + val nano = millisOfDay % 1000 * 1000000 + + // create a Timestamp to get the unix timestamp (in UTC) + val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, nano) + guess = (millisLocal - timestamp.getTime).toInt } } guess