Skip to content

Commit 2000387

Browse files
committed
Add DaysWritable
1 parent 3946b24 commit 2000387

File tree

2 files changed

+100
-55
lines changed

2 files changed

+100
-55
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.spark.sql.hive
19+
20+
import java.io.{DataInput, DataOutput, IOException}
21+
import java.time.LocalDate
22+
import java.util.Calendar
23+
24+
import org.apache.hadoop.hive.serde2.io.DateWritable
25+
import org.apache.hadoop.io.WritableUtils
26+
27+
import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils}
28+
29+
/**
30+
* The class accepts/returns days in the Gregorian calendar and rebases them
31+
* via conversion to a local date in the Julian calendar for dates before 1582-10-15
32+
* in read/write for backward compatibility with Spark 2.4 and earlier versions.
33+
*
34+
* @param gregorianDays The number of days since the epoch 1970-01-01 in
35+
* the Proleptic Gregorian calendar.
36+
*/
37+
class DaysWritable(var gregorianDays: Int = 0) extends DateWritable {
38+
39+
private final val JULIAN_CUTOVER_DAY =
40+
rebaseGregorianToJulianDays(DateTimeUtils.GREGORIAN_CUTOVER_DAY.toInt)
41+
42+
override def getDays: Int = gregorianDays
43+
44+
@throws[IOException]
45+
override def readFields(in: DataInput): Unit = {
46+
val days = WritableUtils.readVInt(in)
47+
gregorianDays = if (days < JULIAN_CUTOVER_DAY) {
48+
rebaseJulianToGregorianDays(days)
49+
} else {
50+
days
51+
}
52+
}
53+
54+
@throws[IOException]
55+
override def write(out: DataOutput): Unit = {
56+
val rebasedDays = if (gregorianDays < DateTimeUtils.GREGORIAN_CUTOVER_DAY) {
57+
rebaseGregorianToJulianDays(gregorianDays)
58+
} else {
59+
gregorianDays
60+
}
61+
WritableUtils.writeVInt(out, rebasedDays)
62+
}
63+
64+
// Rebases days since the epoch so that the stored number of days is the same
65+
// as the one written by Spark 2.4 and earlier versions. Spark 3.0 switched to
66+
// the Proleptic Gregorian calendar (see SPARK-26651), which
67+
// affects dates before 1582-10-15. Spark 2.4 and earlier versions use
68+
// the Julian calendar for dates before 1582-10-15. So, the same local date may
69+
// be mapped to a different number of days since the epoch in different calendars.
70+
// For example:
71+
// Proleptic Gregorian calendar: 1582-01-01 -> -141714
72+
// Julian calendar: 1582-01-01 -> -141704
73+
// The code below converts -141714 to -141704.
74+
private def rebaseGregorianToJulianDays(daysSinceEpoch: Int): Int = {
75+
val millis = Math.multiplyExact(daysSinceEpoch, DateTimeConstants.MILLIS_PER_DAY)
76+
val utcCal = new Calendar.Builder()
77+
.setCalendarType("gregory")
78+
.setTimeZone(DateTimeUtils.TimeZoneUTC)
79+
.setInstant(millis)
80+
.build()
81+
val localDate = LocalDate.of(
82+
utcCal.get(Calendar.YEAR),
83+
utcCal.get(Calendar.MONTH) + 1,
84+
utcCal.get(Calendar.DAY_OF_MONTH))
85+
Math.toIntExact(localDate.toEpochDay)
86+
}
87+
88+
private def rebaseJulianToGregorianDays(daysSinceEpoch: Int): Int = {
89+
val localDate = LocalDate.ofEpochDay(daysSinceEpoch)
90+
val utcCal = new Calendar.Builder()
91+
.setCalendarType("gregory")
92+
.setTimeZone(DateTimeUtils.TimeZoneUTC)
93+
.setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth)
94+
.build()
95+
Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, DateTimeConstants.MILLIS_PER_DAY))
96+
}
97+
}

sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala

Lines changed: 3 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
package org.apache.spark.sql.hive
1919

2020
import java.lang.reflect.{ParameterizedType, Type, WildcardType}
21-
import java.time.LocalDate
22-
import java.util.Calendar
2321

2422
import scala.collection.JavaConverters._
2523

@@ -182,33 +180,6 @@ import org.apache.spark.unsafe.types.UTF8String
182180
*/
183181
private[hive] trait HiveInspectors {
184182

185-
private final val JULIAN_CUTOVER_DAY =
186-
rebaseGregorianToJulianDays(DateTimeUtils.GREGORIAN_CUTOVER_DAY.toInt)
187-
188-
private def rebaseJulianToGregorianDays(daysSinceEpoch: Int): Int = {
189-
val localDate = LocalDate.ofEpochDay(daysSinceEpoch)
190-
val utcCal = new Calendar.Builder()
191-
.setCalendarType("gregory")
192-
.setTimeZone(DateTimeUtils.TimeZoneUTC)
193-
.setDate(localDate.getYear, localDate.getMonthValue - 1, localDate.getDayOfMonth)
194-
.build()
195-
Math.toIntExact(Math.floorDiv(utcCal.getTimeInMillis, DateTimeConstants.MILLIS_PER_DAY))
196-
}
197-
198-
private def rebaseGregorianToJulianDays(daysSinceEpoch: Int): Int = {
199-
val millis = Math.multiplyExact(daysSinceEpoch, DateTimeConstants.MILLIS_PER_DAY)
200-
val utcCal = new Calendar.Builder()
201-
.setCalendarType("gregory")
202-
.setTimeZone(DateTimeUtils.TimeZoneUTC)
203-
.setInstant(millis)
204-
.build()
205-
val localDate = LocalDate.of(
206-
utcCal.get(Calendar.YEAR),
207-
utcCal.get(Calendar.MONTH) + 1,
208-
utcCal.get(Calendar.DAY_OF_MONTH))
209-
Math.toIntExact(localDate.toEpochDay)
210-
}
211-
212183
def javaTypeToDataType(clz: Type): DataType = clz match {
213184
// writable
214185
case c: Class[_] if c == classOf[hadoopIo.DoubleWritable] => DoubleType
@@ -646,14 +617,7 @@ private[hive] trait HiveInspectors {
646617
case x: DateObjectInspector if x.preferWritable() =>
647618
data: Any => {
648619
if (data != null) {
649-
// Rebasing written days via conversion to local dates.
650-
// See the comment for `getDateWritable()`.
651-
val daysSinceEpoch = x.getPrimitiveWritableObject(data).getDays
652-
if (daysSinceEpoch < JULIAN_CUTOVER_DAY) {
653-
rebaseJulianToGregorianDays(daysSinceEpoch)
654-
} else {
655-
daysSinceEpoch
656-
}
620+
x.getPrimitiveWritableObject(data).getDays
657621
} else {
658622
null
659623
}
@@ -1045,27 +1009,11 @@ private[hive] trait HiveInspectors {
10451009
new hadoopIo.BytesWritable(value.asInstanceOf[Array[Byte]])
10461010
}
10471011

1048-
private def getDateWritable(value: Any): hiveIo.DateWritable =
1012+
private def getDateWritable(value: Any): DaysWritable =
10491013
if (value == null) {
10501014
null
10511015
} else {
1052-
// Rebasing days since the epoch to store the same number of days
1053-
// as by Spark 2.4 and earlier versions. Spark 3.0 switched to
1054-
// Proleptic Gregorian calendar (see SPARK-26651), and as a consequence of that,
1055-
// this affects dates before 1582-10-15. Spark 2.4 and earlier versions use
1056-
// Julian calendar for dates before 1582-10-15. So, the same local date may
1057-
// be mapped to different number of days since the epoch in different calendars.
1058-
// For example:
1059-
// Proleptic Gregorian calendar: 1582-01-01 -> -141714
1060-
// Julian calendar: 1582-01-01 -> -141704
1061-
// The code below converts -141714 to -141704.
1062-
val daysSinceEpoch = value.asInstanceOf[Int]
1063-
val rebasedDays = if (daysSinceEpoch < DateTimeUtils.GREGORIAN_CUTOVER_DAY) {
1064-
rebaseGregorianToJulianDays(daysSinceEpoch)
1065-
} else {
1066-
daysSinceEpoch
1067-
}
1068-
new hiveIo.DateWritable(rebasedDays)
1016+
new DaysWritable(value.asInstanceOf[Int])
10691017
}
10701018

10711019
private def getTimestampWritable(value: Any): hiveIo.TimestampWritable =

0 commit comments

Comments
 (0)