
Commit ede4b7d

beliefer authored and gengliangwang committed
[SPARK-39339][SQL][FOLLOWUP] Fix bug: TimestampNTZ type in JDBC data source is incorrect
### What changes were proposed in this pull request?

#36726 added support for the TimestampNTZ type in the JDBC data source, but the implementation is incorrect: merely extending the existing test case as shown below makes it fail.

```scala
test("SPARK-39339: TimestampNTZType with different local time zones") {
  val tableName = "timestamp_ntz_diff_tz_support_table"
  DateTimeTestUtils.outstandingZoneIds.foreach { zoneId =>
    DateTimeTestUtils.withDefaultTimeZone(zoneId) {
      Seq(
        "1972-07-04 03:30:00",
        "2019-01-20 12:00:00.502",
        "2019-01-20T00:00:00.123456",
        "1500-01-20T00:00:00.123456"
      ).foreach { case datetime =>
        val df = spark.sql(s"select timestamp_ntz '$datetime'")
        df.write.format("jdbc")
          .mode("overwrite")
          .option("url", urlWithUserAndPass)
          .option("dbtable", tableName)
          .save()

        DateTimeTestUtils.outstandingZoneIds.foreach { zoneId =>
          DateTimeTestUtils.withDefaultTimeZone(zoneId) {
            val res = spark.read.format("jdbc")
              .option("inferTimestampNTZType", "true")
              .option("url", urlWithUserAndPass)
              .option("dbtable", tableName)
              .load()

            checkAnswer(res, df)
          }
        }
      }
    }
  }
}
```

The test fails with:

```
Results do not match for query:
Timezone: sun.util.calendar.ZoneInfo[id="Africa/Dakar",offset=0,dstSavings=0,useDaylight=false,transitions=3,lastRule=null]
Timezone Env:

== Parsed Logical Plan ==
Relation [TIMESTAMP_NTZ '1500-01-20 00:00:00.123456'#253] JDBCRelation(timestamp_ntz_diff_tz_support_table) [numPartitions=1]

== Analyzed Logical Plan ==
TIMESTAMP_NTZ '1500-01-20 00:00:00.123456': timestamp_ntz
Relation [TIMESTAMP_NTZ '1500-01-20 00:00:00.123456'#253] JDBCRelation(timestamp_ntz_diff_tz_support_table) [numPartitions=1]

== Optimized Logical Plan ==
Relation [TIMESTAMP_NTZ '1500-01-20 00:00:00.123456'#253] JDBCRelation(timestamp_ntz_diff_tz_support_table) [numPartitions=1]

== Physical Plan ==
*(1) Scan JDBCRelation(timestamp_ntz_diff_tz_support_table) [numPartitions=1] [TIMESTAMP_NTZ '1500-01-20 00:00:00.123456'#253] PushedFilters: [], ReadSchema: struct<TIMESTAMP_NTZ '1500-01-20 00:00:00.123456':timestamp_ntz>

== Results ==
!== Correct Answer - 1 ==                                            == Spark Answer - 1 ==
 struct<TIMESTAMP_NTZ '1500-01-20 00:00:00.123456':timestamp_ntz>   struct<TIMESTAMP_NTZ '1500-01-20 00:00:00.123456':timestamp_ntz>
![1500-01-20T00:00:00.123456]                                       [1500-01-20T00:16:08.123456]

ScalaTestFailureLocation: org.apache.spark.sql.QueryTest$ at (QueryTest.scala:243)
org.scalatest.exceptions.TestFailedException:
```

### Why are the changes needed?

This fixes an implementation bug. The root cause is the use of `toJavaTimestamp` and `fromJavaTimestamp`, which rebase the timestamp through the JVM's default time zone. A TimestampNTZ value carries no time zone, so a value written under one default zone and read back under another comes back shifted (by 16 minutes and 8 seconds in the `Africa/Dakar` failure above). A minimal demonstration of this zone dependence follows the description.

### Does this PR introduce _any_ user-facing change?

No. The affected code path is a new feature.

### How was this patch tested?

New test case.

Closes #37013 from beliefer/SPARK-39339_followup.

Authored-by: Jiaan Geng <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
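For readers unfamiliar with the failure mode, here is a minimal, self-contained sketch of the zone dependence described above. This is not Spark code; the object name and the chosen zones are illustrative only. `java.sql.Timestamp` conversions that consult the JVM default time zone pin the same wall-clock value to different instants depending on that zone, which is exactly what a zone-less TimestampNTZ value must avoid:

```scala
import java.sql.Timestamp
import java.time.LocalDateTime
import java.util.TimeZone

object NtzZoneDependenceDemo {
  def main(args: Array[String]): Unit = {
    // Timestamp.valueOf interprets a wall-clock LocalDateTime in the JVM
    // default time zone, so the resulting epoch millis shift with that zone.
    val wallClock = LocalDateTime.parse("1500-01-20T00:00:00.123456")
    Seq("UTC", "Africa/Dakar", "Asia/Shanghai").foreach { zone =>
      TimeZone.setDefault(TimeZone.getTimeZone(zone))
      val ts = Timestamp.valueOf(wallClock)
      println(s"$zone -> epoch millis ${ts.getTime}")
    }
  }
}
```

Different epoch millis for the same wall clock is harmless for zoned timestamps, but the writer's and reader's default zones can differ, so a TimestampNTZ round trip through such conversions corrupts the value.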
1 parent: a39fc87 · commit: ede4b7d

File tree: 3 files changed (+49, −27 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala

Lines changed: 24 additions & 8 deletions

```diff
@@ -158,11 +158,19 @@ object DateTimeUtils {
   * @param micros The number of microseconds since 1970-01-01T00:00:00.000000Z.
   * @return A `java.sql.Timestamp` from number of micros since epoch.
   */
-  def toJavaTimestamp(micros: Long): Timestamp = {
-    val rebasedMicros = rebaseGregorianToJulianMicros(micros)
-    val seconds = Math.floorDiv(rebasedMicros, MICROS_PER_SECOND)
+  def toJavaTimestamp(micros: Long): Timestamp =
+    toJavaTimestampNoRebase(rebaseGregorianToJulianMicros(micros))
+
+  /**
+   * Converts microseconds since the epoch to an instance of `java.sql.Timestamp`.
+   *
+   * @param micros The number of microseconds since 1970-01-01T00:00:00.000000Z.
+   * @return A `java.sql.Timestamp` from number of micros since epoch.
+   */
+  def toJavaTimestampNoRebase(micros: Long): Timestamp = {
+    val seconds = Math.floorDiv(micros, MICROS_PER_SECOND)
     val ts = new Timestamp(seconds * MILLIS_PER_SECOND)
-    val nanos = (rebasedMicros - seconds * MICROS_PER_SECOND) * NANOS_PER_MICROS
+    val nanos = (micros - seconds * MICROS_PER_SECOND) * NANOS_PER_MICROS
     ts.setNanos(nanos.toInt)
     ts
   }
@@ -186,10 +194,18 @@ object DateTimeUtils {
   * Gregorian calendars.
   * @return The number of micros since epoch from `java.sql.Timestamp`.
   */
-  def fromJavaTimestamp(t: Timestamp): Long = {
-    val micros = millisToMicros(t.getTime) + (t.getNanos / NANOS_PER_MICROS) % MICROS_PER_MILLIS
-    rebaseJulianToGregorianMicros(micros)
-  }
+  def fromJavaTimestamp(t: Timestamp): Long =
+    rebaseJulianToGregorianMicros(fromJavaTimestampNoRebase(t))
+
+  /**
+   * Converts an instance of `java.sql.Timestamp` to the number of microseconds since
+   * 1970-01-01T00:00:00.000000Z.
+   *
+   * @param t an instance of `java.sql.Timestamp`.
+   * @return The number of micros since epoch from `java.sql.Timestamp`.
+   */
+  def fromJavaTimestampNoRebase(t: Timestamp): Long =
+    millisToMicros(t.getTime) + (t.getNanos / NANOS_PER_MICROS) % MICROS_PER_MILLIS
 
   /**
    * Converts an Java object to microseconds.
```
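To make the intent of this refactor concrete, below is a standalone sketch of the two new helpers with the time constants inlined (in Spark they come from a shared constants object; the object name `NoRebaseRoundTrip` and the sample values are mine). The point is that both directions are pure integer arithmetic, with no calendar rebasing and no default-zone lookups, so the round trip is exact under any JVM time zone:

```scala
import java.sql.Timestamp

object NoRebaseRoundTrip {
  val MICROS_PER_SECOND = 1000000L
  val MILLIS_PER_SECOND = 1000L
  val MICROS_PER_MILLIS = 1000L
  val NANOS_PER_MICROS = 1000L

  // Split micros-since-epoch into whole seconds (floored, so pre-epoch values
  // work) plus a non-negative nano remainder; no time zone is consulted.
  def toJavaTimestampNoRebase(micros: Long): Timestamp = {
    val seconds = Math.floorDiv(micros, MICROS_PER_SECOND)
    val ts = new Timestamp(seconds * MILLIS_PER_SECOND)
    ts.setNanos(((micros - seconds * MICROS_PER_SECOND) * NANOS_PER_MICROS).toInt)
    ts
  }

  // Inverse: getTime carries everything down to the millisecond, and the
  // nanos field contributes only the sub-millisecond micros.
  def fromJavaTimestampNoRebase(t: Timestamp): Long =
    t.getTime * MICROS_PER_MILLIS + (t.getNanos / NANOS_PER_MICROS) % MICROS_PER_MILLIS

  def main(args: Array[String]): Unit = {
    // Exact round trip for zero, positive, and pre-epoch negative micros.
    Seq(0L, 1656000000123456L, -14830000000876544L).foreach { micros =>
      assert(fromJavaTimestampNoRebase(toJavaTimestampNoRebase(micros)) == micros)
    }
    println("round trips OK under any default time zone")
  }
}
```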

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala

Lines changed: 14 additions & 12 deletions

```diff
@@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder
 import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils, GenericArrayData}
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localDateTimeToMicros, localDateToDays, toJavaDate, toJavaTimestamp}
+import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localDateTimeToMicros, localDateToDays, toJavaDate, toJavaTimestamp, toJavaTimestampNoRebase}
 import org.apache.spark.sql.connector.catalog.TableChange
 import org.apache.spark.sql.connector.catalog.index.{SupportsIndex, TableIndex}
 import org.apache.spark.sql.connector.expressions.NamedReference
@@ -473,7 +473,7 @@ object JdbcUtils extends Logging with SQLConfHelper {
         }
       }
 
-    case TimestampType | TimestampNTZType =>
+    case TimestampType =>
       (rs: ResultSet, row: InternalRow, pos: Int) =>
         val t = rs.getTimestamp(pos + 1)
         if (t != null) {
@@ -482,6 +482,15 @@ object JdbcUtils extends Logging with SQLConfHelper {
           row.update(pos, null)
         }
 
+    case TimestampNTZType =>
+      (rs: ResultSet, row: InternalRow, pos: Int) =>
+        val t = rs.getTimestamp(pos + 1)
+        if (t != null) {
+          row.setLong(pos, DateTimeUtils.fromJavaTimestampNoRebase(t))
+        } else {
+          row.update(pos, null)
+        }
+
     case BinaryType =>
       (rs: ResultSet, row: InternalRow, pos: Int) =>
         row.update(pos, rs.getBytes(pos + 1))
@@ -594,16 +603,9 @@ object JdbcUtils extends Logging with SQLConfHelper {
     }
 
     case TimestampNTZType =>
-      if (conf.datetimeJava8ApiEnabled) {
-        (stmt: PreparedStatement, row: Row, pos: Int) =>
-          stmt.setTimestamp(pos + 1, toJavaTimestamp(instantToMicros(row.getAs[Instant](pos))))
-      } else {
-        (stmt: PreparedStatement, row: Row, pos: Int) =>
-          stmt.setTimestamp(
-            pos + 1,
-            toJavaTimestamp(localDateTimeToMicros(row.getAs[java.time.LocalDateTime](pos)))
-          )
-      }
+      (stmt: PreparedStatement, row: Row, pos: Int) =>
+        val micros = localDateTimeToMicros(row.getAs[java.time.LocalDateTime](pos))
+        stmt.setTimestamp(pos + 1, toJavaTimestampNoRebase(micros))
 
     case DateType =>
       if (conf.datetimeJava8ApiEnabled) {
```
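The write path now funnels every NTZ value through `localDateTimeToMicros` plus `toJavaTimestampNoRebase`. As a hedged paraphrase of what `localDateTimeToMicros` does (the real implementation lives in `DateTimeUtils`; the `Sketch` names are mine), the wall clock is anchored to UTC rather than to the default zone:

```scala
import java.time.{LocalDateTime, ZoneOffset}

object LocalDateTimeToMicrosSketch {
  // Anchor the zone-less wall clock at UTC, then count micros from the epoch.
  // Because ZoneOffset.UTC is hard-coded, the JVM default time zone never
  // influences the stored value.
  def localDateTimeToMicrosSketch(ldt: LocalDateTime): Long = {
    val instant = ldt.toInstant(ZoneOffset.UTC)
    Math.addExact(
      Math.multiplyExact(instant.getEpochSecond, 1000000L),
      (instant.getNano / 1000).toLong)
  }

  def main(args: Array[String]): Unit = {
    val micros = localDateTimeToMicrosSketch(LocalDateTime.parse("2019-01-20T12:00:00.502"))
    println(s"micros since epoch: $micros") // same output in every zone
  }
}
```

Pairing this with `toJavaTimestampNoRebase` on write and `fromJavaTimestampNoRebase` on read keeps the entire JDBC round trip zone-independent.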

sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala

Lines changed: 11 additions & 7 deletions

```diff
@@ -1950,13 +1950,17 @@ class JDBCSuite extends QueryTest
           .option("dbtable", tableName)
           .save()
 
-        val res = spark.read.format("jdbc")
-          .option("inferTimestampNTZType", "true")
-          .option("url", urlWithUserAndPass)
-          .option("dbtable", tableName)
-          .load()
-
-        checkAnswer(res, df)
+        DateTimeTestUtils.outstandingZoneIds.foreach { zoneId =>
+          DateTimeTestUtils.withDefaultTimeZone(zoneId) {
+            val res = spark.read.format("jdbc")
+              .option("inferTimestampNTZType", "true")
+              .option("url", urlWithUserAndPass)
+              .option("dbtable", tableName)
+              .load()
+
+            checkAnswer(res, df)
+          }
+        }
       }
     }
   }
```
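The reworked test nests zone loops around both the write and the read, and leans on `DateTimeTestUtils.withDefaultTimeZone` to do so. A minimal stand-in with the behavior the test assumes (the real helper takes a `ZoneId`; this sketch takes a zone name string) looks like:

```scala
import java.util.TimeZone

object WithDefaultTimeZoneSketch {
  // Assumed behavior of the test helper: swap the JVM default time zone for
  // the duration of `block`, always restoring the original afterwards.
  def withDefaultTimeZone[T](zoneId: String)(block: => T): T = {
    val original = TimeZone.getDefault
    TimeZone.setDefault(TimeZone.getTimeZone(zoneId))
    try block
    finally TimeZone.setDefault(original)
  }

  def main(args: Array[String]): Unit = {
    withDefaultTimeZone("Africa/Dakar") {
      println(TimeZone.getDefault.getID) // Africa/Dakar inside the block
    }
    println(TimeZone.getDefault.getID) // original zone restored
  }
}
```

Nesting two such loops, as the test does, simulates every writer-zone/reader-zone pair, which is what exposed the original rebasing bug.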
