
Commit 11daee3

Testing for legacy and new timestamp parser
1 parent e67a2a1 commit 11daee3

2 files changed: +93, -66 lines

2 files changed

+93
-66
lines changed
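Both suites apply the same change: each existing test body is wrapped in a loop over the two parser modes, so every assertion runs once with the legacy timestamp parser and once with the new one. Below is a minimal sketch of that pattern, assuming only the names that appear in the diffs (SQLHelper, withSQLConf, SQLConf.LEGACY_TIME_PARSER_ENABLED); the suite and test names here are placeholders, not part of the commit.

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.internal.SQLConf

// Placeholder suite illustrating the wrapping pattern used by the commit.
class TimestampParserModesSuite extends SparkFunSuite with SQLHelper {
  test("assertions run under both parser implementations") {
    Seq(true, false).foreach { legacyParser =>
      // Toggle the legacy/new timestamp parser for the scope of this block only.
      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
        // The pre-existing assertions go here unchanged, e.g.
        // checkTimestampType("yyyy", """{"a": "2018"}""")
      }
    }
  }
}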

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala

Lines changed: 49 additions & 31 deletions
@@ -20,9 +20,11 @@ package org.apache.spark.sql.catalyst.json
 import com.fasterxml.jackson.core.JsonFactory
 
 import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
-class JsonInferSchemaSuite extends SparkFunSuite {
+class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper {
 
   def checkType(options: Map[String, String], json: String, dt: DataType): Unit = {
     val jsonOptions = new JSONOptions(options, "UTC", "")
@@ -41,44 +43,60 @@ class JsonInferSchemaSuite extends SparkFunSuite {
   }
 
   test("inferring timestamp type") {
-    checkTimestampType("yyyy", """{"a": "2018"}""")
-    checkTimestampType("yyyy=MM", """{"a": "2018=12"}""")
-    checkTimestampType("yyyy MM dd", """{"a": "2018 12 02"}""")
-    checkTimestampType(
-      "yyyy-MM-dd'T'HH:mm:ss.SSS",
-      """{"a": "2018-12-02T21:04:00.123"}""")
-    checkTimestampType(
-      "yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX",
-      """{"a": "2018-12-02T21:04:00.123567+01:00"}""")
+    Seq(true, false).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        checkTimestampType("yyyy", """{"a": "2018"}""")
+        checkTimestampType("yyyy=MM", """{"a": "2018=12"}""")
+        checkTimestampType("yyyy MM dd", """{"a": "2018 12 02"}""")
+        checkTimestampType(
+          "yyyy-MM-dd'T'HH:mm:ss.SSS",
+          """{"a": "2018-12-02T21:04:00.123"}""")
+        checkTimestampType(
+          "yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX",
+          """{"a": "2018-12-02T21:04:00.123567+01:00"}""")
+      }
+    }
   }
 
   test("prefer decimals over timestamps") {
-    checkType(
-      options = Map(
-        "prefersDecimal" -> "true",
-        "timestampFormat" -> "yyyyMMdd.HHmmssSSS"
-      ),
-      json = """{"a": "20181202.210400123"}""",
-      dt = DecimalType(17, 9)
-    )
+    Seq(true, false).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        checkType(
+          options = Map(
+            "prefersDecimal" -> "true",
+            "timestampFormat" -> "yyyyMMdd.HHmmssSSS"
+          ),
+          json = """{"a": "20181202.210400123"}""",
+          dt = DecimalType(17, 9)
+        )
+      }
+    }
   }
 
   test("skip decimal type inferring") {
-    checkType(
-      options = Map(
-        "prefersDecimal" -> "false",
-        "timestampFormat" -> "yyyyMMdd.HHmmssSSS"
-      ),
-      json = """{"a": "20181202.210400123"}""",
-      dt = TimestampType
-    )
+    Seq(true, false).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        checkType(
+          options = Map(
+            "prefersDecimal" -> "false",
+            "timestampFormat" -> "yyyyMMdd.HHmmssSSS"
+          ),
+          json = """{"a": "20181202.210400123"}""",
+          dt = TimestampType
+        )
+      }
+    }
   }
 
   test("fallback to string type") {
-    checkType(
-      options = Map("timestampFormat" -> "yyyy,MM,dd.HHmmssSSS"),
-      json = """{"a": "20181202.210400123"}""",
-      dt = StringType
-    )
+    Seq(true, false).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        checkType(
+          options = Map("timestampFormat" -> "yyyy,MM,dd.HHmmssSSS"),
+          json = """{"a": "20181202.210400123"}""",
+          dt = StringType
+        )
+      }
+    }
   }
 }

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala

Lines changed: 44 additions & 35 deletions
@@ -2592,44 +2592,53 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
   }
 
   test("inferring timestamp type") {
-    def schemaOf(jsons: String*): StructType = spark.read.json(jsons.toDS).schema
-
-    assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""",
-      """{"a":"2018-12-16T22:23:24.123-02:00"}""") === fromDDL("a timestamp"))
-
-    assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":1}""")
-      === fromDDL("a string"))
-    assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":"123"}""")
-      === fromDDL("a string"))
-
-    assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":null}""")
-      === fromDDL("a timestamp"))
-    assert(schemaOf("""{"a":null}""", """{"a":"2018-12-17T10:11:12.123-01:00"}""")
-      === fromDDL("a timestamp"))
+    Seq(true, false).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        def schemaOf(jsons: String*): StructType = spark.read.json(jsons.toDS).schema
+
+        assert(schemaOf(
+          """{"a":"2018-12-17T10:11:12.123-01:00"}""",
+          """{"a":"2018-12-16T22:23:24.123-02:00"}""") === fromDDL("a timestamp"))
+
+        assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":1}""")
+          === fromDDL("a string"))
+        assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":"123"}""")
+          === fromDDL("a string"))
+
+        assert(schemaOf("""{"a":"2018-12-17T10:11:12.123-01:00"}""", """{"a":null}""")
+          === fromDDL("a timestamp"))
+        assert(schemaOf("""{"a":null}""", """{"a":"2018-12-17T10:11:12.123-01:00"}""")
+          === fromDDL("a timestamp"))
+      }
+    }
   }
 
   test("roundtrip for timestamp type inferring") {
-    val customSchema = new StructType(Array(StructField("date", TimestampType, true)))
-    withTempDir { dir =>
-      val timestampsWithFormatPath = s"${dir.getCanonicalPath}/timestampsWithFormat.json"
-      val timestampsWithFormat = spark.read
-        .option("timestampFormat", "dd/MM/yyyy HH:mm")
-        .json(datesRecords)
-      assert(timestampsWithFormat.schema === customSchema)
-
-      timestampsWithFormat.write
-        .format("json")
-        .option("timestampFormat", "yyyy-MM-dd HH:mm:ss")
-        .option(DateTimeUtils.TIMEZONE_OPTION, "UTC")
-        .save(timestampsWithFormatPath)
-
-      val readBack = spark.read
-        .option("timestampFormat", "yyyy-MM-dd HH:mm:ss")
-        .option(DateTimeUtils.TIMEZONE_OPTION, "UTC")
-        .json(timestampsWithFormatPath)
-
-      assert(readBack.schema === customSchema)
-      checkAnswer(readBack, timestampsWithFormat)
+    Seq(true, false).foreach { legacyParser =>
+      withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
+        val customSchema = new StructType().add("date", TimestampType)
+        withTempDir { dir =>
+          val timestampsWithFormatPath = s"${dir.getCanonicalPath}/timestampsWithFormat.json"
+          val timestampsWithFormat = spark.read
+            .option("timestampFormat", "dd/MM/yyyy HH:mm")
+            .json(datesRecords)
+          assert(timestampsWithFormat.schema === customSchema)
+
+          timestampsWithFormat.write
+            .format("json")
+            .option("timestampFormat", "yyyy-MM-dd HH:mm:ss")
+            .option(DateTimeUtils.TIMEZONE_OPTION, "UTC")
+            .save(timestampsWithFormatPath)
+
+          val readBack = spark.read
+            .option("timestampFormat", "yyyy-MM-dd HH:mm:ss")
+            .option(DateTimeUtils.TIMEZONE_OPTION, "UTC")
+            .json(timestampsWithFormatPath)
+
+          assert(readBack.schema === customSchema)
+          checkAnswer(readBack, timestampsWithFormat)
+        }
+      }
     }
   }
 }
