Skip to content

Commit 6e2ffc2

Browse files
committed
Use LongType as the default type for integers in JSON schema inference.
1 parent: fa6bdc6 · commit: 6e2ffc2

File tree

3 files changed

+17
-13
lines changed

3 files changed

+17
-13
lines changed

sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,13 +17,12 @@
1717

1818
package org.apache.spark.sql.json
1919

20-
import java.io.StringWriter
21-
import java.sql.{Date, Timestamp}
20+
import java.sql.Timestamp
2221

2322
import scala.collection.Map
2423
import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper}
2524

26-
import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException, JsonFactory}
25+
import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException}
2726
import com.fasterxml.jackson.databind.ObjectMapper
2827

2928
import org.apache.spark.rdd.RDD
@@ -178,7 +177,12 @@ private[sql] object JsonRDD extends Logging {
178177
}
179178

180179
private def typeOfPrimitiveValue: PartialFunction[Any, DataType] = {
181-
ScalaReflection.typeOfObject orElse {
180+
// For Integer values, use LongType by default.
181+
val useLongType: PartialFunction[Any, DataType] = {
182+
case value: IntegerType.JvmType => LongType
183+
}
184+
185+
useLongType orElse ScalaReflection.typeOfObject orElse {
182186
// Since we do not have a data type backed by BigInteger,
183187
// when we see a Java BigInteger, we use DecimalType.
184188
case value: java.math.BigInteger => DecimalType.Unlimited

sql/core/src/test/java/org/apache/spark/sql/api/java/JavaApplySchemaSuite.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ public void applySchemaToJSON() {
164164
fields.add(DataTypes.createStructField("bigInteger", DataTypes.createDecimalType(), true));
165165
fields.add(DataTypes.createStructField("boolean", DataTypes.BooleanType, true));
166166
fields.add(DataTypes.createStructField("double", DataTypes.DoubleType, true));
167-
fields.add(DataTypes.createStructField("integer", DataTypes.IntegerType, true));
167+
fields.add(DataTypes.createStructField("integer", DataTypes.LongType, true));
168168
fields.add(DataTypes.createStructField("long", DataTypes.LongType, true));
169169
fields.add(DataTypes.createStructField("null", DataTypes.StringType, true));
170170
fields.add(DataTypes.createStructField("string", DataTypes.StringType, true));

sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -222,7 +222,7 @@ class JsonSuite extends QueryTest {
222222
StructField("bigInteger", DecimalType.Unlimited, true) ::
223223
StructField("boolean", BooleanType, true) ::
224224
StructField("double", DoubleType, true) ::
225-
StructField("integer", IntegerType, true) ::
225+
StructField("integer", LongType, true) ::
226226
StructField("long", LongType, true) ::
227227
StructField("null", StringType, true) ::
228228
StructField("string", StringType, true) :: Nil)
@@ -252,7 +252,7 @@ class JsonSuite extends QueryTest {
252252
StructField("arrayOfBigInteger", ArrayType(DecimalType.Unlimited, false), true) ::
253253
StructField("arrayOfBoolean", ArrayType(BooleanType, false), true) ::
254254
StructField("arrayOfDouble", ArrayType(DoubleType, false), true) ::
255-
StructField("arrayOfInteger", ArrayType(IntegerType, false), true) ::
255+
StructField("arrayOfInteger", ArrayType(LongType, false), true) ::
256256
StructField("arrayOfLong", ArrayType(LongType, false), true) ::
257257
StructField("arrayOfNull", ArrayType(StringType, true), true) ::
258258
StructField("arrayOfString", ArrayType(StringType, false), true) ::
@@ -265,7 +265,7 @@ class JsonSuite extends QueryTest {
265265
StructField("field1", BooleanType, true) ::
266266
StructField("field2", DecimalType.Unlimited, true) :: Nil), true) ::
267267
StructField("structWithArrayFields", StructType(
268-
StructField("field1", ArrayType(IntegerType, false), true) ::
268+
StructField("field1", ArrayType(LongType, false), true) ::
269269
StructField("field2", ArrayType(StringType, false), true) :: Nil), true) :: Nil)
270270

271271
assert(expectedSchema === jsonDF.schema)
@@ -486,7 +486,7 @@ class JsonSuite extends QueryTest {
486486
val jsonDF = jsonRDD(complexFieldValueTypeConflict)
487487

488488
val expectedSchema = StructType(
489-
StructField("array", ArrayType(IntegerType, false), true) ::
489+
StructField("array", ArrayType(LongType, false), true) ::
490490
StructField("num_struct", StringType, true) ::
491491
StructField("str_array", StringType, true) ::
492492
StructField("struct", StructType(
@@ -540,7 +540,7 @@ class JsonSuite extends QueryTest {
540540
val expectedSchema = StructType(
541541
StructField("a", BooleanType, true) ::
542542
StructField("b", LongType, true) ::
543-
StructField("c", ArrayType(IntegerType, false), true) ::
543+
StructField("c", ArrayType(LongType, false), true) ::
544544
StructField("d", StructType(
545545
StructField("field", BooleanType, true) :: Nil), true) ::
546546
StructField("e", StringType, true) :: Nil)
@@ -560,7 +560,7 @@ class JsonSuite extends QueryTest {
560560
StructField("bigInteger", DecimalType.Unlimited, true) ::
561561
StructField("boolean", BooleanType, true) ::
562562
StructField("double", DoubleType, true) ::
563-
StructField("integer", IntegerType, true) ::
563+
StructField("integer", LongType, true) ::
564564
StructField("long", LongType, true) ::
565565
StructField("null", StringType, true) ::
566566
StructField("string", StringType, true) :: Nil)
@@ -781,12 +781,12 @@ class JsonSuite extends QueryTest {
781781
ArrayType(ArrayType(ArrayType(ArrayType(StringType, false), false), true), false), true) ::
782782
StructField("field2",
783783
ArrayType(ArrayType(
784-
StructType(StructField("Test", IntegerType, true) :: Nil), false), true), true) ::
784+
StructType(StructField("Test", LongType, true) :: Nil), false), true), true) ::
785785
StructField("field3",
786786
ArrayType(ArrayType(
787787
StructType(StructField("Test", StringType, true) :: Nil), true), false), true) ::
788788
StructField("field4",
789-
ArrayType(ArrayType(ArrayType(IntegerType, false), true), false), true) :: Nil)
789+
ArrayType(ArrayType(ArrayType(LongType, false), true), false), true) :: Nil)
790790

791791
assert(schema === jsonDF.schema)
792792

0 commit comments

Comments
 (0)