@@ -43,7 +43,7 @@ private[sql] object InferSchema {
4343 }
4444
4545 // perform schema inference on each row and merge afterwards
46- schemaData.mapPartitions { iter =>
46+ val rootType = schemaData.mapPartitions { iter =>
4747 val factory = new JsonFactory ()
4848 iter.map { row =>
4949 try {
@@ -55,8 +55,13 @@ private[sql] object InferSchema {
5555 StructType (Seq (StructField (columnNameOfCorruptRecords, StringType )))
5656 }
5757 }
58- }.treeAggregate[DataType ](StructType (Seq ()))(compatibleRootType, compatibleRootType) match {
59- case st : StructType => nullTypeToStringType(st)
58+ }.treeAggregate[DataType ](StructType (Seq ()))(compatibleRootType, compatibleRootType)
59+
60+ canonicalizeType(rootType) match {
61+ case Some (st : StructType ) => st
62+ case _ =>
63+ // canonicalizeType erases all empty structs, including the only one we want to keep
64+ StructType (Seq ())
6065 }
6166 }
6267
@@ -116,22 +121,35 @@ private[sql] object InferSchema {
116121 }
117122 }
118123
119- private def nullTypeToStringType (struct : StructType ): StructType = {
120- val fields = struct.fields.map {
121- case StructField (fieldName, dataType, nullable, _) =>
122- val newType = dataType match {
123- case NullType => StringType
124- case ArrayType (NullType , containsNull) => ArrayType (StringType , containsNull)
125- case ArrayType (struct : StructType , containsNull) =>
126- ArrayType (nullTypeToStringType(struct), containsNull)
127- case struct : StructType => nullTypeToStringType(struct)
128- case other : DataType => other
129- }
124+ /**
125+ * Convert NullType to StringType and remove StructTypes with no fields, as well as fields whose name is empty. Returns None when the type itself is removed.
126+ */
127+ private def canonicalizeType : DataType => Option [DataType ] = {
128+ case at@ ArrayType (elementType, _) =>
129+ for {
130+ canonicalType <- canonicalizeType(elementType)
131+ } yield {
132+ at.copy(canonicalType)
133+ }
130134
131- StructField (fieldName, newType, nullable)
132- }
135+ case StructType (fields) =>
136+ val canonicalFields = for {
137+ field <- fields
138+ if field.name.nonEmpty
139+ canonicalType <- canonicalizeType(field.dataType)
140+ } yield {
141+ field.copy(dataType = canonicalType)
142+ }
143+
144+ if (canonicalFields.nonEmpty) {
145+ Some (StructType (canonicalFields))
146+ } else {
147+ // per SPARK-8093: empty structs should be deleted; None makes the enclosing field or array drop out too
148+ None
149+ }
133150
134- StructType (fields)
151+ case NullType => Some (StringType )
152+ case other => Some (other)
135153 }
136154
137155 /**
0 commit comments