@@ -113,31 +113,6 @@ private[parquet] class CatalystPrimitiveConverter(val updater: ParentContainerUp
  * When used as a root converter, [[NoopUpdater]] should be used since root converters don't have
  * any "parent" container.
  *
- * @note Constructor argument [[parquetType]] refers to requested fields of the actual schema of the
- *       Parquet file being read, while constructor argument [[catalystType]] refers to requested
- *       fields of the global schema. The key difference is that, in case of schema merging,
- *       [[parquetType]] can be a subset of [[catalystType]]. For example, it's possible to have
- *       the following [[catalystType]]:
- *       {{{
- *         new StructType()
- *           .add("f1", IntegerType, nullable = false)
- *           .add("f2", StringType, nullable = true)
- *           .add("f3", new StructType()
- *             .add("f31", DoubleType, nullable = false)
- *             .add("f32", IntegerType, nullable = true)
- *             .add("f33", StringType, nullable = true), nullable = false)
- *       }}}
- *       and the following [[parquetType]] (`f2` and `f32` are missing):
- *       {{{
- *         message root {
- *           required int32 f1;
- *           required group f3 {
- *             required double f31;
- *             optional binary f33 (utf8);
- *           }
- *         }
- *       }}}
- *
  * @param parquetType Parquet schema of Parquet records
  * @param catalystType Spark SQL schema that corresponds to the Parquet record type
  * @param updater An updater which propagates converted field values to the parent container
@@ -179,31 +154,7 @@ private[parquet] class CatalystRowConverter(
 
   // Converters for each field.
   private val fieldConverters: Array[Converter with HasParentContainerUpdater] = {
-    // In case of schema merging, `parquetType` can be a subset of `catalystType`. We need to pad
-    // those missing fields and create converters for them, although values of these fields are
-    // always null.
-    val paddedParquetFields = {
-      val parquetFields = parquetType.getFields.asScala
-      val parquetFieldNames = parquetFields.map(_.getName).toSet
-      val missingFields = catalystType.filterNot(f => parquetFieldNames.contains(f.name))
-
-      // We don't need to worry about feature flag arguments like `assumeBinaryIsString` when
-      // creating the schema converter here, since values of missing fields are always null.
-      val toParquet = new CatalystSchemaConverter()
-
-      (parquetFields ++ missingFields.map(toParquet.convertField)).sortBy { f =>
-        catalystType.indexWhere(_.name == f.getName)
-      }
-    }
-
-    if (paddedParquetFields.length != catalystType.length) {
-      throw new UnsupportedOperationException(
-        "A Parquet file's schema has different number of fields with the table schema. " +
-          "Please enable schema merging by setting \"mergeSchema\" to true when load " +
-          "a Parquet dataset or set spark.sql.parquet.mergeSchema to true in SQLConf.")
-    }
-
-    paddedParquetFields.zip(catalystType).zipWithIndex.map {
+    parquetType.getFields.asScala.zip(catalystType).zipWithIndex.map {
       case ((parquetFieldType, catalystField), ordinal) =>
         // Converted field value should be set to the `ordinal`-th cell of `currentRow`
         newConverter(parquetFieldType, catalystField.dataType, new RowUpdater(currentRow, ordinal))
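For reference, the padding logic removed above can be illustrated outside the diff. The sketch below is a minimal, self-contained Scala approximation under assumed stand-in types (`ParquetField`, `CatalystField`, and `padParquetFields` are illustration-only names, not Spark or Parquet APIs): fields missing from the file schema are appended and the combined list is reordered to follow the table (Catalyst) schema.

// Illustration only: stand-ins for org.apache.parquet.schema.Type and
// org.apache.spark.sql.types.StructField.
object PadExample extends App {
  case class ParquetField(name: String)
  case class CatalystField(name: String, dataType: String)

  // Pad the file schema with the table-schema fields it is missing (schema
  // merging makes the file schema a subset of the table schema), then sort
  // the result back into the order of the table schema.
  def padParquetFields(
      parquetFields: Seq[ParquetField],
      catalystFields: Seq[CatalystField]): Seq[ParquetField] = {
    val parquetFieldNames = parquetFields.map(_.name).toSet
    val missingFields = catalystFields.filterNot(f => parquetFieldNames.contains(f.name))
    (parquetFields ++ missingFields.map(f => ParquetField(f.name)))
      .sortBy(f => catalystFields.indexWhere(_.name == f.name))
  }

  // Table schema has f1, f2, f3; the file only contains f1 and f3.
  val catalyst = Seq(
    CatalystField("f1", "int"),
    CatalystField("f2", "string"),
    CatalystField("f3", "double"))
  val parquet = Seq(ParquetField("f1"), ParquetField("f3"))

  // Prints: List(ParquetField(f1), ParquetField(f2), ParquetField(f3))
  println(padParquetFields(parquet, catalyst))
}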