File tree — 1 file changed: +13 −0 lines
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala — 1 file changed: +13 −0 lines
@@ -320,8 +320,21 @@ private[parquet] class ParquetRowConverter(
320320 case t : StructType =>
321321 val wrappedUpdater = {
322322 if (updater.isInstanceOf [RowUpdater ]) {
323+ // `updater` is a RowUpdater, implying that the parent container is a struct.
324+ // We do NOT need to perform defensive copying here because either:
325+ //
326+ // 1. The path from the schema root to this field consists only of nested
327+ // structs, so this converter will only be invoked once per record and
328+ // we don't need to copy because copying will be done in the final
329+ // UnsafeProjection, or
330+ // 2. The path from the schema root to this field contains a map or array,
331+ // in which case we will perform a recursive defensive copy via the
332+ // `else` branch below.
323333 updater
324334 } else {
335+ // `updater` is NOT a RowUpdater, implying that the parent container is not a struct.
336+ // Therefore, the parent container must be a map or array. We need to copy the row
337+ // because this converter might be invoked multiple times per Parquet input record.
325338 new ParentContainerUpdater {
326339 override def set (value : Any ): Unit =
327340 updater.set(value.asInstanceOf [InternalRow ].copy())
You can’t perform that action at this time.
0 commit comments