Skip to content

Commit 2ed8ea9

Browse files
committed
Add comments.
1 parent ed894b4 commit 2ed8ea9

File tree

1 file changed

+13
-0
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -320,8 +320,21 @@ private[parquet] class ParquetRowConverter(
       case t: StructType =>
         val wrappedUpdater = {
           if (updater.isInstanceOf[RowUpdater]) {
+            // `updater` is a RowUpdater, implying that the parent container is a struct.
+            // We do NOT need to perform defensive copying here because either:
+            //
+            // 1. The path from the schema root to this field consists only of nested
+            //    structs, so this converter will only be invoked once per record and
+            //    we don't need to copy because copying will be done in the final
+            //    UnsafeProjection, or
+            // 2. The path from the schema root to this field contains a map or array,
+            //    in which case we will perform a recursive defensive copy via the
+            //    `else` branch below.
             updater
           } else {
+            // `updater` is NOT a RowUpdater, implying that the parent container is not a struct.
+            // Therefore, the parent container must be a map or array. We need to copy the row
+            // because this converter might be invoked multiple times per Parquet input record.
             new ParentContainerUpdater {
               override def set(value: Any): Unit =
                 updater.set(value.asInstanceOf[InternalRow].copy())

0 commit comments

Comments
 (0)