Skip to content

Commit 4456f91

Browse files
committed
Avoid GenericArrayData constructor perf. problems (see SPARK-30413)
1 parent c7d1534 commit 4456f91

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ private[parquet] class ParquetRowConverter(
518518

519519
override def getConverter(fieldIndex: Int): Converter = elementConverter
520520

521-
override def end(): Unit = updater.set(new GenericArrayData(currentArray.toArray))
521+
override def end(): Unit = updater.set(ParquetRowConverter.arrayListToArrayData(currentArray))
522522

523523
override def start(): Unit = currentArray.clear()
524524

@@ -566,7 +566,10 @@ private[parquet] class ParquetRowConverter(
566566
// The parquet map may contain null or duplicated map keys. When it happens, the behavior is
567567
// undefined.
568568
// TODO (SPARK-26174): disallow it with a config.
569-
updater.set(ArrayBasedMapData(currentKeys.toArray, currentValues.toArray))
569+
updater.set(
570+
new ArrayBasedMapData(
571+
ParquetRowConverter.arrayListToArrayData(currentKeys),
572+
ParquetRowConverter.arrayListToArrayData(currentValues)))
570573
}
571574

572575
override def start(): Unit = {
@@ -616,7 +619,7 @@ private[parquet] class ParquetRowConverter(
616619

617620
protected def newArrayUpdater(updater: ParentContainerUpdater) = new ParentContainerUpdater {
618621
override def start(): Unit = currentArray.clear()
619-
override def end(): Unit = updater.set(new GenericArrayData(currentArray.toArray))
622+
override def end(): Unit = updater.set(ParquetRowConverter.arrayListToArrayData(currentArray))
620623
override def set(value: Any): Unit = currentArray.add(value)
621624
}
622625
}
@@ -700,4 +703,9 @@ private[parquet] object ParquetRowConverter {
700703
val julianDay = buffer.getInt
701704
DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos)
702705
}
706+
707+
def arrayListToArrayData(arrayList: java.util.ArrayList[Any]): GenericArrayData = {
708+
// Cast to Array[Any] to force use of the primary constructor and avoid the GenericArrayData constructor performance problems; see SPARK-30413
709+
new GenericArrayData(arrayList.toArray.asInstanceOf[Array[Any]])
710+
}
703711
}

0 commit comments

Comments
 (0)