Commit 6d16f59

Roll back to using Scala ArrayBuffer, but continue using clear()
1 parent 4456f91 commit 6d16f59

File tree: 1 file changed (+14 −21 lines)


sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala

Lines changed: 14 additions & 21 deletions
@@ -22,6 +22,7 @@ import java.nio.ByteOrder
 import java.util.TimeZone
 
 import scala.collection.JavaConverters._
+import scala.collection.mutable.ArrayBuffer
 
 import org.apache.parquet.column.Dictionary
 import org.apache.parquet.io.api.{Binary, Converter, GroupConverter, PrimitiveConverter}
@@ -41,7 +42,7 @@ import org.apache.spark.unsafe.types.UTF8String
  * A [[ParentContainerUpdater]] is used by a Parquet converter to set converted values to some
  * corresponding parent container. For example, a converter for a `StructType` field may set
  * converted values to a [[InternalRow]]; or a converter for array elements may append converted
- * values to a [[java.util.ArrayList]].
+ * values to an [[ArrayBuffer]].
  */
 private[parquet] trait ParentContainerUpdater {
   /** Called before a record field is being converted */
@@ -467,7 +468,7 @@ private[parquet] class ParquetRowConverter(
       updater: ParentContainerUpdater)
     extends ParquetGroupConverter(updater) {
 
-    private[this] val currentArray = new java.util.ArrayList[Any]()
+    private[this] val currentArray = ArrayBuffer.empty[Any]
 
     private[this] val elementConverter: Converter = {
       val repeatedType = parquetSchema.getType(0)
@@ -506,7 +507,7 @@ private[parquet] class ParquetRowConverter(
         // If the repeated field corresponds to the element type, creates a new converter using the
         // type of the repeated field.
         newConverter(repeatedType, elementType, new ParentContainerUpdater {
-          override def set(value: Any): Unit = currentArray.add(value)
+          override def set(value: Any): Unit = currentArray += value
         })
       } else {
         // If the repeated field corresponds to the syntactic group in the standard 3-level Parquet
@@ -518,7 +519,7 @@ private[parquet] class ParquetRowConverter(
 
     override def getConverter(fieldIndex: Int): Converter = elementConverter
 
-    override def end(): Unit = updater.set(ParquetRowConverter.arrayListToArrayData(currentArray))
+    override def end(): Unit = updater.set(new GenericArrayData(currentArray.toArray))
 
     override def start(): Unit = currentArray.clear()
 
@@ -535,7 +536,7 @@ private[parquet] class ParquetRowConverter(
 
       override def getConverter(fieldIndex: Int): Converter = converter
 
-      override def end(): Unit = currentArray.add(currentElement)
+      override def end(): Unit = currentArray += currentElement
 
       override def start(): Unit = currentElement = null
     }
@@ -548,8 +549,8 @@ private[parquet] class ParquetRowConverter(
       updater: ParentContainerUpdater)
     extends ParquetGroupConverter(updater) {
 
-    private[this] val currentKeys = new java.util.ArrayList[Any]()
-    private[this] val currentValues = new java.util.ArrayList[Any]()
+    private[this] val currentKeys = ArrayBuffer.empty[Any]
+    private[this] val currentValues = ArrayBuffer.empty[Any]
 
     private[this] val keyValueConverter = {
       val repeatedType = parquetType.getType(0).asGroupType()
@@ -566,10 +567,7 @@ private[parquet] class ParquetRowConverter(
       // The parquet map may contains null or duplicated map keys. When it happens, the behavior is
       // undefined.
      // TODO (SPARK-26174): disallow it with a config.
-      updater.set(
-        new ArrayBasedMapData(
-          ParquetRowConverter.arrayListToArrayData(currentKeys),
-          ParquetRowConverter.arrayListToArrayData(currentValues)))
+      updater.set(ArrayBasedMapData(currentKeys.toArray, currentValues.toArray))
     }
 
     override def start(): Unit = {
@@ -603,8 +601,8 @@ private[parquet] class ParquetRowConverter(
       override def getConverter(fieldIndex: Int): Converter = converters(fieldIndex)
 
       override def end(): Unit = {
-        currentKeys.add(currentKey)
-        currentValues.add(currentValue)
+        currentKeys += currentKey
+        currentValues += currentValue
       }
 
       override def start(): Unit = {
@@ -615,12 +613,12 @@ private[parquet] class ParquetRowConverter(
   }
 
   private trait RepeatedConverter {
-    private[this] val currentArray = new java.util.ArrayList[Any]()
+    private[this] val currentArray = ArrayBuffer.empty[Any]
 
     protected def newArrayUpdater(updater: ParentContainerUpdater) = new ParentContainerUpdater {
       override def start(): Unit = currentArray.clear()
-      override def end(): Unit = updater.set(ParquetRowConverter.arrayListToArrayData(currentArray))
-      override def set(value: Any): Unit = currentArray.add(value)
+      override def end(): Unit = updater.set(new GenericArrayData(currentArray.toArray))
+      override def set(value: Any): Unit = currentArray += value
     }
   }
 
@@ -703,9 +701,4 @@ private[parquet] object ParquetRowConverter {
     val julianDay = buffer.getInt
     DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos)
   }
-
-  def arrayListToArrayData(arrayList: java.util.ArrayList[Any]): GenericArrayData = {
-    // Cast to force use of primary constructor; see SPARK-30413
-    new GenericArrayData(arrayList.toArray.asInstanceOf[Array[Any]])
-  }
 }
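
The change settles on a single per-converter ArrayBuffer that is cleared in start(), appended to with += while a record is converted, and copied out with toArray in end(). Below is a minimal, self-contained sketch of that reuse pattern; RecordBuffer and RecordBufferDemo are hypothetical names invented for illustration and do not appear in Spark, and the sketch deliberately returns a plain Array[Any] instead of wrapping it in Spark's GenericArrayData.

import scala.collection.mutable.ArrayBuffer

// Hypothetical stand-in for the converters in the diff above: one ArrayBuffer
// per instance, cleared at the start of each record instead of reallocated.
final class RecordBuffer {
  private[this] val currentArray = ArrayBuffer.empty[Any]

  def start(): Unit = currentArray.clear()          // reuse the same buffer
  def add(value: Any): Unit = currentArray += value // append one converted value
  def end(): Array[Any] = currentArray.toArray      // snapshot the current contents
}

object RecordBufferDemo {
  def main(args: Array[String]): Unit = {
    val buf = new RecordBuffer
    buf.start(); buf.add(1); buf.add(2)
    println(buf.end().mkString(", "))               // prints: 1, 2
    buf.start(); buf.add(3)                         // same buffer, no new allocation
    println(buf.end().mkString(", "))               // prints: 3
  }
}

The trade-off mirrors the commit message: toArray still copies the elements once per record, but clear() keeps the backing buffer around, so converting many records reuses one allocation rather than building a fresh container each time.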
