From 892519d3bb7166ea184f0c070759b8a3b679e2c4 Mon Sep 17 00:00:00 2001
From: Michael Davies <Michael.BellDavies@gmail.com>
Date: Tue, 30 Dec 2014 13:00:25 +0000
Subject: [PATCH] [SPARK-4386] Improve performance when writing Parquet files

Convert type of RowWriteSupport.attributes to Array.

Analysis of performance for writing very wide tables shows that time is spent predominantly in apply method on  attributes var. Type of attributes previously was LinearSeqOptimized and apply is O(N) which made write O(N squared).

Measurements on 575 column table showed this change showed a 6x improvement in write times.
---
 .../org/apache/spark/sql/parquet/ParquetTableSupport.scala    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
index ef3687e692964..9049eb5932b79 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala
@@ -130,7 +130,7 @@ private[parquet] object RowReadSupport {
 private[parquet] class RowWriteSupport extends WriteSupport[Row] with Logging {
 
   private[parquet] var writer: RecordConsumer = null
-  private[parquet] var attributes: Seq[Attribute] = null
+  private[parquet] var attributes: Array[Attribute] = null
 
   override def init(configuration: Configuration): WriteSupport.WriteContext = {
     val origAttributesStr: String = configuration.get(RowWriteSupport.SPARK_ROW_SCHEMA)
@@ -138,7 +138,7 @@ private[parquet] class RowWriteSupport extends WriteSupport[Row] with Logging {
     metadata.put(RowReadSupport.SPARK_METADATA_KEY, origAttributesStr)
 
     if (attributes == null) {
-      attributes = ParquetTypesConverter.convertFromString(origAttributesStr)
+      attributes = ParquetTypesConverter.convertFromString(origAttributesStr).toArray
     }
 
     log.debug(s"write support initialized for requested schema $attributes")