
Commit 40f0372

address comments
1 parent f5675bd commit 40f0372

3 files changed, +9 -21 lines changed

sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 1 addition & 1 deletion

@@ -369,7 +369,7 @@ private[spark] object SQLConf {
     doc = "When true, automatically infer the data types for partitioned columns.")
 
   val PARTITION_MAX_FILES =
-    intConf("spark.sql.sources.maxFiles",
+    intConf("spark.sql.sources.maxConcurrentWrites",
       defaultValue = Some(5),
       doc = "The maximum number of concurrent files to open before falling back on sorting when " +
         "writing out files using dynamic partitioning.")

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelation.scala

Lines changed: 8 additions & 19 deletions
@@ -18,26 +18,18 @@
 package org.apache.spark.sql.execution.datasources
 
 import java.io.IOException
-import java.util.{Date, UUID}
-
-import scala.collection.JavaConversions.asScalaIterator
 
 import org.apache.hadoop.fs.Path
 import org.apache.hadoop.mapreduce._
-import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter => MapReduceFileOutputCommitter, FileOutputFormat}
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
 import org.apache.spark._
-import org.apache.spark.mapred.SparkHadoopMapRedUtil
-import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.codegen.GenerateProjection
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
-import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.{RunnableCommand, SQLExecution}
 import org.apache.spark.sql.sources._
-import org.apache.spark.sql.types.StringType
-import org.apache.spark.util.{Utils, SerializableConfiguration}
+import org.apache.spark.util.Utils
 
 
 /**
@@ -109,14 +101,11 @@ private[sql] case class InsertIntoHadoopFsRelation(
     // We create a DataFrame by applying the schema of relation to the data to make sure
     // we are writing data based on the expected schema.
 
-    // For partitioned relation r, r.schema's column ordering can be different from the column
-    // ordering of data.logicalPlan (partition columns are all moved after data column). We
-    // need a Project to adjust the ordering, so that inside InsertIntoHadoopFsRelation, we can
-    // safely apply the schema of r.schema to the data.
-
+    // A partitioned relation's schema can be different from the input logicalPlan, since
+    // partition columns are all moved after the data columns. We Project to adjust the ordering.
     // TODO: this belongs in the analyzer.
     val project = Project(
-      relation.schema.map(field => new UnresolvedAttribute(Seq(field.name))), query)
+      relation.schema.map(field => UnresolvedAttribute.quoted(field.name)), query)
     val queryExecution = DataFrame(sqlContext, project).queryExecution
 
     SQLExecution.withNewExecutionId(sqlContext, queryExecution) {
@@ -128,14 +117,14 @@ private[sql] case class InsertIntoHadoopFsRelation(
        df.schema == relation.schema,
        s"""DataFrame must have the same schema as the relation to which is inserted.
           |DataFrame schema: ${df.schema}
-          |Relation schema: ${relation.schema}
+          |Relation schema: ${relation.schema}
         """.stripMargin)
      val partitionColumnsInSpec = relation.partitionColumns.fieldNames
      require(
        partitionColumnsInSpec.sameElements(partitionColumns),
        s"""Partition columns mismatch.
           |Expected: ${partitionColumnsInSpec.mkString(", ")}
-          |Actual: ${partitionColumns.mkString(", ")}
+          |Actual: ${partitionColumns.mkString(", ")}
         """.stripMargin)
 
      val writerContainer = if (partitionColumns.isEmpty) {
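To make the reordering described in the new comment concrete, here is a purely illustrative sketch, not part of the patch: a partitioned relation reports its partition columns after the data columns, so the input is reordered to match that schema before the insert. It also shows why a factory like UnresolvedAttribute.quoted fits here: the field name is kept as a single name part instead of being parsed as a dotted path. The DataFrame, column names, and target ordering below are assumptions; the sketch reuses the sqlContext from the previous example.

// Illustrative only: mimic at the DataFrame API level what the Project above does at the
// logical-plan level. Column names and data are made up.
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.functions.col
import sqlContext.implicits._

val input = Seq(
  ("2015-08-01", 1L, "click"),   // "date" appears first in the incoming data...
  ("2015-08-02", 2L, "view")
).toDF("date", "id", "event")

// ...but a relation partitioned by "date" reports its schema as (id, event, date),
// so reorder the columns to match that target schema before writing.
val targetOrder = Seq("id", "event", "date")
val reordered = input.select(targetOrder.map(col): _*)

// Why quoted(): the whole field name stays a single name part, so a column literally
// named "user.name" is not reinterpreted as nested field "name" of column "user".
val attr = UnresolvedAttribute.quoted("user.name")   // nameParts == Seq("user.name")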

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala

Lines changed: 0 additions & 1 deletion
@@ -44,7 +44,6 @@ private[sql] abstract class BaseWriterContainer(
   with Logging
   with Serializable {
 
-  protected val needsConversion = relation.needConversion
   protected val dataSchema = relation.dataSchema
 
   protected val serializableConf = new SerializableConfiguration(job.getConfiguration)
