Skip to content

Commit 06b96ed

Browse files
committed
address comments
1 parent 1e19944 commit 06b96ed

File tree

2 files changed

+27
-7
lines changed

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -167,15 +167,17 @@ final class DataFrameWriter private[sql](df: DataFrame) {
167167
}
168168

169169
private def insertInto(tableIdent: TableIdentifier): Unit = {
170-
val partitions = partitioningColumns.map(_.map(col => col -> (None: Option[String])).toMap)
170+
val partitions = normalizedParCols.map(_.map(col => col -> (None: Option[String])).toMap)
171171
val overwrite = mode == SaveMode.Overwrite
172172

173-
// A partitioned relation schema's can be different from the input logicalPlan, since
174-
// partition columns are all moved after data column. We Project to adjust the ordering.
175-
// TODO: this belongs in the analyzer.
176-
val input = partitioningColumns.map { parCols =>
177-
val projectList = df.logicalPlan.output.filterNot(c => parCols.contains(c.name)) ++
178-
parCols.map(UnresolvedAttribute(_))
173+
// A partitioned relation's schema can be different from the input logicalPlan, since
174+
// partition columns are all moved after data columns. We Project to adjust the ordering.
175+
// TODO: this belongs to the analyzer.
176+
val input = normalizedParCols.map { parCols =>
177+
val (inputPartCols, inputDataCols) = df.logicalPlan.output.partition { attr =>
178+
parCols.contains(attr.name)
179+
}
180+
val projectList = inputDataCols ++ inputPartCols.map(c => UnresolvedAttribute(c.name))
179181
Project(projectList, df.logicalPlan)
180182
}.getOrElse(df.logicalPlan)
181183

@@ -188,6 +190,16 @@ final class DataFrameWriter private[sql](df: DataFrame) {
188190
ifNotExists = false)).toRdd
189191
}
190192

193+
private def normalizedParCols: Option[Seq[String]] = partitioningColumns.map { parCols =>
194+
parCols.map { col =>
195+
df.logicalPlan.output
196+
.map(_.name)
197+
.find(df.queryExecution.analyzer.resolver(_, col))
198+
.getOrElse(throw new AnalysisException(
199+
s"Partition column $col not found in schema ${df.logicalPlan.schema}"))
200+
}
201+
}
202+
191203
/**
192204
* Saves the content of the [[DataFrame]] as the specified table.
193205
*

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1426,11 +1426,19 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
14261426
withTable("tbl11453") {
14271427
Seq("1" -> "10", "2" -> "20").toDF("i", "j")
14281428
.write.partitionBy("i").saveAsTable("tbl11453")
1429+
14291430
Seq("3" -> "30").toDF("i", "j")
14301431
.write.mode(SaveMode.Append).partitionBy("i").saveAsTable("tbl11453")
14311432
checkAnswer(
14321433
sqlContext.read.table("tbl11453").select("i", "j").orderBy("i"),
14331434
Row("1", "10") :: Row("2", "20") :: Row("3", "30") :: Nil)
1435+
1436+
// make sure case sensitivity is correct.
1437+
Seq("4" -> "40").toDF("i", "j")
1438+
.write.mode(SaveMode.Append).partitionBy("I").saveAsTable("tbl11453")
1439+
checkAnswer(
1440+
sqlContext.read.table("tbl11453").select("i", "j").orderBy("i"),
1441+
Row("1", "10") :: Row("2", "20") :: Row("3", "30") :: Row("4", "40") :: Nil)
14341442
}
14351443
}
14361444
}

0 commit comments

Comments (0)