Commit bdee89e

Address the issues raised in review feedback and fix bugs
1 parent 9f08f76 · commit bdee89e

File tree

2 files changed: +18 -16 lines

sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala

Lines changed: 17 additions & 15 deletions
@@ -547,31 +547,33 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio
     // We use leaf dirs containing data files to discover the schema.
     val leafDirs = fileStatusCache.leafDirToChildrenFiles.keys.toSeq
     userDefinedPartitionColumns match {
-      case Some(schema) =>
+      case Some(userProvidedSchema) if userProvidedSchema.nonEmpty =>
         val spec = PartitioningUtils.parsePartitions(
-          leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME, false)
+          leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME, typeInference = false)

         // Without auto inference, all of value in the `row` should be null or in StringType,
         // we need to cast into the data type that user specified.
-        def castPartitionValueWithGivenSchema(row: InternalRow, schema: StructType)
-          : InternalRow = {
-          InternalRow((0 until row.numFields) map { i =>
-            Cast(Literal.create(row.getString(i), StringType), schema.fields(i).dataType).eval()
+        def castPartitionValuesToUserSchema(row: InternalRow) = {
+          InternalRow((0 until row.numFields).map { i =>
+            Cast(
+              Literal.create(row.getString(i), StringType),
+              userProvidedSchema.fields(i).dataType).eval()
           }: _*)
         }

-        assert(schema.length == spec.partitionColumns.length &&
-          schema.fieldNames.sameElements(spec.partitionColumns.fieldNames),
-          s"Auto infer partition column is not match with user specified, " +
-          s"expect $schema, but got ${spec.partitionColumns}}")
+        assert(userProvidedSchema.length == spec.partitionColumns.length &&
+          userProvidedSchema.fieldNames.sameElements(spec.partitionColumns.fieldNames),
+          s"Actual partitioning column names did not match user-specified partitioning schema; " +
+          s"expect $userProvidedSchema, but got ${spec.partitionColumns}}")

-        PartitionSpec(schema, spec.partitions.map { part =>
-          part.copy(values = castPartitionValueWithGivenSchema(part.values, schema))
+        PartitionSpec(userProvidedSchema, spec.partitions.map { part =>
+          part.copy(values = castPartitionValuesToUserSchema(part.values))
         })
-      case None =>
-        val typeInference = sqlContext.conf.partitionColumnTypeInferenceEnabled()
+
+      case _ =>
+        // user did not provide a partitioning schema
         PartitioningUtils.parsePartitions(leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME,
-          typeInference)
+          typeInference = sqlContext.conf.partitionColumnTypeInferenceEnabled())
     }
   }
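The heart of the change is visible in castPartitionValuesToUserSchema: with type inference turned off, every partition value parsed from a directory name arrives as a string, and it is coerced by evaluating a Catalyst Cast expression against the user-declared type. A minimal standalone sketch of that mechanism, assuming a Spark 1.5-era Catalyst API (the value "42" and the target IntegerType are illustrative, not taken from the commit):

import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.types.{IntegerType, StringType}

// A partition value parsed from a path segment such as "a=42" arrives as a string.
val raw = Literal.create("42", StringType)

// Evaluating a Cast expression coerces it to the type the user declared.
val coerced = Cast(raw, IntegerType).eval()  // yields 42: Int

This is also why the assert above only needs to check the column count and names: the values themselves are reconciled by the cast.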

sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala

Lines changed: 1 addition & 1 deletion
@@ -510,7 +510,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
     }
   }

-  test("Partition column type casting") {
+  test("SPARK-9735 Partition column type casting") {
     withTempPath { file =>
       val df = (for {
         i <- 1 to 3
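For context on what the renamed test covers, here is a hedged sketch of the round-trip it exercises. This is an illustration only, not the truncated test body above: the temp path, column names, and values are invented, and a SQLContext named sqlContext is assumed to be in scope.

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Write a small DataFrame partitioned by an integer column; the partition
// values are encoded in directory names such as .../a=1/.
val df = sqlContext.range(1, 4).selectExpr("id AS a", "CAST(id AS STRING) AS b")
df.write.partitionBy("a").parquet("/tmp/spark-9735-demo")

// Read it back with a user-specified schema. Column "a" exists only in the
// directory names, so its string values must be cast to IntegerType rather
// than inferred (the code path changed in this commit).
val userSchema = StructType(Seq(
  StructField("b", StringType),
  StructField("a", IntegerType)))
val loaded = sqlContext.read.schema(userSchema).parquet("/tmp/spark-9735-demo")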
