Commit bdee89e

Address the issues raised in review feedback and fix bugs
1 parent 9f08f76 · commit bdee89e

File tree

2 files changed: +18 -16 lines

sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala

Lines changed: 17 additions & 15 deletions
@@ -547,31 +547,33 @@ abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[Partitio
     // We use leaf dirs containing data files to discover the schema.
     val leafDirs = fileStatusCache.leafDirToChildrenFiles.keys.toSeq
     userDefinedPartitionColumns match {
-      case Some(schema) =>
+      case Some(userProvidedSchema) if userProvidedSchema.nonEmpty =>
         val spec = PartitioningUtils.parsePartitions(
-          leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME, false)
+          leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME, typeInference = false)

         // Without auto inference, all of value in the `row` should be null or in StringType,
         // we need to cast into the data type that user specified.
-        def castPartitionValueWithGivenSchema(row: InternalRow, schema: StructType)
-          : InternalRow = {
-          InternalRow((0 until row.numFields) map { i =>
-            Cast(Literal.create(row.getString(i), StringType), schema.fields(i).dataType).eval()
+        def castPartitionValuesToUserSchema(row: InternalRow) = {
+          InternalRow((0 until row.numFields).map { i =>
+            Cast(
+              Literal.create(row.getString(i), StringType),
+              userProvidedSchema.fields(i).dataType).eval()
           }: _*)
         }

-        assert(schema.length == spec.partitionColumns.length &&
-          schema.fieldNames.sameElements(spec.partitionColumns.fieldNames),
-          s"Auto infer partition column is not match with user specified, " +
-          s"expect $schema, but got ${spec.partitionColumns}}")
+        assert(userProvidedSchema.length == spec.partitionColumns.length &&
+          userProvidedSchema.fieldNames.sameElements(spec.partitionColumns.fieldNames),
+          s"Actual partitioning column names did not match user-specified partitioning schema; " +
+          s"expect $userProvidedSchema, but got ${spec.partitionColumns}}")

-        PartitionSpec(schema, spec.partitions.map { part =>
-          part.copy(values = castPartitionValueWithGivenSchema(part.values, schema))
+        PartitionSpec(userProvidedSchema, spec.partitions.map { part =>
+          part.copy(values = castPartitionValuesToUserSchema(part.values))
         })
-      case None =>
-        val typeInference = sqlContext.conf.partitionColumnTypeInferenceEnabled()
+
+      case _ =>
+        // user did not provide a partitioning schema
         PartitioningUtils.parsePartitions(leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME,
-          typeInference)
+          typeInference = sqlContext.conf.partitionColumnTypeInferenceEnabled())
     }
   }
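The heart of the change is visible in castPartitionValuesToUserSchema: with type inference turned off, every partition value parsed from a directory name arrives as a string, and it is coerced by evaluating a Catalyst Cast expression against the user-declared type. A minimal standalone sketch of that mechanism, assuming a Spark 1.5-era Catalyst API (the value "42" and the target IntegerType are illustrative, not taken from the commit):

import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.types.{IntegerType, StringType}

// A partition value parsed from a path segment such as "a=42" arrives as a string.
val raw = Literal.create("42", StringType)

// Evaluating a Cast expression coerces it to the type the user declared.
val coerced = Cast(raw, IntegerType).eval()  // yields 42: Int

This is also why the assert above only needs to check the column count and names: the values themselves are reconciled by the cast.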

sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala

Lines changed: 1 addition & 1 deletion
@@ -510,7 +510,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
     }
   }

-  test("Partition column type casting") {
+  test("SPARK-9735 Partition column type casting") {
     withTempPath { file =>
       val df = (for {
         i <- 1 to 3
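For context on what the renamed test covers, here is a hedged sketch of the round-trip it exercises. This is an illustration only, not the truncated test body above: the temp path, column names, and values are invented, and a SQLContext named sqlContext is assumed to be in scope.

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// Write a small DataFrame partitioned by an integer column; the partition
// values are encoded in directory names such as .../a=1/.
val df = sqlContext.range(1, 4).selectExpr("id AS a", "CAST(id AS STRING) AS b")
df.write.partitionBy("a").parquet("/tmp/spark-9735-demo")

// Read it back with a user-specified schema. Column "a" exists only in the
// directory names, so its string values must be cast to IntegerType rather
// than inferred (the code path changed in this commit).
val userSchema = StructType(Seq(
  StructField("b", StringType),
  StructField("a", IntegerType)))
val loaded = sqlContext.read.schema(userSchema).parquet("/tmp/spark-9735-demo")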
