@@ -77,16 +77,24 @@ private[sql] object PartitioningUtils {
7777 defaultPartitionName : String ,
7878 typeInference : Boolean ): PartitionSpec = {
7979 // First, we need to parse every partition's path and see if we can find partition values.
80- val pathsWithPartitionValues = paths.flatMap { path =>
81- parsePartition(path, defaultPartitionName, typeInference).map(path -> _)
82- }
80+ val (partitionValues, optBasePaths) = paths.map { path =>
81+ parsePartition(path, defaultPartitionName, typeInference)
82+ }.unzip
83+
84+ val pathsWithPartitionValues = paths.zip(partitionValues).flatMap(x => x._2.map(x._1 -> _))
8385
8486 if (pathsWithPartitionValues.isEmpty) {
8587 // This dataset is not partitioned.
8688 PartitionSpec .emptySpec
8789 } else {
8890 // This dataset is partitioned. We need to check whether all partitions have the same
8991 // partition columns and resolve potential type conflicts.
92+ val basePaths = optBasePaths.flatMap(x => x)
93+ assert(
94+ basePaths.distinct.size == 1 ,
95+ " Conflicting directory structures detected. Suspicious paths:\b " +
96+ basePaths.mkString(" \n\t " , " \n\t " , " \n\n " ))
97+
9098 val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues)
9199
92100 // Creates the StructType which represents the partition columns.
@@ -110,12 +118,12 @@ private[sql] object PartitioningUtils {
110118 }
111119
112120 /**
113- * Parses a single partition, returns column names and values of each partition column. For
114- * example, given:
121+ * Parses a single partition, returns column names and values of each partition column, also
122+ * the base path. For example, given:
115123 * {{{
116124 * path = hdfs://<host>:<port>/path/to/partition/a=42/b=hello/c=3.14
117125 * }}}
118- * it returns:
126+ * it returns the partition :
119127 * {{{
120128 * PartitionValues(
121129 * Seq("a", "b", "c"),
@@ -124,34 +132,40 @@ private[sql] object PartitioningUtils {
124132 * Literal.create("hello", StringType),
125133 * Literal.create(3.14, FloatType)))
126134 * }}}
135+ * and the base path:
136+ * {{{
137+ * /path/to/partition
138+ * }}}
127139 */
private[sql] def parsePartition(
    path: Path,
    defaultPartitionName: String,
    typeInference: Boolean): (Option[PartitionValues], Option[Path]) = {
  // The result is a pair (partition values, base path):
  //  - (None, None) if a `_temporary` directory is met while walking up from `path`;
  //  - (None, Some(path)) if no partition column was found on the way up;
  //  - (Some(values), Some(base)) otherwise, where `base` is the parent directory of the
  //    top-most partition column directory that was collected.
  val columns = ArrayBuffer.empty[(String, Literal)]
  // Old Hadoop versions don't have `Path.isRoot`
  var finished = path.getParent == null
  var chopped = path
  var basePath = path

  while (!finished) {
    // Sometimes (e.g., when speculative task is enabled), temporary directories may be left
    // uncleaned.  Here we simply ignore them.
    if (chopped.getName.toLowerCase == "_temporary") {
      return (None, None)
    }

    val maybeColumn = parsePartitionColumn(chopped.getName, defaultPartitionName, typeInference)
    maybeColumn.foreach(columns += _)

    // `chopped` always has a parent inside the loop: the loop is only (re-)entered while
    // `chopped.getParent != null`.
    val parent = chopped.getParent

    // A partition column directory can never be the base path itself; the base path is at
    // least one level above it. Without this, a table located directly under the filesystem
    // root (e.g. `/a=1/b=2`) would report `/a=1` as its base path, because the walk stops at
    // the root before any non-partition directory is seen.
    basePath = if (maybeColumn.isDefined) parent else chopped
    chopped = parent

    // Stop at the first non-partition directory above the collected columns, or at the root.
    finished = (maybeColumn.isEmpty && columns.nonEmpty) || chopped.getParent == null
  }

  if (columns.isEmpty) {
    (None, Some(path))
  } else {
    // Columns were collected leaf-first; reverse to get them in directory (outer-first) order.
    val (columnNames, values) = columns.reverse.unzip
    (Some(PartitionValues(columnNames, values)), Some(basePath))
  }
}
157171
0 commit comments