@@ -242,29 +242,15 @@ final class DataFrameWriter private[sql](df: DataFrame) extends Logging {
242242 } yield {
243243 require(n > 0 && n < 100000 , " Bucket number must be greater than 0 and less than 100000." )
244244
245- if (normalizedParCols.isEmpty) {
246- BucketSpec (n, normalizedBucketColNames.get, normalizedSortColNames.getOrElse(Nil ))
247- } else {
248- // When partitionBy and blockBy are used at the same time, the overlapping columns are
249- // useless. Thus, we removed these overlapping columns from blockBy.
250- val bucketColumns : Seq [String ] =
251- normalizedBucketColNames.get.filterNot(normalizedParCols.get.contains)
252-
253- if (bucketColumns.nonEmpty) {
254- if (bucketColumns.length != normalizedBucketColNames.get.length) {
255- val removedColumns : Seq [String ] =
256- normalizedBucketColNames.get.filter(normalizedParCols.get.contains)
257- logInfo(s " bucketBy columns is changed to ' ${bucketColumnNames.get.mkString(" , " )}' " +
258- s " after removing the columns ' ${removedColumns.mkString(" , " )}' that are part of " +
259- s " partitionBy columns ' ${partitioningColumns.get.mkString(" , " )}' " )
260- }
261- BucketSpec (n, bucketColumns, normalizedSortColNames.getOrElse(Nil ))
262- } else {
245+ // partitionBy columns cannot be used in bucketBy
246+ if (normalizedParCols.nonEmpty &&
247+ normalizedBucketColNames.get.toSet.intersect(normalizedParCols.get.toSet).nonEmpty) {
263248 throw new AnalysisException (
264- s " bucketBy columns ' ${bucketColumnNames.get.mkString(" , " )}' should not be the " +
265- s " subset of partitionBy columns ' ${partitioningColumns.get.mkString(" , " )}' " )
266- }
249+ s " bucketBy columns ' ${bucketColumnNames.get.mkString(" , " )}' should not be part of " +
250+ s " partitionBy columns ' ${partitioningColumns.get.mkString(" , " )}' " )
267251 }
252+
253+ BucketSpec (n, normalizedBucketColNames.get, normalizedSortColNames.getOrElse(Nil ))
268254 }
269255 }
270256
0 commit comments