Commit d3200cf

simplification
1 parent b4985d9 commit d3200cf

9 files changed, +212 -299 lines changed

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

Lines changed: 4 additions & 4 deletions
@@ -129,8 +129,8 @@ final class DataFrameWriter private[sql](df: DataFrame) {
   }
 
   /**
-   * Buckets the output by the given columns on the file system. If specified, the output is
-   * laid out on the file system similar to Hive's bucketing scheme.
+   * Buckets the output by the given columns. If specified, the output is laid out on the file
+   * system similar to Hive's bucketing scheme.
    *
    * This is applicable for Parquet, JSON and ORC.
    *
@@ -144,7 +144,7 @@ final class DataFrameWriter private[sql](df: DataFrame) {
   }
 
   /**
-   * Sorts the bucketed output by the given columns.
+   * Sorts the output in each bucket by the given columns.
    *
    * This is applicable for Parquet, JSON and ORC.
    *
@@ -239,7 +239,7 @@ final class DataFrameWriter private[sql](df: DataFrame) {
     for {
       n <- numBuckets
     } yield {
-      require(n > 0, "Bucket number must be greater than 0.")
+      require(n > 0 && n < 100000, "Bucket number must be greater than 0 and less than 100000.")
       BucketSpec(n, normalizedBucketColNames.get, normalizedSortColNames.getOrElse(Nil))
     }
   }
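
The doc-comment changes above cover the bucketBy and sortBy options on DataFrameWriter, and the require() now also caps the bucket count below 100000. A minimal usage sketch of the API as documented here, assuming the bucketBy(numBuckets, col, cols*) / sortBy(col, cols*) signatures and a saveAsTable-based write; the source and target table names and column names are illustrative only:

    import org.apache.spark.sql.SQLContext

    // Sketch: write a bucketed, sorted Parquet table using the options documented above.
    // Assumptions: bucketBy/sortBy signatures as in the eventual public API, and that the
    // bucketed write goes through saveAsTable; names below are made up for illustration.
    def writeBucketed(sqlContext: SQLContext): Unit = {
      val df = sqlContext.table("events")       // hypothetical source table
      df.write
        .format("parquet")                      // bucketing applies to Parquet, JSON and ORC
        .bucketBy(8, "user_id")                 // 0 < numBuckets < 100000 per the new require()
        .sortBy("event_time")                   // sorts rows within each bucket
        .saveAsTable("events_bucketed")
    }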

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelation.scala

Lines changed: 10 additions & 27 deletions
@@ -125,39 +125,22 @@ private[sql] case class InsertIntoHadoopFsRelation(
           |Actual: ${partitionColumns.mkString(", ")}
         """.stripMargin)
 
-      val bucketSpec = relation match {
-        case relation: BucketedHadoopFsRelation => relation.bucketSpec
-        case _ => None
-      }
-
-      val writerContainer = if (partitionColumns.isEmpty && bucketSpec.isEmpty) {
+      val writerContainer = if (partitionColumns.isEmpty && relation.bucketSpec.isEmpty) {
         new DefaultWriterContainer(relation, job, isAppend)
       } else {
         val output = df.queryExecution.executedPlan.output
         val (partitionOutput, dataOutput) =
           output.partition(a => partitionColumns.contains(a.name))
 
-        if (bucketSpec.isEmpty) {
-          new DynamicPartitionWriterContainer(
-            relation,
-            job,
-            partitionOutput,
-            dataOutput,
-            output,
-            PartitioningUtils.DEFAULT_PARTITION_NAME,
-            sqlContext.conf.getConf(SQLConf.PARTITION_MAX_FILES),
-            isAppend)
-        } else {
-          new BucketedPartitionWriterContainer(
-            relation.asInstanceOf[BucketedHadoopFsRelation],
-            job,
-            partitionOutput,
-            bucketSpec.get,
-            dataOutput,
-            output,
-            PartitioningUtils.DEFAULT_PARTITION_NAME,
-            isAppend)
-        }
+        new DynamicPartitionWriterContainer(
+          relation,
+          job,
+          partitionOutput,
+          dataOutput,
+          output,
+          PartitioningUtils.DEFAULT_PARTITION_NAME,
+          sqlContext.conf.getConf(SQLConf.PARTITION_MAX_FILES),
+          isAppend)
       }
 
       // This call shouldn't be put into the `try` block below because it only initializes and
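
The simplification above reads relation.bucketSpec directly and drops the separate BucketedPartitionWriterContainer branch, which implies the bucket spec now lives on the base relation type and DynamicPartitionWriterContainer handles bucketing itself. A rough sketch of the shape this appears to assume; only BucketSpec's constructor arguments are taken from the diff, the rest is guessed:

    // Sketch only: the minimal type shape under which `relation.bucketSpec` compiles for
    // every relation. Field names mirror the BucketSpec(...) call in DataFrameWriter above.
    case class BucketSpec(
        numBuckets: Int,
        bucketColumnNames: Seq[String],
        sortColumnNames: Seq[String])

    abstract class HadoopFsRelation {
      // Defaults to None so non-bucketed relations need no special-casing at the call site.
      def bucketSpec: Option[BucketSpec] = None
    }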

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ResolvedDataSource.scala

Lines changed: 7 additions & 15 deletions
@@ -240,21 +240,13 @@ object ResolvedDataSource extends Logging {
         val equality = columnNameEquality(caseSensitive)
         val dataSchema = StructType(
           data.schema.filterNot(f => partitionColumns.exists(equality(_, f.name))))
-        val r = dataSource match {
-          case provider: BucketedHadoopFsRelationProvider => provider.createRelation(
-            sqlContext,
-            Array(outputPath.toString),
-            Option(dataSchema.asNullable),
-            Option(partitionColumnsSchema(data.schema, partitionColumns, caseSensitive)),
-            bucketSpec,
-            caseInsensitiveOptions)
-          case provider: HadoopFsRelationProvider => provider.createRelation(
-            sqlContext,
-            Array(outputPath.toString),
-            Option(dataSchema.asNullable),
-            Option(partitionColumnsSchema(data.schema, partitionColumns, caseSensitive)),
-            caseInsensitiveOptions)
-        }
+        val r = dataSource.createRelation(
+          sqlContext,
+          Array(outputPath.toString),
+          Some(dataSchema.asNullable),
+          Some(partitionColumnsSchema(data.schema, partitionColumns, caseSensitive)),
+          bucketSpec,
+          caseInsensitiveOptions)
 
         // For partitioned relation r, r.schema's column ordering can be different from the column
         // ordering of data.logicalPlan (partition columns are all moved after data column). This
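
With the provider match removed, every data source is driven through a single createRelation call that always receives the bucket spec. A sketch of the provider signature this call site implies, reusing the BucketSpec and HadoopFsRelation shapes sketched above; parameter names are guesses, only the argument order comes from the diff:

    import org.apache.spark.sql.SQLContext
    import org.apache.spark.sql.types.StructType

    // Sketch of the interface implied by the unified call above; not the exact trait
    // from this commit. Argument order mirrors dataSource.createRelation(...).
    trait HadoopFsRelationProvider {
      def createRelation(
          sqlContext: SQLContext,
          paths: Array[String],
          dataSchema: Option[StructType],
          partitionColumns: Option[StructType],
          bucketSpec: Option[BucketSpec],       // now passed for every provider
          parameters: Map[String, String]): HadoopFsRelation
    }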
