
Commit 2e08778

address comments
1 parent 028b0ac commit 2e08778

4 files changed: 31 additions & 29 deletions

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 8 additions & 6 deletions
@@ -278,23 +278,24 @@ object SQLConf {
   val ADAPTIVE_EXECUTION_ENABLED = buildConf("spark.sql.adaptive.enabled")
     .doc("When true, enable adaptive query execution.")
     .booleanConf
-    .createWithDefault(true)
+    .createWithDefault(false)
 
   val SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS =
     buildConf("spark.sql.adaptive.minNumPostShufflePartitions")
       .doc("The advisory minimum number of post-shuffle partitions used in adaptive execution.")
       .intConf
-      .checkValue(numPartitions => numPartitions > 0, "The minimum shuffle partition number " +
+      .checkValue(_ > 0, "The minimum shuffle partition number " +
        "must be a positive integer.")
       .createWithDefault(1)
 
   val SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS =
     buildConf("spark.sql.adaptive.maxNumPostShufflePartitions")
-      .doc("The advisory maximum number of post-shuffle partitions used in adaptive execution.")
+      .doc("The advisory maximum number of post-shuffle partitions used in adaptive execution. " +
+        "By default it equals spark.sql.shuffle.partitions.")
       .intConf
-      .checkValue(numPartitions => numPartitions > 0, "The maximum shuffle partition number " +
+      .checkValue(_ > 0, "The maximum shuffle partition number " +
        "must be a positive integer.")
-      .createWithDefault(500)
+      .createOptional
 
   val SUBEXPRESSION_ELIMINATION_ENABLED =
     buildConf("spark.sql.subexpressionElimination.enabled")
@@ -1735,7 +1736,8 @@ class SQLConf extends Serializable with Logging {
 
   def minNumPostShufflePartitions: Int = getConf(SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS)
 
-  def maxNumPostShufflePartitions: Int = getConf(SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS)
+  def maxNumPostShufflePartitions: Int =
+    getConf(SHUFFLE_MAX_NUM_POSTSHUFFLE_PARTITIONS).getOrElse(numShufflePartitions)
 
   def minBatchesToRetain: Int = getConf(MIN_BATCHES_TO_RETAIN)
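For context, the fallback introduced here is just an Option.getOrElse over the now-optional entry. A minimal standalone sketch of that pattern follows; the values are hypothetical and the vals stand in for SQLConf lookups rather than calling Spark APIs:

    // Unset optional entry, as after .createOptional with no user-supplied value.
    val maxNumPostShufflePartitions: Option[Int] = None
    // Stand-in for spark.sql.shuffle.partitions (hypothetical value).
    val numShufflePartitions: Int = 200

    // Mirrors SQLConf.maxNumPostShufflePartitions after this commit:
    // fall back to the shuffle partition number when the key is unset.
    val effectiveMax: Int = maxNumPostShufflePartitions.getOrElse(numShufflePartitions)
    assert(effectiveMax == 200)

Note also that the default of spark.sql.adaptive.enabled flips to false above, so adaptive execution (and with it these partition limits) now has to be enabled explicitly.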

sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ import org.apache.spark.sql.execution.exchange._
  * There are 2 kinds of query stages:
  * 1. Shuffle query stage. This stage materializes its output to shuffle files, and Spark launches
  *    another job to execute the further operators.
- * 2. Broadcast stage. This stage materializes its output to an array in driver JVM. Spark
+ * 2. Broadcast query stage. This stage materializes its output to an array in the driver JVM. Spark
  *    broadcasts the array before executing the further operators.
  */
 abstract class QueryStageExec extends LeafExecNode {
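To make the two kinds concrete, here is a simplified, self-contained model of the distinction the comment draws; these toy classes are illustrative stand-ins, not the actual Spark operators:

    // Both stage kinds materialize their output before further operators
    // run; they differ only in where the materialized output lives.
    sealed trait ToyQueryStage {
      def materialize(): Unit
    }

    // Shuffle query stage: output goes to shuffle files, and another
    // job is launched to execute the further operators.
    final class ToyShuffleStage extends ToyQueryStage {
      def materialize(): Unit = println("writing shuffle files")
    }

    // Broadcast query stage: output is collected to an array in the
    // driver JVM and broadcast before the further operators execute.
    final class ToyBroadcastStage extends ToyQueryStage {
      def materialize(): Unit = println("collecting to driver, then broadcasting")
    }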

sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageManager.scala

Lines changed: 1 addition & 1 deletion
@@ -34,7 +34,7 @@ import org.apache.spark.util.{EventLoop, ThreadUtils}
  *
  * When one query stage finishes materialization, a list of adaptive optimizer rules will be
  * executed, trying to optimize the query plan with the data statistics collected from the
- * materialized data. Then we travers the query plan again and try to insert more query stages.
+ * materialized data. Then we traverse the query plan again and try to insert more query stages.
  *
  * To create query stages, we traverse the query tree bottom up. When we hit an exchange node,
  * and all the child query stages of this exchange node are materialized, we create a new
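The comment above sketches the stage-insertion algorithm; a hedged illustration of that bottom-up traversal over a toy plan tree follows. The node types are hypothetical stand-ins for Spark's physical operators, not the real QueryStageManager logic:

    // Toy plan nodes: a generic operator, an exchange, and a stage that
    // may or may not have finished materializing.
    sealed trait ToyPlan
    final case class ToyOp(children: Seq[ToyPlan]) extends ToyPlan
    final case class ToyExchange(child: ToyPlan) extends ToyPlan
    final case class ToyStage(child: ToyPlan, materialized: Boolean) extends ToyPlan

    // True when every stage in the subtree has materialized its output.
    def allStagesMaterialized(plan: ToyPlan): Boolean = plan match {
      case ToyStage(_, done)  => done
      case ToyExchange(child) => allStagesMaterialized(child)
      case ToyOp(children)    => children.forall(allStagesMaterialized)
    }

    // Bottom up: recurse into children first; when an exchange's child
    // stages are all materialized, wrap it in a new (not yet materialized)
    // stage, as the comment describes.
    def insertStages(plan: ToyPlan): ToyPlan = plan match {
      case ToyExchange(child) =>
        val newChild = insertStages(child)
        if (allStagesMaterialized(newChild)) {
          ToyStage(ToyExchange(newChild), materialized = false)
        } else {
          ToyExchange(newChild)
        }
      case ToyOp(children) => ToyOp(children.map(insertStages))
      case stage: ToyStage => stage
    }

Under this sketch, calling insertStages again after each stage materializes gradually wraps more of the tree in stages, matching the event-loop behavior the comment describes.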

sql/core/src/test/scala/org/apache/spark/sql/execution/ReduceNumShufflePartitionsSuite.scala

Lines changed: 21 additions & 21 deletions
@@ -527,47 +527,47 @@ class ReduceNumShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterAll
     spark.sql("SET spark.sql.exchange.reuse=true")
     val df = spark.range(1).selectExpr("id AS key", "id AS value")
 
-    // test case 1: a fragment has 3 child fragments but they are the same fragment.
-    // ResultQueryFragment 1
-    //   ShuffleQueryFragment 0
-    //   ReusedQueryFragment 0
-    //   ReusedQueryFragment 0
+    // test case 1: a query stage has 3 child stages but they are the same stage.
+    // ResultQueryStage 1
+    //   ShuffleQueryStage 0
+    //   ReusedQueryStage 0
+    //   ReusedQueryStage 0
     val resultDf = df.join(df, "key").join(df, "key")
     val finalPlan = resultDf.queryExecution.executedPlan
       .asInstanceOf[AdaptiveSparkPlanExec].finalPlan
     assert(finalPlan.collect { case p: ReusedQueryStageExec => p }.length == 2)
     assert(finalPlan.collect { case p: CoalescedShuffleReaderExec => p }.length == 3)
     checkAnswer(resultDf, Row(0, 0, 0, 0) :: Nil)
 
-    // test case 2: a fragment has 2 parent fragments.
-    // ResultQueryFragment 3
-    //   ShuffleQueryFragment 1
-    //     ShuffleQueryFragment 0
-    //   ShuffleQueryFragment 2
-    //     ReusedQueryFragment 0
+    // test case 2: a query stage has 2 parent stages.
+    // ResultQueryStage 3
+    //   ShuffleQueryStage 1
+    //     ShuffleQueryStage 0
+    //   ShuffleQueryStage 2
+    //     ReusedQueryStage 0
     val grouped = df.groupBy("key").agg(max("value").as("value"))
     val resultDf2 = grouped.groupBy(col("key") + 1).max("value")
       .union(grouped.groupBy(col("key") + 2).max("value"))
 
     val finalPlan2 = resultDf2.queryExecution.executedPlan
       .asInstanceOf[AdaptiveSparkPlanExec].finalPlan
 
-    // The result fragment has 2 children
-    val level1Fragments = finalPlan2.collect { case q: QueryStageExec => q }
-    assert(level1Fragments.length == 2)
+    // The result stage has 2 children
+    val level1Stages = finalPlan2.collect { case q: QueryStageExec => q }
+    assert(level1Stages.length == 2)
 
-    val leafFragments = level1Fragments.flatMap { fragment =>
-      // All of the child fragments of result fragment have only one child fragment.
-      val children = fragment.plan.collect { case q: QueryStageExec => q }
+    val leafStages = level1Stages.flatMap { stage =>
+      // All of the child stages of the result stage have only one child stage.
+      val children = stage.plan.collect { case q: QueryStageExec => q }
       assert(children.length == 1)
       children
     }
-    assert(leafFragments.length == 2)
+    assert(leafStages.length == 2)
 
-    val reusedFragments = level1Fragments.flatMap { fragment =>
-      fragment.plan.collect { case r: ReusedQueryStageExec => r }
+    val reusedStages = level1Stages.flatMap { stage =>
+      stage.plan.collect { case r: ReusedQueryStageExec => r }
     }
-    assert(reusedFragments.length == 1)
+    assert(reusedStages.length == 1)
 
     checkAnswer(resultDf2, Row(1, 0) :: Row(2, 0) :: Nil)
   }
