Commit f2c2408

update

1 parent 4902a30 commit f2c2408

6 files changed: +61 -66 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 1 addition & 1 deletion
@@ -488,7 +488,7 @@ object SQLConf {
   val SKEW_JOIN_SKEWED_PARTITION_FACTOR =
     buildConf("spark.sql.adaptive.skewJoin.skewedPartitionFactor")
       .doc("A partition is considered as skewed if its size is larger than this factor " +
-        "multiplying the median partition size and also larger than 2 multiplying " +
+        "multiplying the median partition size and also larger than " +
         s"'${ADVISORY_PARTITION_SIZE_IN_BYTES.key}'")
       .version("3.0.0")
       .intConf
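With the "* 2" dropped, the documented rule becomes: a partition is skewed if it exceeds both the factor times the median partition size and the advisory partition size itself. A minimal sketch of the threshold, assuming the usual defaults (skew factor 10, advisory size 64MB; both defaults live elsewhere in SQLConf and are not part of this diff):

    // Hedged sketch of the documented threshold, not Spark's internal API.
    val factor = 10                       // assumed default of skewedPartitionFactor
    val advisoryBytes = 64L * 1024 * 1024 // assumed default of advisoryPartitionSizeInBytes

    def isSkewed(size: Long, medianSize: Long): Boolean =
      size > medianSize * factor && size > advisoryBytes // old rule: size > advisoryBytes * 2

    // Example: a 100MB partition over a 5MB median is now skewed (100MB > 50MB
    // and 100MB > 64MB); under the old rule it was not, since 100MB < 128MB.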

sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala

Lines changed: 1 addition & 1 deletion
@@ -66,7 +66,7 @@ case class CoalesceShufflePartitions(conf: SQLConf) extends Rule[SparkPlan] {
     val distinctNumPreShufflePartitions =
       validMetrics.map(stats => stats.bytesByPartitionId.length).distinct
     if (validMetrics.nonEmpty && distinctNumPreShufflePartitions.length == 1) {
-      val partitionSpecs = ShufflePartitionsCoalescer.coalescePartitions(
+      val partitionSpecs = ShufflePartitionsUtil.coalescePartitions(
         validMetrics.toArray,
         firstPartitionIndex = 0,
         lastPartitionIndex = distinctNumPreShufflePartitions.head,

sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala

Lines changed: 18 additions & 14 deletions
@@ -63,11 +63,11 @@ case class OptimizeSkewedJoin(conf: SQLConf) extends Rule[SparkPlan] {
   /**
    * A partition is considered as a skewed partition if its size is larger than the median
    * partition size * ADAPTIVE_EXECUTION_SKEWED_PARTITION_FACTOR and also larger than
-   * ADVISORY_PARTITION_SIZE_IN_BYTES * 2.
+   * ADVISORY_PARTITION_SIZE_IN_BYTES.
    */
   private def isSkewed(size: Long, medianSize: Long): Boolean = {
     size > medianSize * conf.getConf(SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR) &&
-      size > conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES) * 2
+      size > conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES)
   }

   private def medianSize(stats: MapOutputStatistics): Long = {

@@ -110,7 +110,7 @@ case class OptimizeSkewedJoin(conf: SQLConf) extends Rule[SparkPlan] {
       targetSize: Long): Array[Int] = {
     val shuffleId = stage.shuffle.shuffleDependency.shuffleHandle.shuffleId
     val mapPartitionSizes = getMapSizesForReduceId(shuffleId, partitionId)
-    ShufflePartitionsCoalescer.splitSizeListByTargetSize(mapPartitionSizes, targetSize)
+    ShufflePartitionsUtil.splitSizeListByTargetSize(mapPartitionSizes, targetSize)
   }

   private def getStatistics(stage: ShuffleQueryStageExec): MapOutputStatistics = {

@@ -195,21 +195,25 @@ case class OptimizeSkewedJoin(conf: SQLConf) extends Rule[SparkPlan] {
         }

         val leftParts = if (isLeftSkew) {
-          leftSkewDesc.addPartitionSize(leftSize)
-          createSkewPartitions(
-            partitionIndex,
-            getMapStartIndices(left, partitionIndex, leftTargetSize),
-            getNumMappers(left))
+          val mapStartIndices = getMapStartIndices(left, partitionIndex, leftTargetSize)
+          if (mapStartIndices.length > 1) {
+            leftSkewDesc.addPartitionSize(leftSize)
+            createSkewPartitions(partitionIndex, mapStartIndices, getNumMappers(left))
+          } else {
+            Seq(CoalescedPartitionSpec(partitionIndex, partitionIndex + 1))
+          }
         } else {
           Seq(CoalescedPartitionSpec(partitionIndex, partitionIndex + 1))
         }

         val rightParts = if (isRightSkew) {
-          rightSkewDesc.addPartitionSize(rightSize)
-          createSkewPartitions(
-            partitionIndex,
-            getMapStartIndices(right, partitionIndex, rightTargetSize),
-            getNumMappers(right))
+          val mapStartIndices = getMapStartIndices(right, partitionIndex, rightTargetSize)
+          if (mapStartIndices.length > 1) {
+            rightSkewDesc.addPartitionSize(rightSize)
+            createSkewPartitions(partitionIndex, mapStartIndices, getNumMappers(right))
+          } else {
+            Seq(CoalescedPartitionSpec(partitionIndex, partitionIndex + 1))
+          }
         } else {
           Seq(CoalescedPartitionSpec(partitionIndex, partitionIndex + 1))
         }

@@ -259,7 +263,7 @@ case class OptimizeSkewedJoin(conf: SQLConf) extends Rule[SparkPlan] {
       if (!shouldCoalesce || nonSkewPartitionIndices.length == 1) {
         nonSkewPartitionIndices.map(i => CoalescedPartitionSpec(i, i + 1))
       } else {
-        ShufflePartitionsCoalescer.coalescePartitions(
+        ShufflePartitionsUtil.coalescePartitions(
           Array(leftStats, rightStats),
           firstPartitionIndex = nonSkewPartitionIndices.head,
           // `lastPartitionIndex` is exclusive.
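The restructured branches change when a partition is actually treated as skewed: the split points are computed first, and only if there is more than one does the rule record the skew and create sub-partitions. A skewed-looking partition whose map output cannot be split (a single start index) now falls back to an ordinary coalesced spec. A standalone sketch of that decision, with simplified stand-in types (none of these names are Spark's internal API):

    // Hedged sketch of the new fallback, not the Spark source itself.
    object SkewFallbackSketch {
      sealed trait Spec
      case class Coalesced(start: Int, end: Int) extends Spec
      case class PartialMapper(partition: Int, startMap: Int, endMap: Int) extends Spec

      // A skewed partition is split only when there is more than one split point;
      // otherwise it stays a single ordinary partition and is not counted in the
      // skew statistics.
      def specsFor(idx: Int, isSkew: Boolean, mapStartIndices: Array[Int], numMappers: Int): Seq[Spec] =
        if (isSkew && mapStartIndices.length > 1) {
          mapStartIndices.indices.map { i =>
            val end = if (i == mapStartIndices.length - 1) numMappers else mapStartIndices(i + 1)
            PartialMapper(idx, mapStartIndices(i), end)
          }
        } else {
          Seq(Coalesced(idx, idx + 1))
        }

      def main(args: Array[String]): Unit = {
        // Skewed, but only one split point: no actual split happens.
        println(specsFor(4, isSkew = true, Array(0), numMappers = 10))    // Coalesced(4,5)
        // Skewed and splittable into mapper ranges [0,6) and [6,10).
        println(specsFor(4, isSkew = true, Array(0, 6), numMappers = 10))
      }
    }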
Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ import org.apache.spark.MapOutputStatistics
2323
import org.apache.spark.internal.Logging
2424
import org.apache.spark.sql.execution.{CoalescedPartitionSpec, ShufflePartitionSpec}
2525

26-
object ShufflePartitionsCoalescer extends Logging {
26+
object ShufflePartitionsUtil extends Logging {
27+
final val SMALL_PARTITION_FACTOR = 0.2
28+
final val MERGED_PARTITION_FACTOR = 1.2
2729

2830
/**
2931
* Coalesce the same range of partitions (`firstPartitionIndex` to `lastPartitionIndex`, the
@@ -117,38 +119,43 @@ object ShufflePartitionsCoalescer extends Logging {
117119

118120
/**
119121
* Given a list of size, return an array of indices to split the list into multiple partitions,
120-
* so that the size sum of each partition is close to target size. Each index indicates the start
121-
* of a partition.
122+
* so that the size sum of each partition is close to the target size. Each index indicates the
123+
* start of a partition.
122124
*/
123125
def splitSizeListByTargetSize(sizes: Seq[Long], targetSize: Long): Array[Int] = {
124126
val partitionStartIndices = ArrayBuffer[Int]()
125127
partitionStartIndices += 0
126128
var i = 0
127-
var currentSizeSum = 0L
129+
var currentPartitionSize = 0L
128130
var lastPartitionSize = -1L
129131

130132
def tryMergePartitions() = {
131133
// When we are going to start a new partition, it's possible that the current partition or
132134
// the previous partition is very small and it's better to merge the current partition into
133135
// the previous partition.
134136
val shouldMergePartitions = lastPartitionSize > -1 &&
135-
((currentSizeSum + lastPartitionSize) < targetSize * 1.3 ||
136-
(currentSizeSum < targetSize * 0.3 || lastPartitionSize < targetSize * 0.3))
137+
((currentPartitionSize + lastPartitionSize) < targetSize * MERGED_PARTITION_FACTOR ||
138+
(currentPartitionSize < targetSize * SMALL_PARTITION_FACTOR ||
139+
lastPartitionSize < targetSize * SMALL_PARTITION_FACTOR))
137140
if (shouldMergePartitions) {
141+
// We decide to merge the current partition into the previous one, so the start index of
142+
// the current partition should be removed.
138143
partitionStartIndices.remove(partitionStartIndices.length - 1)
139-
lastPartitionSize += currentSizeSum
144+
lastPartitionSize += currentPartitionSize
140145
} else {
141-
lastPartitionSize = currentSizeSum
146+
lastPartitionSize = currentPartitionSize
142147
}
143148
}
144149

145150
while (i < sizes.length) {
146-
if (i > 0 && currentSizeSum + sizes(i) > targetSize) {
151+
// If including the next size in the current partition exceeds the target size, package the
152+
// current partition and start a new partition.
153+
if (i > 0 && currentPartitionSize + sizes(i) > targetSize) {
147154
tryMergePartitions()
148155
partitionStartIndices += i
149-
currentSizeSum = sizes(i)
156+
currentPartitionSize = sizes(i)
150157
} else {
151-
currentSizeSum += sizes(i)
158+
currentPartitionSize += sizes(i)
152159
}
153160
i += 1
154161
}
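The hunk ends inside the method, so the final packaging step is not visible here. The updated first test case below (Seq(0, 2, 5)) only comes out if one last merge attempt runs after the loop, so the following standalone sketch assumes a trailing tryMergePartitions(); names mirror the diff but this is not the Spark source itself:

    import scala.collection.mutable.ArrayBuffer

    // Standalone sketch of the splitting strategy with the new factors; the final
    // merge after the loop is an assumption not shown in the hunk above.
    object SplitSketch {
      final val SMALL_PARTITION_FACTOR = 0.2
      final val MERGED_PARTITION_FACTOR = 1.2

      def splitSizeListByTargetSize(sizes: Seq[Long], targetSize: Long): Array[Int] = {
        val partitionStartIndices = ArrayBuffer[Int](0)
        var currentPartitionSize = 0L
        var lastPartitionSize = -1L

        def tryMergePartitions(): Unit = {
          val shouldMerge = lastPartitionSize > -1 &&
            ((currentPartitionSize + lastPartitionSize) < targetSize * MERGED_PARTITION_FACTOR ||
              currentPartitionSize < targetSize * SMALL_PARTITION_FACTOR ||
              lastPartitionSize < targetSize * SMALL_PARTITION_FACTOR)
          if (shouldMerge) {
            // Fold the current partition into the previous one.
            partitionStartIndices.remove(partitionStartIndices.length - 1)
            lastPartitionSize += currentPartitionSize
          } else {
            lastPartitionSize = currentPartitionSize
          }
        }

        var i = 0
        while (i < sizes.length) {
          if (i > 0 && currentPartitionSize + sizes(i) > targetSize) {
            tryMergePartitions()
            partitionStartIndices += i
            currentPartitionSize = sizes(i)
          } else {
            currentPartitionSize += sizes(i)
          }
          i += 1
        }
        tryMergePartitions() // assumed final merge of the trailing partition
        partitionStartIndices.toArray
      }

      def main(args: Array[String]): Unit = {
        // Matches the first updated test case below: the leading 15 folds into the
        // 90 (15 + 90 = 105 < 100 * 1.2), and so does the trailing 15.
        println(splitSizeListByTargetSize(Seq(15L, 90, 15, 15, 15, 90, 15), 100).mkString(", ")) // 0, 2, 5
      }
    }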
Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
package org.apache.spark.sql.execution
1919

2020
import org.apache.spark.{MapOutputStatistics, SparkFunSuite}
21-
import org.apache.spark.sql.execution.adaptive.ShufflePartitionsCoalescer
21+
import org.apache.spark.sql.execution.adaptive.ShufflePartitionsUtil
2222

23-
class ShufflePartitionsCoalescerSuite extends SparkFunSuite {
23+
class ShufflePartitionsUtilSuite extends SparkFunSuite {
2424

2525
private def checkEstimation(
2626
bytesByPartitionIdArray: Array[Array[Long]],
@@ -31,7 +31,7 @@ class ShufflePartitionsCoalescerSuite extends SparkFunSuite {
3131
case (bytesByPartitionId, index) =>
3232
new MapOutputStatistics(index, bytesByPartitionId)
3333
}
34-
val estimatedPartitionStartIndices = ShufflePartitionsCoalescer.coalescePartitions(
34+
val estimatedPartitionStartIndices = ShufflePartitionsUtil.coalescePartitions(
3535
mapOutputStatistics,
3636
0,
3737
bytesByPartitionIdArray.head.length,
@@ -257,23 +257,23 @@ class ShufflePartitionsCoalescerSuite extends SparkFunSuite {
257257
val targetSize = 100
258258

259259
// merge the small partitions at the beginning/end
260-
val sizeList1 = Seq[Long](20, 90, 20, 25, 80, 20)
261-
assert(ShufflePartitionsCoalescer.splitSizeListByTargetSize(sizeList1, targetSize).toSeq ==
262-
Seq(0, 2, 4))
260+
val sizeList1 = Seq[Long](15, 90, 15, 15, 15, 90, 15)
261+
assert(ShufflePartitionsUtil.splitSizeListByTargetSize(sizeList1, targetSize).toSeq ==
262+
Seq(0, 2, 5))
263263

264264
// merge the small partitions in the middle
265-
val sizeList2 = Seq[Long](20, 25, 90, 20, 90, 20, 25)
266-
assert(ShufflePartitionsCoalescer.splitSizeListByTargetSize(sizeList2, targetSize).toSeq ==
265+
val sizeList2 = Seq[Long](30, 15, 90, 10, 90, 15, 30)
266+
assert(ShufflePartitionsUtil.splitSizeListByTargetSize(sizeList2, targetSize).toSeq ==
267267
Seq(0, 2, 4, 5))
268268

269269
// merge the small partition even if it leads to a very large partition
270-
val sizeList3 = Seq[Long](20, 1000, 20, 1000)
271-
assert(ShufflePartitionsCoalescer.splitSizeListByTargetSize(sizeList3, targetSize).toSeq ==
270+
val sizeList3 = Seq[Long](15, 1000, 15, 1000)
271+
assert(ShufflePartitionsUtil.splitSizeListByTargetSize(sizeList3, targetSize).toSeq ==
272272
Seq(0, 3))
273273

274-
// merge the small partitions even if it exceeds targetSize * 0.3
274+
// merge the small partitions even if it exceeds targetSize * 0.2
275275
val sizeList4 = Seq[Long](35, 75, 90, 20, 35, 35, 35)
276-
assert(ShufflePartitionsCoalescer.splitSizeListByTargetSize(sizeList4, targetSize).toSeq ==
276+
assert(ShufflePartitionsUtil.splitSizeListByTargetSize(sizeList4, targetSize).toSeq ==
277277
Seq(0, 2, 3))
278278
}
279279
}
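Why the first case yields Seq(0, 2, 5): with targetSize = 100, the list (15, 90, 15, 15, 15, 90, 15) packages as

    // sizes:      15 90 | 15 15 15 | 90 15     (start indices 0, 2, 5)
    // partitions:   105      45       105
    //
    // The leading 15 merges into the 90 because 15 + 90 = 105 < 100 * MERGED_PARTITION_FACTOR,
    // and the trailing 15 merges for the same reason. The middle 45 stays separate:
    // merging it with a 105 neighbour gives 150 >= 120, and 45 >= 100 * SMALL_PARTITION_FACTOR.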

sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala

Lines changed: 11 additions & 27 deletions
@@ -614,7 +614,7 @@ class AdaptiveQueryExecSuite
       SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "2000") {
       withTempView("skewData1", "skewData2") {
         spark
-          .range(0, 1300, 1, 10)
+          .range(0, 1000, 1, 10)
           .selectExpr("id % 2 as key1", "id as value1")
           .createOrReplaceTempView("skewData1")
         spark

@@ -635,36 +635,36 @@ class AdaptiveQueryExecSuite
         // skewed inner join optimization
         val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult(
           "SELECT * FROM skewData1 join skewData2 ON key1 = key2")
-        // left stats: [4297, 0, 0, 0, 4674]
+        // left stats: [3496, 0, 0, 0, 4014]
         // right stats:[6292, 0, 0, 0, 0]
         // Partition 0: both left and right sides are skewed, left side is divided
         //              into 2 splits and right side is divided into 4 splits, so
         //              2 x 4 sub-partitions.
         // Partition 1, 2, 3: not skewed, and coalesced into 1 partition.
-        // Partition 4: only left side is skewed, and divide into 3 splits, so
-        //              3 sub-partitions.
+        // Partition 4: only left side is skewed, and divide into 2 splits, so
+        //              2 sub-partitions.
         // So total (8 + 1 + 3) partitions.
         val innerSmj = findTopLevelSortMergeJoin(innerAdaptivePlan)
-        checkSkewJoin(innerSmj, 8 + 1 + 3)
+        checkSkewJoin(innerSmj, 8 + 1 + 2)

         // skewed left outer join optimization
         val (_, leftAdaptivePlan) = runAdaptiveAndVerifyResult(
           "SELECT * FROM skewData1 left outer join skewData2 ON key1 = key2")
-        // left stats: [4297, 0, 0, 0, 4674]
+        // left stats: [3496, 0, 0, 0, 4014]
         // right stats:[6292, 0, 0, 0, 0]
         // Partition 0: both left and right sides are skewed, but left join can't split right side,
         //              so only left side is divided into 2 splits, and thus 2 sub-partitions.
         // Partition 1, 2, 3: not skewed, and coalesced into 1 partition.
-        // Partition 4: only left side is skewed, and divide into 3 splits, so
-        //              3 sub-partitions.
-        // So total (2 + 1 + 3) partitions.
+        // Partition 4: only left side is skewed, and divide into 2 splits, so
+        //              2 sub-partitions.
+        // So total (2 + 1 + 2) partitions.
         val leftSmj = findTopLevelSortMergeJoin(leftAdaptivePlan)
-        checkSkewJoin(leftSmj, 2 + 1 + 3)
+        checkSkewJoin(leftSmj, 2 + 1 + 2)

         // skewed right outer join optimization
         val (_, rightAdaptivePlan) = runAdaptiveAndVerifyResult(
           "SELECT * FROM skewData1 right outer join skewData2 ON key1 = key2")
-        // left stats: [4297, 0, 0, 0, 4674]
+        // left stats: [3496, 0, 0, 0, 4014]
         // right stats:[6292, 0, 0, 0, 0]
         // Partition 0: both left and right sides are skewed, but right join can't split left side,
         //              so only right side is divided into 4 splits, and thus 4 sub-partitions.

@@ -674,22 +674,6 @@ class AdaptiveQueryExecSuite
         // So total (4 + 1 + 1) partitions.
         val rightSmj = findTopLevelSortMergeJoin(rightAdaptivePlan)
         checkSkewJoin(rightSmj, 4 + 1 + 1)
-
-        withSQLConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "3000") {
-          val (_, innerAdaptivePlan) = runAdaptiveAndVerifyResult(
-            "SELECT * FROM skewData1 join skewData2 ON key1 = key2")
-          // left stats: [4297, 0, 0, 0, 4674]
-          // right stats:[6292, 0, 0, 0, 0]
-          // Partition 0: left side is smaller than 3000 * 2, so it's not skewed,
-          //              right side is skewed divided into 2 splits, so
-          //              2 sub-partitions.
-          // Partition 1, 2, 3: not skewed, and coalesced into 1 partition.
-          // Partition 4: left side is smaller than 3000 * 2, so it's not skewed,
-          //              right side is not skewed either, so just 1 partition.
-          // So total (2 + 1 + 1) partitions.
-          val innerSmj = findTopLevelSortMergeJoin(innerAdaptivePlan)
-          checkSkewJoin(innerSmj, 2 + 1 + 1)
-        }
       }
     }
   }
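The shrunk input (1000 rows instead of 1300) brings left partition 4 down to 4014 bytes, which splits into 2 pieces at the 2000-byte advisory target instead of 3. A back-of-the-envelope check of the inner-join expectation, using only numbers from the test comments above:

    // Not executable against Spark; just the arithmetic behind checkSkewJoin(innerSmj, 8 + 1 + 2).
    object SkewJoinPartitionCount extends App {
      val partition0 = 2 * 4  // left split into 2, right into 4: cartesian sub-partitions
      val partitions1to3 = 1  // not skewed, coalesced into one partition
      val partition4 = 2      // left side (4014 bytes) splits in two at the 2000-byte target
      assert(partition0 + partitions1to3 + partition4 == 11)
    }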
