
Commit d136b72

JkSelf authored and cloud-fan committed
[SPARK-31253][SQL][FOLLOW-UP] Improve the partition data size metrics in CustomShuffleReaderExec
### What changes were proposed in this pull request?

Currently the partition data size metric appears as three separate entries (min/max/avg) in the Spark UI, which is not user friendly. This PR collapses them into a single "partition data size" entry by posting one driver-side accumulator update per partition for the same metric (via the new SQLMetrics.postDriverMetricsUpdatedByValue helper), so the UI aggregates the values into min/med/max itself.

Before this PR, the Spark UI shows:

![image](https://user-images.githubusercontent.com/11972570/78980137-da1a2200-7b4f-11ea-81ee-76858e887bde.png)

After this PR, the Spark UI shows:

![image](https://user-images.githubusercontent.com/11972570/78980192-fae27780-7b4f-11ea-9faa-07f58699acfd.png)

### Why are the changes needed?

To improve the UI.

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

Existing unit tests.

Closes apache#28175 from JkSelf/improveAqeMetrics.

Authored-by: jiake <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
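As a rough sketch of that reporting pattern (not the actual plan-node code: reportPartitionSizes and partitionSizes are hypothetical names used only for illustration, while SQLMetrics.createSizeMetric and the postDriverMetricsUpdatedByValue helper are the ones used/added in the diff below):

import org.apache.spark.SparkContext
import org.apache.spark.sql.execution.SQLExecution
import org.apache.spark.sql.execution.metric.SQLMetrics

// Hypothetical helper: one "partition data size" metric, updated once per
// partition, so the UI can render min/med/max as a single entry.
def reportPartitionSizes(sc: SparkContext, partitionSizes: Seq[Long]): Unit = {
  val executionId = sc.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
  val partitionMetric = SQLMetrics.createSizeMetric(sc, "partition data size")
  // Repeat the same accumulator id once per partition value.
  val updates = partitionSizes.map(size => (partitionMetric.id, size))
  partitionMetric.set(partitionSizes.sum)
  SQLMetrics.postDriverMetricsUpdatedByValue(sc, executionId, updates)
}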
1 parent 40f9dbb commit d136b72

4 files changed, +65 -41 lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderExec.scala

Lines changed: 52 additions & 36 deletions
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.execution.adaptive
 
+import scala.collection.mutable.ArrayBuffer
+
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
@@ -95,54 +97,68 @@ case class CustomShuffleReaderExec private(
     case _ => None
   }
 
-  private def partitionDataSizeMetrics = {
-    val maxSize = SQLMetrics.createSizeMetric(sparkContext, "maximum partition data size")
-    val minSize = SQLMetrics.createSizeMetric(sparkContext, "minimum partition data size")
-    val avgSize = SQLMetrics.createSizeMetric(sparkContext, "average partition data size")
-    val mapStatsOpt = shuffleStage.get.mapStats
-    val sizes = mapStatsOpt.map { mapStats =>
-      val mapSizes = mapStats.bytesByPartitionId
-      partitionSpecs.map {
-        case CoalescedPartitionSpec(startReducerIndex, endReducerIndex) =>
-          startReducerIndex.until(endReducerIndex).map(mapSizes).sum
-        case p: PartialReducerPartitionSpec => p.dataSize
-        case p => throw new IllegalStateException("unexpected " + p)
+  private def sendDriverMetrics(): Unit = {
+    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
+    var driverAccumUpdates: Seq[(Long, Long)] = Seq.empty
+
+    val numPartitionsMetric = metrics("numPartitions")
+    numPartitionsMetric.set(partitionSpecs.length)
+    driverAccumUpdates = driverAccumUpdates :+
+      (numPartitionsMetric.id, partitionSpecs.length.toLong)
+
+    if (hasSkewedPartition) {
+      val skewedMetric = metrics("numSkewedPartitions")
+      val numSkewedPartitions = partitionSpecs.collect {
+        case p: PartialReducerPartitionSpec => p.reducerIndex
+      }.distinct.length
+      skewedMetric.set(numSkewedPartitions)
+      driverAccumUpdates = driverAccumUpdates :+ (skewedMetric.id, numSkewedPartitions.toLong)
+    }
+
+    if(!isLocalReader) {
+      val partitionMetrics = metrics("partitionDataSize")
+      val mapStats = shuffleStage.get.mapStats
+
+      if (mapStats.isEmpty) {
+        partitionMetrics.set(0)
+        driverAccumUpdates = driverAccumUpdates :+ (partitionMetrics.id, 0L)
+      } else {
+        var sum = 0L
+        partitionSpecs.foreach {
+          case CoalescedPartitionSpec(startReducerIndex, endReducerIndex) =>
+            val dataSize = startReducerIndex.until(endReducerIndex).map(
+              mapStats.get.bytesByPartitionId(_)).sum
+            driverAccumUpdates = driverAccumUpdates :+ (partitionMetrics.id, dataSize)
+            sum += dataSize
+          case p: PartialReducerPartitionSpec =>
+            driverAccumUpdates = driverAccumUpdates :+ (partitionMetrics.id, p.dataSize)
+            sum += p.dataSize
+          case p => throw new IllegalStateException("unexpected " + p)
+        }
+
+        // Set sum value to "partitionDataSize" metric.
+        partitionMetrics.set(sum)
       }
-    }.getOrElse(Seq(0L))
-
-    maxSize.set(sizes.max)
-    minSize.set(sizes.min)
-    avgSize.set(sizes.sum / sizes.length)
-    Map(
-      "maxPartitionDataSize" -> maxSize,
-      "minPartitionDataSize" -> minSize,
-      "avgPartitionDataSize" -> avgSize)
-  }
+    }
 
-  private def skewedPartitionMetrics = {
-    val metrics = SQLMetrics.createMetric(sparkContext, "number of skewed partitions")
-    val numSkewedPartitions = partitionSpecs.collect {
-      case p: PartialReducerPartitionSpec => p.reducerIndex
-    }.distinct.length
-    metrics.set(numSkewedPartitions)
-    Map("numSkewedPartitions" -> metrics)
+    SQLMetrics.postDriverMetricsUpdatedByValue(sparkContext, executionId, driverAccumUpdates)
   }
 
   @transient override lazy val metrics: Map[String, SQLMetric] = {
     if (shuffleStage.isDefined) {
-      val numPartitions = SQLMetrics.createMetric(sparkContext, "number of partitions")
-      numPartitions.set(partitionSpecs.length)
-      Map("numPartitions" -> numPartitions) ++ {
+      Map("numPartitions" -> SQLMetrics.createMetric(sparkContext, "number of partitions")) ++ {
       if (isLocalReader) {
         // We split the mapper partition evenly when creating local shuffle reader, so no
         // data size info is available.
         Map.empty
       } else {
-          partitionDataSizeMetrics
+          Map("partitionDataSize" ->
+            SQLMetrics.createSizeMetric(sparkContext, "partition data size"))
       }
     } ++ {
       if (hasSkewedPartition) {
-          skewedPartitionMetrics
+          Map("numSkewedPartitions" ->
+            SQLMetrics.createMetric(sparkContext, "number of skewed partitions"))
       } else {
         Map.empty
       }
@@ -154,8 +170,8 @@ case class CustomShuffleReaderExec private(
   }
 
   private lazy val cachedShuffleRDD: RDD[InternalRow] = {
-    val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
-    SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toSeq)
+    sendDriverMetrics()
+
     shuffleStage.map { stage =>
       new ShuffledRowRDD(
         stage.shuffle.shuffleDependency, stage.shuffle.readMetrics, partitionSpecs.toArray)

sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala

Lines changed: 10 additions & 0 deletions
@@ -222,6 +222,16 @@ object SQLMetrics {
     }
   }
 
+  def postDriverMetricsUpdatedByValue(
+      sc: SparkContext,
+      executionId: String,
+      accumUpdates: Seq[(Long, Long)]): Unit = {
+    if (executionId != null) {
+      sc.listenerBus.post(
+        SparkListenerDriverAccumUpdates(executionId.toLong, accumUpdates))
+    }
+  }
+
   /**
    * Updates metrics based on the driver side value. This is useful for certain metrics that
   * are only updated on the driver, e.g. subquery execution time, or number of files.
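For reference, a hedged usage sketch of the new helper (partitionMetric and the byte values are illustrative; only postDriverMetricsUpdatedByValue itself comes from this change). Passing the updates as a Seq of (accumulator id, value) pairs rather than a Map lets the same id appear several times, which is what the SQLAppStatusListener change below accommodates:

// Assumes a driver-side metric created elsewhere, e.g.
// val partitionMetric = SQLMetrics.createSizeMetric(sparkContext, "partition data size")
val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
// Each (id, value) pair becomes one data point; repeating an id is allowed.
SQLMetrics.postDriverMetricsUpdatedByValue(
  sparkContext,
  executionId,
  Seq((partitionMetric.id, 1024L), (partitionMetric.id, 4096L), (partitionMetric.id, 2048L)))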

sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala

Lines changed: 1 addition & 1 deletion
@@ -450,7 +450,7 @@ private class LiveExecutionData(val executionId: Long) extends LiveEntity {
 
   var jobs = Map[Int, JobExecutionStatus]()
   var stages = Set[Int]()
-  var driverAccumUpdates = Map[Long, Long]()
+  var driverAccumUpdates = Seq[(Long, Long)]()
 
   @volatile var metricsValues: Map[Long, String] = null
 
sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala

Lines changed: 2 additions & 4 deletions
@@ -809,11 +809,9 @@ class AdaptiveQueryExecSuite
       assert(!reader.hasSkewedPartition)
       assert(reader.hasCoalescedPartition)
       assert(reader.metrics.keys.toSeq.sorted == Seq(
-        "avgPartitionDataSize", "maxPartitionDataSize", "minPartitionDataSize", "numPartitions"))
+        "numPartitions", "partitionDataSize"))
       assert(reader.metrics("numPartitions").value == reader.partitionSpecs.length)
-      assert(reader.metrics("avgPartitionDataSize").value > 0)
-      assert(reader.metrics("maxPartitionDataSize").value > 0)
-      assert(reader.metrics("minPartitionDataSize").value > 0)
+      assert(reader.metrics("partitionDataSize").value > 0)
 
       withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") {
         val (_, adaptivePlan) = runAdaptiveAndVerifyResult(
