
Commit 7d877c3

Davies Liu authored and committed
[SPARK-12902] [SQL] visualization for generated operators
This PR brings back visualization for generated operators. They look like:

![sql](https://cloud.githubusercontent.com/assets/40902/12460920/0dc7956a-bf6b-11e5-9c3f-8389f452526e.png)

![stage](https://cloud.githubusercontent.com/assets/40902/12460923/11806ac4-bf6b-11e5-9c72-e84a62c5ea93.png)

Note: SQL metrics are not supported right now because they are very slow; they will be supported once we have batch mode.

Author: Davies Liu <[email protected]>

Closes #10828 from davies/viz_codegen.
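For context, a minimal sketch of the kind of plan being visualized, assuming a Spark shell on this branch with `sqlContext` and its implicits in scope (the query is the one used in the new SQLMetricsSuite test below; the printed tree is illustrative, not exact output):

    // Whole-stage code generation compiles this Filter + Range pipeline into a
    // single generated operator wrapped in WholeStageCodegen.
    val df = sqlContext.range(10).filter('id < 5)

    // The physical plan shows the wrapped operators; the new SparkPlanGraphCluster
    // below renders this subtree as one labeled box in the SQL tab.
    println(df.queryExecution.executedPlan)
    // WholeStageCodegen
    //   +- Filter (id#0L < 5)
    //      +- Range 0, 10, ...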
1 parent c037d25 · commit 7d877c3

File tree

9 files changed: +104 −32 lines

core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js

Lines changed: 1 addition & 1 deletion

@@ -284,7 +284,7 @@ function renderDot(dot, container, forJob) {
   renderer(container, g);

   // Find the stage cluster and mark it for styling and post-processing
-  container.selectAll("g.cluster[name*=\"Stage\"]").classed("stage", true);
+  container.selectAll("g.cluster[name^=\"Stage \"]").classed("stage", true);
 }

 /* -------------------- *

The match is tightened from the substring selector `[name*="Stage"]` to the prefix selector `[name^="Stage "]`, presumably so clusters whose names merely contain "Stage" (such as the new WholeStageCodegen scopes) are no longer mis-styled as stage clusters.

core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala

Lines changed: 5 additions & 1 deletion

@@ -130,7 +130,11 @@ private[ui] object RDDOperationGraph extends Logging {
       }
     }
     // Attach the outermost cluster to the root cluster, and the RDD to the innermost cluster
-    rddClusters.headOption.foreach { cluster => rootCluster.attachChildCluster(cluster) }
+    rddClusters.headOption.foreach { cluster =>
+      if (!rootCluster.childClusters.contains(cluster)) {
+        rootCluster.attachChildCluster(cluster)
+      }
+    }
     rddClusters.lastOption.foreach { cluster => cluster.attachChildNode(node) }
   }
 }

The added guard keeps the same cluster from being attached to the root more than once.

sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css

Lines changed: 6 additions & 0 deletions

@@ -20,6 +20,12 @@
   text-shadow: none;
 }

+#plan-viz-graph svg g.cluster rect {
+  fill: #A0DFFF;
+  stroke: #3EC0FF;
+  stroke-width: 1px;
+}
+
 #plan-viz-graph svg g.node rect {
   fill: #C3EBFF;
   stroke: #3EC0FF;

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala

Lines changed: 7 additions & 2 deletions

@@ -36,12 +36,17 @@ class SparkPlanInfo(
 private[sql] object SparkPlanInfo {

   def fromSparkPlan(plan: SparkPlan): SparkPlanInfo = {
+    val children = plan match {
+      case WholeStageCodegen(child, _) => child :: Nil
+      case InputAdapter(child) => child :: Nil
+      case plan => plan.children
+    }
     val metrics = plan.metrics.toSeq.map { case (key, metric) =>
       new SQLMetricInfo(metric.name.getOrElse(key), metric.id,
         Utils.getFormattedClassName(metric.param))
     }
-    val children = plan.children.map(fromSparkPlan)

-    new SparkPlanInfo(plan.nodeName, plan.simpleString, children, plan.metadata, metrics)
+    new SparkPlanInfo(plan.nodeName, plan.simpleString, children.map(fromSparkPlan),
+      plan.metadata, metrics)
   }
 }
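A hedged sketch of what the unwrapping above produces, assuming it runs inside an `org.apache.spark.sql` package so the `private[sql]` API is visible (as the test suites do); `dump` is a hypothetical helper, and the output is illustrative:

    import org.apache.spark.sql.execution.SparkPlanInfo

    // Hypothetical helper: print the SparkPlanInfo tree by nodeName. Because the
    // match above descends into WholeStageCodegen's generated child rather than
    // plan.children, the inner operators appear under the WholeStageCodegen node.
    def dump(info: SparkPlanInfo, indent: Int = 0): Unit = {
      println(" " * indent + info.nodeName)
      info.children.foreach(dump(_, indent + 2))
    }

    dump(SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan))
    // WholeStageCodegen
    //   Filter
    //     Range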

sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala

Lines changed: 2 additions & 2 deletions

@@ -99,7 +99,7 @@ private[sql] class ExecutionPage(parent: SQLTab) extends WebUIPage("execution")
   }

   private def planVisualization(metrics: Map[Long, String], graph: SparkPlanGraph): Seq[Node] = {
-    val metadata = graph.nodes.flatMap { node =>
+    val metadata = graph.allNodes.flatMap { node =>
       val nodeId = s"plan-meta-data-${node.id}"
       <div id={nodeId}>{node.desc}</div>
     }
@@ -110,7 +110,7 @@ private[sql] class ExecutionPage(parent: SQLTab) extends WebUIPage("execution")
       <div class="dot-file">
         {graph.makeDotFile(metrics)}
       </div>
-      <div id="plan-viz-metadata-size">{graph.nodes.size.toString}</div>
+      <div id="plan-viz-metadata-size">{graph.allNodes.size.toString}</div>
       {metadata}
     </div>
     {planVisualizationResources}

sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala

Lines changed: 1 addition & 1 deletion

@@ -219,7 +219,7 @@ private[sql] class SQLListener(conf: SparkConf) extends SparkListener with Loggi
     case SparkListenerSQLExecutionStart(executionId, description, details,
       physicalPlanDescription, sparkPlanInfo, time) =>
       val physicalPlanGraph = SparkPlanGraph(sparkPlanInfo)
-      val sqlPlanMetrics = physicalPlanGraph.nodes.flatMap { node =>
+      val sqlPlanMetrics = physicalPlanGraph.allNodes.flatMap { node =>
         node.metrics.map(metric => metric.accumulatorId -> metric)
       }
       val executionUIData = new SQLExecutionUIData(

sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala

Lines changed: 72 additions & 23 deletions

@@ -21,7 +21,7 @@ import java.util.concurrent.atomic.AtomicLong

 import scala.collection.mutable

-import org.apache.spark.sql.execution.SparkPlanInfo
+import org.apache.spark.sql.execution.{InputAdapter, SparkPlanInfo, WholeStageCodegen}
 import org.apache.spark.sql.execution.metric.SQLMetrics

 /**
@@ -41,6 +41,16 @@ private[ui] case class SparkPlanGraph(
     dotFile.append("}")
     dotFile.toString()
   }
+
+  /**
+   * All the SparkPlanGraphNodes, including those inside of WholeStageCodegen.
+   */
+  val allNodes: Seq[SparkPlanGraphNode] = {
+    nodes.flatMap {
+      case cluster: SparkPlanGraphCluster => cluster.nodes :+ cluster
+      case node => Seq(node)
+    }
+  }
 }

 private[sql] object SparkPlanGraph {
@@ -52,30 +62,48 @@ private[sql] object SparkPlanGraph {
     val nodeIdGenerator = new AtomicLong(0)
     val nodes = mutable.ArrayBuffer[SparkPlanGraphNode]()
     val edges = mutable.ArrayBuffer[SparkPlanGraphEdge]()
-    buildSparkPlanGraphNode(planInfo, nodeIdGenerator, nodes, edges)
+    buildSparkPlanGraphNode(planInfo, nodeIdGenerator, nodes, edges, null, null)
     new SparkPlanGraph(nodes, edges)
   }

   private def buildSparkPlanGraphNode(
       planInfo: SparkPlanInfo,
       nodeIdGenerator: AtomicLong,
       nodes: mutable.ArrayBuffer[SparkPlanGraphNode],
-      edges: mutable.ArrayBuffer[SparkPlanGraphEdge]): SparkPlanGraphNode = {
-    val metrics = planInfo.metrics.map { metric =>
-      SQLPlanMetric(metric.name, metric.accumulatorId,
-        SQLMetrics.getMetricParam(metric.metricParam))
+      edges: mutable.ArrayBuffer[SparkPlanGraphEdge],
+      parent: SparkPlanGraphNode,
+      subgraph: SparkPlanGraphCluster): Unit = {
+    if (planInfo.nodeName == classOf[WholeStageCodegen].getSimpleName) {
+      val cluster = new SparkPlanGraphCluster(
+        nodeIdGenerator.getAndIncrement(),
+        planInfo.nodeName,
+        planInfo.simpleString,
+        mutable.ArrayBuffer[SparkPlanGraphNode]())
+      nodes += cluster
+      buildSparkPlanGraphNode(
+        planInfo.children.head, nodeIdGenerator, nodes, edges, parent, cluster)
+    } else if (planInfo.nodeName == classOf[InputAdapter].getSimpleName) {
+      buildSparkPlanGraphNode(planInfo.children.head, nodeIdGenerator, nodes, edges, parent, null)
+    } else {
+      val metrics = planInfo.metrics.map { metric =>
+        SQLPlanMetric(metric.name, metric.accumulatorId,
+          SQLMetrics.getMetricParam(metric.metricParam))
+      }
+      val node = new SparkPlanGraphNode(
+        nodeIdGenerator.getAndIncrement(), planInfo.nodeName,
+        planInfo.simpleString, planInfo.metadata, metrics)
+      if (subgraph == null) {
+        nodes += node
+      } else {
+        subgraph.nodes += node
+      }
+
+      if (parent != null) {
+        edges += SparkPlanGraphEdge(node.id, parent.id)
+      }
+      planInfo.children.foreach(
+        buildSparkPlanGraphNode(_, nodeIdGenerator, nodes, edges, node, subgraph))
     }
-    val node = SparkPlanGraphNode(
-      nodeIdGenerator.getAndIncrement(), planInfo.nodeName,
-      planInfo.simpleString, planInfo.metadata, metrics)
-
-    nodes += node
-    val childrenNodes = planInfo.children.map(
-      child => buildSparkPlanGraphNode(child, nodeIdGenerator, nodes, edges))
-    for (child <- childrenNodes) {
-      edges += SparkPlanGraphEdge(child.id, node.id)
-    }
-    node
   }
 }

@@ -86,12 +114,12 @@ private[sql] object SparkPlanGraph {
  * @param name the name of this SparkPlan node
  * @param metrics metrics that this SparkPlan node will track
  */
-private[ui] case class SparkPlanGraphNode(
-    id: Long,
-    name: String,
-    desc: String,
-    metadata: Map[String, String],
-    metrics: Seq[SQLPlanMetric]) {
+private[ui] class SparkPlanGraphNode(
+    val id: Long,
+    val name: String,
+    val desc: String,
+    val metadata: Map[String, String],
+    val metrics: Seq[SQLPlanMetric]) {

   def makeDotNode(metricsValue: Map[Long, String]): String = {
     val builder = new mutable.StringBuilder(name)
@@ -117,6 +145,27 @@ private[ui] case class SparkPlanGraphNode(
   }
 }

+/**
+ * Represent a tree of SparkPlan for WholeStageCodegen.
+ */
+private[ui] class SparkPlanGraphCluster(
+    id: Long,
+    name: String,
+    desc: String,
+    val nodes: mutable.ArrayBuffer[SparkPlanGraphNode])
+  extends SparkPlanGraphNode(id, name, desc, Map.empty, Nil) {
+
+  override def makeDotNode(metricsValue: Map[Long, String]): String = {
+    s"""
+      |  subgraph cluster${id} {
+      |    label=${name};
+      |    ${nodes.map(_.makeDotNode(metricsValue)).mkString(" \n")}
+      |  }
+    """.stripMargin
+  }
+}
+
+
 /**
  * Represent an edge in the SparkPlan tree. `fromId` is the parent node id, and `toId` is the child
  * node id.
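To make the new `allNodes` semantics concrete, a small hedged sketch with hypothetical ids, assuming it runs where the `private[ui]` classes are visible: a cluster contributes both its member nodes and itself, which is why `ExecutionPage`, `SQLListener`, and the tests below switch from `nodes` to `allNodes`.

    import scala.collection.mutable

    // Hypothetical graph: one WholeStageCodegen cluster holding Filter and Range.
    val filter = new SparkPlanGraphNode(1, "Filter", "Filter (id < 5)", Map.empty, Nil)
    val range = new SparkPlanGraphNode(2, "Range", "Range 0..10", Map.empty, Nil)
    val cluster = new SparkPlanGraphCluster(
      0, "WholeStageCodegen", "WholeStageCodegen", mutable.ArrayBuffer(filter, range))

    val graph = SparkPlanGraph(Seq(cluster), Seq(SparkPlanGraphEdge(fromId = 2, toId = 1)))
    graph.nodes.size    // 1: only the cluster is a top-level entry
    graph.allNodes.size // 3: Filter, Range, plus the cluster itself

    // makeDotNode on the cluster emits a DOT subgraph, roughly:
    //   subgraph cluster0 { label=WholeStageCodegen; ...member nodes... }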

sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala

Lines changed: 9 additions & 1 deletion

@@ -86,7 +86,7 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext {
       // If we can track all jobs, check the metric values
       val metricValues = sqlContext.listener.getExecutionMetrics(executionId)
       val actualMetrics = SparkPlanGraph(SparkPlanInfo.fromSparkPlan(
-        df.queryExecution.executedPlan)).nodes.filter { node =>
+        df.queryExecution.executedPlan)).allNodes.filter { node =>
         expectedMetrics.contains(node.id)
       }.map { node =>
         val nodeMetrics = node.metrics.map { metric =>
@@ -134,6 +134,14 @@ class SQLMetricsSuite extends SparkFunSuite with SharedSQLContext {
     )
   }

+  test("WholeStageCodegen metrics") {
+    // Assume the execution plan is
+    // WholeStageCodegen(nodeId = 0, Range(nodeId = 2) -> Filter(nodeId = 1))
+    // TODO: update metrics in generated operators
+    val df = sqlContext.range(10).filter('id < 5)
+    testSparkPlanMetrics(df, 1, Map.empty)
+  }
+
   test("TungstenAggregate metrics") {
     // Assume the execution plan is
     // ... -> TungstenAggregate(nodeId = 2) -> Exchange(nodeId = 1)
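Once generated operators do update their metrics (the TODO above), the new test could presumably assert concrete values in the style of the suite's other tests. A hypothetical sketch, with metric names and node ids assumed from the suite's existing Filter tests rather than taken from this commit:

    // Hypothetical future assertion (not part of this commit): Filter has
    // nodeId = 1 in the plan sketched above and tracks input/output row counts.
    val df = sqlContext.range(10).filter('id < 5)
    testSparkPlanMetrics(df, 1, Map(
      1L -> ("Filter", Map(
        "number of input rows" -> 10L,
        "number of output rows" -> 5L))))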

sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLListenerSuite.scala

Lines changed: 1 addition & 1 deletion

@@ -83,7 +83,7 @@ class SQLListenerSuite extends SparkFunSuite with SharedSQLContext {
     val df = createTestDataFrame
     val accumulatorIds =
       SparkPlanGraph(SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan))
-        .nodes.flatMap(_.metrics.map(_.accumulatorId))
+        .allNodes.flatMap(_.metrics.map(_.accumulatorId))
     // Assume all accumulators are long
     var accumulatorValue = 0L
     val accumulatorUpdates = accumulatorIds.map { id =>
