-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-11206] Support SQL UI on the history server (resubmit) #10061
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
fdf9d28
ff4075d
b9870e6
3833055
c0abfc6
a5b1cf4
7b30bc7
d52288b
7a2aced
caab0ba
0af5afe
8d565f2
1954d71
927bae8
b03d98b
51f913b
bca3f5f
60033f8
fe5c165
8d94707
56f24ba
690277e
5270209
8222a0c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| org.apache.spark.sql.execution.ui.SQLHistoryListenerFactory |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1236,6 +1236,7 @@ class SQLContext private[sql]( | |
| sparkContext.addSparkListener(new SparkListener { | ||
| override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { | ||
| SQLContext.clearInstantiatedContext() | ||
| SQLContext.clearSqlListener() | ||
| } | ||
| }) | ||
|
|
||
|
|
@@ -1263,6 +1264,8 @@ object SQLContext { | |
| */ | ||
| @transient private val instantiatedContext = new AtomicReference[SQLContext]() | ||
|
|
||
| @transient private val sqlListener = new AtomicReference[SQLListener]() | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just to make sure that it's not overlooked, please see my comment on the other PRs regarding whether this needs to actually hold a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Many unit tests use
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I see that we need this in order to be able to return a value from |
||
|
|
||
| /** | ||
| * Get the singleton SQLContext if it exists or create a new one using the given SparkContext. | ||
| * | ||
|
|
@@ -1307,6 +1310,10 @@ object SQLContext { | |
| Option(instantiatedContext.get()) | ||
| } | ||
|
|
||
| private[sql] def clearSqlListener(): Unit = { | ||
| sqlListener.set(null) | ||
| } | ||
|
|
||
| /** | ||
| * Changes the SQLContext that will be returned in this thread and its children when | ||
| * SQLContext.getOrCreate() is called. This can be used to ensure that a given thread receives | ||
|
|
@@ -1355,9 +1362,13 @@ object SQLContext { | |
| * Create a SQLListener then add it into SparkContext, and create an SQLTab if there is SparkUI. | ||
| */ | ||
| private[sql] def createListenerAndUI(sc: SparkContext): SQLListener = { | ||
| val listener = new SQLListener(sc.conf) | ||
| sc.addSparkListener(listener) | ||
| sc.ui.foreach(new SQLTab(listener, _)) | ||
| listener | ||
| if (sqlListener.get() == null) { | ||
| val listener = new SQLListener(sc.conf) | ||
| if (sqlListener.compareAndSet(null, listener)) { | ||
| sc.addSparkListener(listener) | ||
| sc.ui.foreach(new SQLTab(listener, _)) | ||
| } | ||
| } | ||
| sqlListener.get() | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.execution | ||
|
|
||
| import org.apache.spark.annotation.DeveloperApi | ||
| import org.apache.spark.sql.execution.metric.SQLMetricInfo | ||
| import org.apache.spark.util.Utils | ||
|
|
||
| /** | ||
| * :: DeveloperApi :: | ||
| * Stores information about a SQL SparkPlan. | ||
| */ | ||
| @DeveloperApi | ||
| class SparkPlanInfo( | ||
| val nodeName: String, | ||
| val simpleString: String, | ||
| val children: Seq[SparkPlanInfo], | ||
| val metrics: Seq[SQLMetricInfo]) | ||
|
|
||
| private[sql] object SparkPlanInfo { | ||
|
|
||
| def fromSparkPlan(plan: SparkPlan): SparkPlanInfo = { | ||
| val metrics = plan.metrics.toSeq.map { case (key, metric) => | ||
| new SQLMetricInfo(metric.name.getOrElse(key), metric.id, | ||
| Utils.getFormattedClassName(metric.param)) | ||
| } | ||
| val children = plan.children.map(fromSparkPlan) | ||
|
|
||
| new SparkPlanInfo(plan.nodeName, plan.simpleString, children, metrics) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.execution.metric | ||
|
|
||
| import org.apache.spark.annotation.DeveloperApi | ||
|
|
||
| /** | ||
| * :: DeveloperApi :: | ||
| * Stores information about a SQL Metric. | ||
| */ | ||
| @DeveloperApi | ||
| class SQLMetricInfo( | ||
| val name: String, | ||
| val accumulatorId: Long, | ||
| val metricParam: String) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the reason to add this line?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is very possible that we silently pull in unnecessary information. If we have new event types, we should handle those explicitly instead of relying on this line. I am proposing to revert this line.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I've said in similar conversations in other contexts, I'm strongly opposed to what you're suggesting. In fact I'm an advocate for exactly the opposite, and that's why I filed SPARK-12141.
BTW just removing that line would break the feature this patch is implementing, unless you write a whole lot of code to manually serialize all the SQL-related events.
Events are a public API, and they should be carefully crafted, since changing them affects user applications (including event logs). If there is unnecessary information in the event, then it's a bug in the event definition, not here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I totally agree. However, my concern is that having this line here will make it harder for developers to spot issues during development. Since the serialization works automatically, we are not making it a mandatory step to self-review what will be serialized and which methods will be called during serialization, which makes the auditing work much harder. Although it introduces more work for developers to handle every event explicitly, when we review a pull request we can clearly see what will be serialized and how each event is serialized. What do you think?
btw, if I am missing any context, please let me know :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm perfectly OK with making auditing of these events harder if it means you're not writing manual serialization and deserialization code like JsonProtocol.scala. The drawbacks of the latter are much worse for code readability and maintainability.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW this is really not the right forum to discuss this. If you want to discuss big changes like you're proposing, please discuss on the bug I opened (referenced above) or start a thread on the mailing list.
Your suggestion of removing that line will just break the feature and, to restore it, would require an insane amount of code motion and new code to be written. To start with, the SQL events are not even available in "core", so you can't reference the classes here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I think this is a terrible idea. Actually, back when we introduced magic serialization, I was against it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you guys please comment on the bug I opened or the mailing list? Commenting on a long closed github PR is not really the best forum.
I'd really like to understand why you think automatic serialization is a bad idea, since we use it in so many places. I think exactly the opposite - manual serialization is unmaintainable, error-prone, and a waste of developer time.