@@ -22,15 +22,12 @@ import java.util.concurrent.atomic.AtomicLong
2222
2323import org .apache .spark .SparkContext
2424import org .apache .spark .sql .SparkSession
25- import org .apache .spark .sql .execution .ui .{SparkListenerSQLExecutionEnd ,
26- SparkListenerSQLExecutionStart }
25+ import org .apache .spark .sql .execution .ui .{SparkListenerSQLExecutionEnd , SparkListenerSQLExecutionStart }
2726
2827object SQLExecution {
2928
3029 val EXECUTION_ID_KEY = " spark.sql.execution.id"
3130
32- private val IGNORE_NESTED_EXECUTION_ID = " spark.sql.execution.ignoreNestedExecutionId"
33-
3431 private val _nextExecutionId = new AtomicLong (0 )
3532
3633 private def nextExecutionId : Long = _nextExecutionId.getAndIncrement
@@ -45,10 +42,8 @@ object SQLExecution {
4542
4643 private [sql] def checkSQLExecutionId (sparkSession : SparkSession ): Unit = {
4744 val sc = sparkSession.sparkContext
48- val isNestedExecution = sc.getLocalProperty(IGNORE_NESTED_EXECUTION_ID ) != null
49- val hasExecutionId = sc.getLocalProperty(EXECUTION_ID_KEY ) != null
5045 // Only throw an exception during tests. A missing execution ID should not fail a job.
51- if (testing && ! isNestedExecution && ! hasExecutionId ) {
46+ if (testing && sc.getLocalProperty( EXECUTION_ID_KEY ) == null ) {
5247 // Attention testers: when a test fails with this exception, it means that the action that
5348 // started execution of a query didn't call withNewExecutionId. The execution ID should be
5449 // set by calling withNewExecutionId in the action that begins execution, like
@@ -66,56 +61,27 @@ object SQLExecution {
6661 queryExecution : QueryExecution )(body : => T ): T = {
6762 val sc = sparkSession.sparkContext
6863 val oldExecutionId = sc.getLocalProperty(EXECUTION_ID_KEY )
69- if (oldExecutionId == null ) {
70- val executionId = SQLExecution .nextExecutionId
71- sc.setLocalProperty(EXECUTION_ID_KEY , executionId.toString)
72- executionIdToQueryExecution.put(executionId, queryExecution)
73- try {
74- // sparkContext.getCallSite() would first try to pick up any call site that was previously
75- // set, then fall back to Utils.getCallSite(); call Utils.getCallSite() directly on
76- // streaming queries would give us call site like "run at <unknown>:0"
77- val callSite = sparkSession.sparkContext.getCallSite()
78-
79- sparkSession.sparkContext.listenerBus.post(SparkListenerSQLExecutionStart (
80- executionId, callSite.shortForm, callSite.longForm, queryExecution.toString,
81- SparkPlanInfo .fromSparkPlan(queryExecution.executedPlan), System .currentTimeMillis()))
82- try {
83- body
84- } finally {
85- sparkSession.sparkContext.listenerBus.post(SparkListenerSQLExecutionEnd (
86- executionId, System .currentTimeMillis()))
87- }
88- } finally {
89- executionIdToQueryExecution.remove(executionId)
90- sc.setLocalProperty(EXECUTION_ID_KEY , null )
91- }
92- } else if (sc.getLocalProperty(IGNORE_NESTED_EXECUTION_ID ) != null ) {
93- // If `IGNORE_NESTED_EXECUTION_ID` is set, just ignore the execution id while evaluating the
94- // `body`, so that Spark jobs issued in the `body` won't be tracked.
64+ val executionId = SQLExecution .nextExecutionId
65+ sc.setLocalProperty(EXECUTION_ID_KEY , executionId.toString)
66+ executionIdToQueryExecution.put(executionId, queryExecution)
67+ try {
68+ // sparkContext.getCallSite() would first try to pick up any call site that was previously
69+ // set, then fall back to Utils.getCallSite(); calling Utils.getCallSite() directly on
70+ // streaming queries would give us a call site like "run at <unknown>:0"
71+ val callSite = sparkSession.sparkContext.getCallSite()
72+
73+ sparkSession.sparkContext.listenerBus.post(SparkListenerSQLExecutionStart (
74+ executionId, callSite.shortForm, callSite.longForm, queryExecution.toString,
75+ SparkPlanInfo .fromSparkPlan(queryExecution.executedPlan), System .currentTimeMillis()))
9576 try {
96- sc.setLocalProperty(EXECUTION_ID_KEY , null )
9777 body
9878 } finally {
99- sc.setLocalProperty(EXECUTION_ID_KEY , oldExecutionId)
79+ sparkSession.sparkContext.listenerBus.post(SparkListenerSQLExecutionEnd (
80+ executionId, System .currentTimeMillis()))
10081 }
101- } else {
102- // Don't support nested `withNewExecutionId`. This is an example of the nested
103- // `withNewExecutionId`:
104- //
105- // class DataFrame {
106- // def foo: T = withNewExecutionId { something.createNewDataFrame().collect() }
107- // }
108- //
109- // Note: `collect` will call withNewExecutionId
110- // In this case, only the "executedPlan" for "collect" will be executed. The "executedPlan"
111- // for the outer DataFrame won't be executed. So it's meaningless to create a new Execution
112- // for the outer DataFrame. Even if we track it, since its "executedPlan" doesn't run,
113- // all accumulator metrics will be 0. It will confuse people if we show them in Web UI.
114- //
115- // A real case is the `DataFrame.count` method.
116- throw new IllegalArgumentException (s " $EXECUTION_ID_KEY is already set, please wrap your " +
117- " action with SQLExecution.ignoreNestedExecutionId if you don't want to track the Spark " +
118- " jobs issued by the nested execution." )
82+ } finally {
83+ executionIdToQueryExecution.remove(executionId)
84+ sc.setLocalProperty(EXECUTION_ID_KEY , oldExecutionId)
11985 }
12086 }
12187
@@ -133,20 +99,4 @@ object SQLExecution {
13399 sc.setLocalProperty(SQLExecution .EXECUTION_ID_KEY , oldExecutionId)
134100 }
135101 }
136-
137- /**
138- * Wrap an action which may have nested execution id. This method can be used to run an execution
139- * inside another execution, e.g., `CacheTableCommand` need to call `Dataset.collect`. Note that,
140- * all Spark jobs issued in the body won't be tracked in UI.
141- */
142- def ignoreNestedExecutionId [T ](sparkSession : SparkSession )(body : => T ): T = {
143- val sc = sparkSession.sparkContext
144- val allowNestedPreviousValue = sc.getLocalProperty(IGNORE_NESTED_EXECUTION_ID )
145- try {
146- sc.setLocalProperty(IGNORE_NESTED_EXECUTION_ID , " true" )
147- body
148- } finally {
149- sc.setLocalProperty(IGNORE_NESTED_EXECUTION_ID , allowNestedPreviousValue)
150- }
151- }
152102}
0 commit comments