-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-20213][SQL][follow-up] introduce SQLExecution.ignoreNestedExecutionId #18419
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,6 +29,8 @@ object SQLExecution { | |
|
|
||
| val EXECUTION_ID_KEY = "spark.sql.execution.id" | ||
|
|
||
| private val IGNORE_NESTED_EXECUTION_ID = "spark.sql.execution.ignoreNestedExecutionId" | ||
|
|
||
| private val _nextExecutionId = new AtomicLong(0) | ||
|
|
||
| private def nextExecutionId: Long = _nextExecutionId.getAndIncrement | ||
|
|
@@ -42,8 +44,11 @@ object SQLExecution { | |
| private val testing = sys.props.contains("spark.testing") | ||
|
|
||
| private[sql] def checkSQLExecutionId(sparkSession: SparkSession): Unit = { | ||
| val sc = sparkSession.sparkContext | ||
| val isNestedExecution = sc.getLocalProperty(IGNORE_NESTED_EXECUTION_ID) != null | ||
| val hasExecutionId = sc.getLocalProperty(EXECUTION_ID_KEY) != null | ||
| // only throw an exception during tests. a missing execution ID should not fail a job. | ||
| if (testing && sparkSession.sparkContext.getLocalProperty(EXECUTION_ID_KEY) == null) { | ||
| if (testing && !isNestedExecution && !hasExecutionId) { | ||
| // Attention testers: when a test fails with this exception, it means that the action that | ||
| // started execution of a query didn't call withNewExecutionId. The execution ID should be | ||
| // set by calling withNewExecutionId in the action that begins execution, like | ||
|
|
@@ -65,7 +70,7 @@ object SQLExecution { | |
| val executionId = SQLExecution.nextExecutionId | ||
| sc.setLocalProperty(EXECUTION_ID_KEY, executionId.toString) | ||
| executionIdToQueryExecution.put(executionId, queryExecution) | ||
| val r = try { | ||
| try { | ||
| // sparkContext.getCallSite() would first try to pick up any call site that was previously | ||
| // set, then fall back to Utils.getCallSite(); call Utils.getCallSite() directly on | ||
| // streaming queries would give us call site like "run at <unknown>:0" | ||
|
|
@@ -84,7 +89,15 @@ object SQLExecution { | |
| executionIdToQueryExecution.remove(executionId) | ||
| sc.setLocalProperty(EXECUTION_ID_KEY, null) | ||
| } | ||
| r | ||
| } else if (sc.getLocalProperty(IGNORE_NESTED_EXECUTION_ID) != null) { | ||
| // If `IGNORE_NESTED_EXECUTION_ID` is set, just ignore the execution id while evaluating the | ||
| // `body`, so that Spark jobs issued in the `body` won't be tracked. | ||
| try { | ||
| sc.setLocalProperty(EXECUTION_ID_KEY, null) | ||
| body | ||
| } finally { | ||
| sc.setLocalProperty(EXECUTION_ID_KEY, oldExecutionId) | ||
| } | ||
| } else { | ||
| // Don't support nested `withNewExecutionId`. This is an example of the nested | ||
| // `withNewExecutionId`: | ||
|
|
@@ -100,7 +113,9 @@ object SQLExecution { | |
| // all accumulator metrics will be 0. It will confuse people if we show them in Web UI. | ||
| // | ||
| // A real case is the `DataFrame.count` method. | ||
| throw new IllegalArgumentException(s"$EXECUTION_ID_KEY is already set") | ||
| throw new IllegalArgumentException(s"$EXECUTION_ID_KEY is already set, please wrap your " + | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nested execution is a developer problem, not a user problem. That's why the original PR did not throw If this is thrown at runtime, adding the text about
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The problem is that this is an easy error to hit and it shouldn't affect end users. It is better to warn that something is wrong than to fail a job that would otherwise succeed for a bug in Spark. As for the error message, I think it is fine if we intend to leave it in. I'd just rather not fail user jobs here. I assume that DataSource developers will have tests, but probably not ones that know to set spark.testing. Is there a better way to detect test cases? |
||
| "action with SQLExecution.ignoreNestedExecutionId if you don't want to track the Spark " + | ||
| "jobs issued by the nested execution.") | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -118,4 +133,20 @@ object SQLExecution { | |
| sc.setLocalProperty(SQLExecution.EXECUTION_ID_KEY, oldExecutionId) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Wrap an action which may have nested execution id. This method can be used to run an execution | ||
| * inside another execution, e.g., `CacheTableCommand` need to call `Dataset.collect`. Note that, | ||
| * all Spark jobs issued in the body won't be tracked in UI. | ||
| */ | ||
| def ignoreNestedExecutionId[T](sparkSession: SparkSession)(body: => T): T = { | ||
| val sc = sparkSession.sparkContext | ||
| val allowNestedPreviousValue = sc.getLocalProperty(IGNORE_NESTED_EXECUTION_ID) | ||
| try { | ||
| sc.setLocalProperty(IGNORE_NESTED_EXECUTION_ID, "true") | ||
| body | ||
| } finally { | ||
| sc.setLocalProperty(IGNORE_NESTED_EXECUTION_ID, allowNestedPreviousValue) | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@viirya now we won't track the spark jobs even in
SparkListenerThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good.