-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-10248] [core] track exceptions in dagscheduler event loop in tests #8466
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -796,7 +796,8 @@ class DAGScheduler( | |
|
|
||
| private[scheduler] def cleanUpAfterSchedulerStop() { | ||
| for (job <- activeJobs) { | ||
| val error = new SparkException("Job cancelled because SparkContext was shut down") | ||
| val error = | ||
| new SparkException(s"Job ${job.jobId} cancelled because SparkContext was shut down") | ||
| job.listener.jobFailed(error) | ||
| // Tell the listeners that all of the running stages have ended. Don't bother | ||
| // cancelling the stages because if the DAG scheduler is stopped, the entire application | ||
|
|
@@ -1290,7 +1291,7 @@ class DAGScheduler( | |
| case TaskResultLost => | ||
| // Do nothing here; the TaskScheduler handles these failures and resubmits the task. | ||
|
|
||
| case other => | ||
| case _: ExecutorLostFailure | TaskKilled | UnknownReason => | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this a related change? If we add another
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If so, I think the compiler will report uncompleted matching as a warning and the build will fail. Right?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, that's fine
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this was needed just for the added test case -- I needed a way to intentionally throw an error inside the event loop, and with this change if And yeah, if you add another type but dont' include it in this match, you'll get a fatal warning in compilation since its a sealed trait. |
||
| // Unrecognized failure - also do nothing. If the task fails repeatedly, the TaskScheduler | ||
| // will abort the job. | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -43,6 +43,13 @@ class DAGSchedulerEventProcessLoopTester(dagScheduler: DAGScheduler) | |
| case NonFatal(e) => onError(e) | ||
| } | ||
| } | ||
|
|
||
| override def onError(e: Throwable): Unit = { | ||
| logError("Error in DAGSchedulerEventLoop: ", e) | ||
| dagScheduler.stop() | ||
| throw e | ||
| } | ||
|
|
||
| } | ||
|
|
||
| /** | ||
|
|
@@ -297,13 +304,18 @@ class DAGSchedulerSuite | |
|
|
||
| test("zero split job") { | ||
| var numResults = 0 | ||
| var failureReason: Option[Exception] = None | ||
| val fakeListener = new JobListener() { | ||
| override def taskSucceeded(partition: Int, value: Any) = numResults += 1 | ||
| override def jobFailed(exception: Exception) = throw exception | ||
| override def taskSucceeded(partition: Int, value: Any): Unit = numResults += 1 | ||
| override def jobFailed(exception: Exception): Unit = { | ||
| failureReason = Some(exception) | ||
| } | ||
| } | ||
| val jobId = submit(new MyRDD(sc, 0, Nil), Array(), listener = fakeListener) | ||
| assert(numResults === 0) | ||
| cancel(jobId) | ||
| assert(failureReason.isDefined) | ||
| assert(failureReason.get.getMessage() === "Job 0 cancelled ") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test used to just log an exception on |
||
| } | ||
|
|
||
| test("run trivial job") { | ||
|
|
@@ -1516,6 +1528,18 @@ class DAGSchedulerSuite | |
| assert(stackTraceString.contains("org.scalatest.FunSuite")) | ||
| } | ||
|
|
||
| test("catch errors in event loop") { | ||
| // this is a test of our testing framework -- make sure errors in event loop don't get ignored | ||
|
|
||
| // just run some bad event that will throw an exception -- we'll give a null TaskEndReason | ||
| val rdd1 = new MyRDD(sc, 1, Nil) | ||
| submit(rdd1, Array(0)) | ||
| intercept[Exception] { | ||
| complete(taskSets(0), Seq( | ||
| (null, makeMapStatus("hostA", 1)))) | ||
| } | ||
| } | ||
|
|
||
| test("simple map stage submission") { | ||
| val shuffleMapRdd = new MyRDD(sc, 2, Nil) | ||
| val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(1)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not necessary, but was helpful for some debugging, figured it couldn't hurt.