[SPARK-18838][CORE] Introduce multiple queues in LiveListenerBus #18253
Changes from all commits: 4ef43c7, 913c49f, ad21d5d, fdaea8a, f3963e6, 631e89c, d6c98d1, 8528142, ef6b45b, cc5c9f7, 441af9b
@@ -55,8 +55,7 @@ private[spark] class EventLoggingListener(
     appAttemptId : Option[String],
     logBaseDir: URI,
     sparkConf: SparkConf,
-    hadoopConf: Configuration)
-  extends SparkListener with Logging {
+    hadoopConf: Configuration) extends Logging {
 
   import EventLoggingListener._
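This first hunk carries the design shift of the whole file: `EventLoggingListener` stops being a `SparkListener` (one callback per event type) and becomes a plain class with a single entry point that a queue can feed. A toy sketch of the before/after shape, where every name is illustrative rather than Spark's actual API:

```scala
// Toy illustration of the structural change; all names here are hypothetical.
trait Event

trait Listener {
  def onStart(e: Event): Unit = ()
  def onEnd(e: Event): Unit = ()
  // ...in Spark there is one such callback per event type
}

// Before: the logger was itself a listener, re-implementing each callback.
class CallbackLogger extends Listener {
  override def onStart(e: Event): Unit = write(e)
  override def onEnd(e: Event): Unit = write(e)
  private def write(e: Event): Unit = ()
}

// After: the logger exposes a single generic entry point; deciding which
// events reach it moves upstream, into the bus or its queue.
class GenericLogger {
  def log(e: Event): Unit = ()
}
```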
@@ -90,6 +89,8 @@ private[spark] class EventLoggingListener(
   // Visible for tests only.
   private[scheduler] val logPath = getLogPath(logBaseDir, appId, appAttemptId, compressionCodecName)
 
+  private var nbMessageProcessed = 0
+
   /**
    * Creates the log file in the configured log directory.
    */
@@ -134,97 +135,38 @@ private[spark] class EventLoggingListener(
   }
 
   /** Log the event as JSON. */
-  private def logEvent(event: SparkListenerEvent, flushLogger: Boolean = false) {
+  private def logEvent(event: SparkListenerEvent) {
     val eventJson = JsonProtocol.sparkEventToJson(event)
     // scalastyle:off println
     writer.foreach(_.println(compact(render(eventJson))))
     // scalastyle:on println
-    if (flushLogger) {
-      writer.foreach(_.flush())
-      hadoopDataStream.foreach(ds => ds.getWrappedStream match {
-        case wrapped: DFSOutputStream => wrapped.hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH))
-        case _ => ds.hflush()
-      })
-    }
     if (testing) {
       loggedEvents += eventJson
+      flush()
     }
   }
 
-  // Events that do not trigger a flush
-  override def onStageSubmitted(event: SparkListenerStageSubmitted): Unit = logEvent(event)
-
-  override def onTaskStart(event: SparkListenerTaskStart): Unit = logEvent(event)
-
-  override def onTaskGettingResult(event: SparkListenerTaskGettingResult): Unit = logEvent(event)
-
-  override def onTaskEnd(event: SparkListenerTaskEnd): Unit = logEvent(event)
-
-  override def onEnvironmentUpdate(event: SparkListenerEnvironmentUpdate): Unit = {
-    logEvent(redactEvent(event))
-  }
-
-  // Events that trigger a flush
-  override def onStageCompleted(event: SparkListenerStageCompleted): Unit = {
-    logEvent(event, flushLogger = true)
+  private def flush(): Unit = {
+    writer.foreach(_.flush())
+    hadoopDataStream.foreach(ds => ds.getWrappedStream match {
+      case wrapped: DFSOutputStream => wrapped.hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH))
+      case _ => ds.hflush()
+    })
   }
 
-  override def onJobStart(event: SparkListenerJobStart): Unit = logEvent(event, flushLogger = true)
-
-  override def onJobEnd(event: SparkListenerJobEnd): Unit = logEvent(event, flushLogger = true)
-
-  override def onBlockManagerAdded(event: SparkListenerBlockManagerAdded): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onBlockManagerRemoved(event: SparkListenerBlockManagerRemoved): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onUnpersistRDD(event: SparkListenerUnpersistRDD): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onApplicationStart(event: SparkListenerApplicationStart): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onApplicationEnd(event: SparkListenerApplicationEnd): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-  override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onExecutorRemoved(event: SparkListenerExecutorRemoved): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onExecutorBlacklisted(event: SparkListenerExecutorBlacklisted): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onExecutorUnblacklisted(event: SparkListenerExecutorUnblacklisted): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onNodeBlacklisted(event: SparkListenerNodeBlacklisted): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  override def onNodeUnblacklisted(event: SparkListenerNodeUnblacklisted): Unit = {
-    logEvent(event, flushLogger = true)
-  }
-
-  // No-op because logging every update would be overkill
-  override def onBlockUpdated(event: SparkListenerBlockUpdated): Unit = {}
-
-  // No-op because logging every update would be overkill
-  override def onExecutorMetricsUpdate(event: SparkListenerExecutorMetricsUpdate): Unit = { }
-
-  override def onOtherEvent(event: SparkListenerEvent): Unit = {
+  def log(event: SparkListenerEvent): Unit = {
     if (event.logEvent) {
-      logEvent(event, flushLogger = true)
+      val toLog = event match {
+        case update: SparkListenerEnvironmentUpdate =>
+          redactEvent(update)
+        case _ => event
+      }
+      logEvent(toLog)
+      nbMessageProcessed = nbMessageProcessed + 1
+      if (nbMessageProcessed >= FLUSH_FREQUENCY) {
+        flush()
+        nbMessageProcessed = 0
+      }
     }
   }
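Taken together, this hunk replaces the old per-event `flushLogger` decision with counter-based batching: every event is still written immediately, but the underlying stream is only flushed (`hsync`/`hflush`) once every `FLUSH_FREQUENCY` events. A minimal, self-contained sketch of that pattern, where `BatchedEventLogger`, `flushFrequency`, and `eventsSinceFlush` are illustrative names rather than Spark's actual code:

```scala
import java.io.PrintWriter

// Sketch of counter-based flush batching; names are hypothetical.
class BatchedEventLogger(writer: PrintWriter, flushFrequency: Int = 200) {
  private var eventsSinceFlush = 0

  def log(eventJson: String): Unit = {
    writer.println(eventJson)   // every event is written immediately
    eventsSinceFlush += 1
    if (eventsSinceFlush >= flushFrequency) {
      writer.flush()            // the stream is synced only once every N events
      eventsSinceFlush = 0
    }
  }
}
```

The trade-off is the usual one for batched I/O: far fewer expensive sync calls per event, at the cost of losing up to `FLUSH_FREQUENCY - 1` buffered events if the process dies between flushes.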
@@ -278,6 +220,12 @@ private[spark] object EventLoggingListener extends Logging {
   val IN_PROGRESS = ".inprogress"
   val DEFAULT_LOG_DIR = "/tmp/spark-events"
 
+  private val FLUSH_FREQUENCY = 200
+
+  val EVENT_FILTER: SparkListenerEvent => Boolean =
+    ev => !(ev.isInstanceOf[SparkListenerBlockUpdated] ||
+      ev.isInstanceOf[SparkListenerExecutorMetricsUpdate])
+
   private val LOG_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort)
 
   // A cache for compression codecs to avoid creating the same codec many times
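The new `EVENT_FILTER` replaces what the old code expressed as no-op overrides: block updates and executor metrics updates are dropped because logging every update would be overkill, but the decision is now a predicate that can be applied upstream of the logger. A sketch of how such a predicate could be used when draining a queue of events; the `drain` helper and the wrapping object are hypothetical, and only the predicate itself comes from this diff:

```scala
import org.apache.spark.scheduler._

object EventFilterSketch {
  // The predicate from the diff, restated so the sketch is self-contained.
  val eventFilter: SparkListenerEvent => Boolean =
    ev => !(ev.isInstanceOf[SparkListenerBlockUpdated] ||
      ev.isInstanceOf[SparkListenerExecutorMetricsUpdate])

  // Hypothetical helper: only events passing the filter reach the sink.
  def drain(events: Iterator[SparkListenerEvent],
            sink: SparkListenerEvent => Unit): Unit =
    events.filter(eventFilter).foreach(sink)
}
```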
Review discussion:

- I'm having a hard time finding the declaration of this method. I can't find it in your code or in the existing master branch. Can you link to it?
- Found this, @vanzin: https://github.com/apache/spark/pull/18253/files#diff-ca0fe05a42fd5edcab8a1bdaa8e58db9R86
- In LiveListenerBus.scala, line 86.