[SPARK-20656][CORE] Support Incremental parsing of event logs in SHS #26821
@@ -129,6 +129,13 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   private val storePath = conf.get(LOCAL_STORE_DIR).map(new File(_))
   private val fastInProgressParsing = conf.get(FAST_IN_PROGRESS_PARSING)
+  // If incremental parsing support configuration is enabled, underlying store will not close
+  // during invalidate UI or detached UI. Metadata of the event read will store in the

Review comment: nit: invalidating / detaching

+  // `IncrmentalInfo`. Whenever a new event come, parsing will happen from the line it

Review comment: I guess …

+  // read last time. Currently it supports inmemory store. TODO: Support for disk store.

Review comment: Please place the TODO as a separate comment line so that the IDE can highlight it.

+  private val isIncrementalParsingEnabled = storePath.isEmpty &&

Review comment: We seem to not add …

+    conf.get(History.INCREMENTAL_PARSING_ENABLED)
+  private val storeMap = new ConcurrentHashMap[(String, Option[String]), KVStore]()

   // Visible for testing.
   private[history] val listing: KVStore = storePath.map { path =>

@@ -342,7 +349,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
           loadDiskStore(sm, appId, attempt)

         case _ =>
-          createInMemoryStore(attempt)
+          createInMemoryStore(appId, attempt)
       }
     } catch {
       case _: FileNotFoundException =>

@@ -411,19 +418,22 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     val uiOption = synchronized {
       activeUIs.remove((appId, attemptId))
     }
-    uiOption.foreach { loadedUI =>
-      loadedUI.lock.writeLock().lock()
-      try {
-        loadedUI.ui.store.close()
-      } finally {
-        loadedUI.lock.writeLock().unlock()
-      }
+    // If incremental parsing is enabled, will not close the underlying store.

Review comment: Looks redundant, as this is already explained above. Either one would be sufficient.

+    if (!isIncrementalParsingEnabled) {
+      uiOption.foreach { loadedUI =>
+        loadedUI.lock.writeLock().lock()
+        try {
+          loadedUI.ui.store.close()
+        } finally {
+          loadedUI.lock.writeLock().unlock()
+        }

-      diskManager.foreach { dm =>
-        // If the UI is not valid, delete its files from disk, if any. This relies on the fact that
-        // ApplicationCache will never call this method concurrently with getAppUI() for the same
-        // appId / attemptId.
-        dm.release(appId, attemptId, delete = !loadedUI.valid)
+        diskManager.foreach { dm =>
+          // If the UI is not valid, delete its files from disk, if any. This relies on the fact
+          // that ApplicationCache will never call this method concurrently with getAppUI() for
+          // the same appId / attemptId.
+          dm.release(appId, attemptId, delete = !loadedUI.valid)
+        }
       }
     }
   }

@@ -615,10 +625,20 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     }

     maybeUI.foreach { ui =>
+      if (isIncrementalParsingEnabled) {
+        storeMap.remove(appId -> attemptId)
+      }
       ui.invalidate()
       ui.ui.store.close()
     }

+    if (isIncrementalParsingEnabled) {

Review comment: I don't see a reason to maintain the metadata of reading in the kvstore, given we lose everything upon SHS restart. You may want to store it in another HashMap, or even simply store it within storeMap. (You may want to have another class for the data structure of the storeMap entry.)

+      try {
+        listing.delete(classOf[IncrimentalMetaInfo], Array(Some(appId), attemptId))
+      } catch {
+        case _: NoSuchElementException =>
+      }
+    }
     diskManager.foreach(_.release(appId, attemptId, delete = true))
     true
   }

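For illustration only, the reviewer's suggestion above could take roughly the shape below: keep the in-memory store and the last parse position together in a single storeMap entry instead of persisting `IncrimentalMetaInfo` records in the `listing` KVStore. The names (`IncrementalStoreEntry`, `IncrementalStoreMap`) are assumptions, not part of the PR.

```scala
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.util.kvstore.{InMemoryStore, KVStore}

// Hypothetical sketch: one map entry per (appId, attemptId) carrying both the cached store and
// the position reached by the previous incremental parse.
case class IncrementalStoreEntry(
    store: KVStore = new InMemoryStore(),
    fileIndex: Int = 0,
    lineToSkip: Int = -1)

object IncrementalStoreMap {
  // Keyed by (appId, attemptId), like the PR's storeMap.
  val entries = new ConcurrentHashMap[(String, Option[String]), IncrementalStoreEntry]()
}
```
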
@@ -694,7 +714,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     logInfo(s"Parsing $logPath for listing data...")
     val logFiles = reader.listEventLogFiles
-    parseAppEventLogs(logFiles, bus, !appCompleted, eventsFilter)
+    parseAppEventLogs(logFiles, bus, !appCompleted, None, eventsFilter)

     // If enabled above, the listing listener will halt parsing when there's enough information to
     // create a listing entry. When the app is completed, or fast parsing is disabled, we still need

@@ -735,7 +755,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         source.next()
       }

-      bus.replay(source, lastFile.getPath.toString, !appCompleted, eventsFilter)
+      bus.replay(source, lastFile.getPath.toString, !appCompleted, eventsFilter, linesToSkip = -1)
     }
   }

@@ -793,7 +813,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     synchronized {
       activeUIs.get((appId, attemptId)).foreach { ui =>
         ui.invalidate()
-        ui.ui.store.close()
+        // If incremental parsing is enabled, will not close the underlying store
+        // on invalidate UI.

Review comment: Same here: looks redundant, as this is already explained above. Either one would be sufficient.

+        if (!isIncrementalParsingEnabled) {
+          ui.ui.store.close()
+        }
       }
     }
   }

@@ -948,24 +972,30 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   private def rebuildAppStore(
       store: KVStore,
       reader: EventLogFileReader,
-      lastUpdated: Long): Unit = {
+      lastUpdated: Long,
+      incrimentInfo: Option[IncrimentalMetaInfo] = None): Unit = {

Review comment: nit: …

     // Disable async updates, since they cause higher memory usage, and it's ok to take longer
     // to parse the event logs in the SHS.
     val replayConf = conf.clone().set(ASYNC_TRACKING_ENABLED, false)
     val trackingStore = new ElementTrackingStore(store, replayConf)
     val replayBus = new ReplayListenerBus()
     val listener = new AppStatusListener(trackingStore, replayConf, false,
       lastUpdateTime = Some(lastUpdated))

+    incrimentInfo.foreach(info => listener.initialize(info.appId, info.attemptId))
     replayBus.addListener(listener)

     for {
       plugin <- loadPlugins()
       listener <- plugin.createListeners(conf, trackingStore)
-    } replayBus.addListener(listener)
+    } {
+      incrimentInfo.foreach(info => plugin.initialize(listener, info.appId, info.attemptId))

Review comment: Personally I'd add a new abstraction (like …

+      replayBus.addListener(listener)
+    }

     try {
       logInfo(s"Parsing ${reader.rootPath} to re-build UI...")
-      parseAppEventLogs(reader.listEventLogFiles, replayBus, !reader.completed)
+      parseAppEventLogs(reader.listEventLogFiles, replayBus, !reader.completed, incrimentInfo)
       trackingStore.close(false)
       logInfo(s"Finished parsing ${reader.rootPath}")
     } catch {

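The reviewer's suggested abstraction name is cut off above, so the following is purely a hypothetical sketch of what such an abstraction could look like: a small trait that any listener able to resume from a previous parse opts into, so rebuildAppStore would not need listener- or plugin-specific initialize calls. None of these names come from the PR.

```scala
// Hypothetical trait: listeners that keep incremental state implement it; others are left alone.
trait SupportsIncrementalReplay {
  def initializeIncremental(appId: String, attemptId: Option[String]): Unit
}

// rebuildAppStore could then initialize every listener uniformly before adding it to the bus.
def initIfIncremental(listener: AnyRef, appId: String, attemptId: Option[String]): Unit = {
  listener match {
    case inc: SupportsIncrementalReplay => inc.initializeIncremental(appId, attemptId)
    case _ => // listener keeps no incremental state; nothing to do
  }
}
```
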
@@ -981,14 +1011,28 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       logFiles: Seq[FileStatus],
       replayBus: ReplayListenerBus,
       maybeTruncated: Boolean,
+      info: Option[IncrimentalMetaInfo] = None,
       eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = {
     // stop replaying next log files if ReplayListenerBus indicates some error or halt
     var continueReplay = true
+    var lineToSkip = info.map(_.lineToSkip).getOrElse(-1)
+    val fileToStart = info.map(_.fileIndex).getOrElse(0)
+    var fileIndex = 0
     logFiles.foreach { file =>
-      if (continueReplay) {
+      if (continueReplay && fileIndex >= fileToStart) {
         Utils.tryWithResource(EventLogFileReader.openEventLog(file.getPath, fs)) { in =>
-          continueReplay = replayBus.replay(in, file.getPath.toString,
-            maybeTruncated = maybeTruncated, eventsFilter = eventsFilter)
+          val result = replayBus.replay(in, file.getPath.toString,
+            maybeTruncated = maybeTruncated, eventsFilter = eventsFilter, lineToSkip)
+          continueReplay = result.success
+          // We need to reset the lineToSkip to -1 as we need to parse next file from the beginning

Review comment: I'm not sure this is a good place to update the IncrementalMetaInfo: … So it may be better to propagate the information to the caller and let the caller deal with it. And as a side note, this will not work with compaction (#26416) for various reasons: … But let's put that aside - if #26416 is merged earlier, this should be reconsidered.

+          lineToSkip = -1
+          if (info.isDefined && (!continueReplay || fileIndex >= logFiles.size -1)) {
+            val updatedInfo = info.get.copy(fileIndex = fileIndex, lineToSkip = result.linesRead)
+            listing.write(updatedInfo)
+          }
         }
+        if (info.isDefined) {
+          fileIndex += 1
+        }
       }
     }

@@ -1095,11 +1139,30 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     KVUtils.open(newStorePath, metadata)
   }

-  private def createInMemoryStore(attempt: AttemptInfoWrapper): KVStore = {
-    val store = new InMemoryStore()
+  private def createInMemoryStore(appId: String, attempt: AttemptInfoWrapper): KVStore = {
+    val store = if (isIncrementalParsingEnabled) {

Review comment: The code is hard to read.

+      storeMap.getOrDefault(appId -> attempt.info.attemptId, new InMemoryStore())
+    } else {
+      new InMemoryStore
+    }
     val reader = EventLogFileReader(fs, new Path(logDir, attempt.logPath),
       attempt.lastIndex)
-    rebuildAppStore(store, reader, attempt.info.lastUpdated.getTime())
+    // Incremental info is valid only if incremental parsing feature is enabled.
+    val info: Option[IncrimentalMetaInfo] = try {
+      if (isIncrementalParsingEnabled) {
+        Some(listing.read(classOf[IncrimentalMetaInfo], Array(Some(appId), attempt.info.attemptId)))
+      } else None
+    } catch {
+      case _: NoSuchElementException =>
+        val info = IncrimentalMetaInfo(appId, attempt.info.attemptId,
+          fileIndex = 0, lineToSkip = -1)
+        listing.write(info)
+        Some(info)
+    }
+    rebuildAppStore(store, reader, attempt.info.lastUpdated.getTime(), info)

Review comment: We may want to discard …

+    if (isIncrementalParsingEnabled) {
+      storeMap.put(appId -> attempt.info.attemptId, store)
+    }
     store
   }

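One possible way to address the readability concern above, shown only as a sketch (it assumes the same `storeMap`, `appId` and `attempt` values that are in scope in `createInMemoryStore`, and is not the PR's code): `computeIfAbsent` both looks up and caches the per-attempt store in one step, which would also make the separate `storeMap.put(...)` at the end of the method unnecessary.

```scala
// Sketch: reuse any store already cached for this attempt, otherwise create and cache a new one.
val store: KVStore =
  if (isIncrementalParsingEnabled) {
    storeMap.computeIfAbsent(appId -> attempt.info.attemptId, _ => new InMemoryStore())
  } else {
    new InMemoryStore()
  }
```
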
@@ -1174,6 +1237,15 @@ private[history] case class LogInfo(
     lastIndex: Option[Long],
     isComplete: Boolean)

+private[history] case class IncrimentalMetaInfo(

Review comment: nit: But as I commented earlier, I don't see a reason why this is needed. This will be required when we support incremental parsing upon SHS restart.

+    appId: String,
+    attemptId: Option[String],
+    fileIndex: Int,
+    lineToSkip: Int) {
+  @JsonIgnore @KVIndex
+  private def stage: Array[Option[String]] = Array(Some(appId), attemptId)
+}

 private[history] class AttemptInfoWrapper(
     val info: ApplicationAttemptInfo,
     val logPath: String,

@@ -107,6 +107,10 @@ private[spark] object History {
     .toSequence
     .createWithDefault(Nil)

+  val INCREMENTAL_PARSING_ENABLED = ConfigBuilder("spark.history.incremental.parsing.enabled")

Review comment: … And you may want to explain the feature at least in configuration.md.

+    .booleanConf
+    .createWithDefault(true)

Review comment: I'd turn it off by default for a couple of bugfix (or even minor) versions.

   val NUM_REPLAY_THREADS = ConfigBuilder("spark.history.fs.numReplayThreads")
     .intConf
     .createWithDefaultFunction(() => Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt)

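As a usage illustration for the new flag defined above (read via `History.INCREMENTAL_PARSING_ENABLED` in FsHistoryProvider), a deployment would toggle it through the History Server's Spark configuration, typically in `spark-defaults.conf`. The snippet below is only an example, echoing the reviewer's suggestion to ship the feature disabled at first; the PR itself defaults it to true.

```scala
import org.apache.spark.SparkConf

// Illustration: disable the feature explicitly. The same key can be placed in the
// spark-defaults.conf used by the Spark History Server.
val conf = new SparkConf().set("spark.history.incremental.parsing.enabled", "false")
```
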
@@ -55,9 +55,10 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
       logData: InputStream,
       sourceName: String,
       maybeTruncated: Boolean = false,
-      eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Boolean = {
+      eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER,
+      linesToSkip: Int = -1): ReplayResult = {

Review comment: Please update the scaladoc accordingly.

     val lines = Source.fromInputStream(logData)(Codec.UTF8).getLines()
-    replay(lines, sourceName, maybeTruncated, eventsFilter)
+    replay(lines, sourceName, maybeTruncated, eventsFilter, linesToSkip)
   }

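Following the note about the scaladoc, one possible wording for the new parameter and return value is sketched below. The phrasing is a suggestion only, not text from the PR; it is written to match the skip/resume semantics of the implementation shown further down.

```scala
/**
 * @param linesToSkip 0-based index of the last line already replayed in a previous pass;
 *                    lines up to and including this index are skipped. Pass -1 to replay
 *                    the stream from the beginning.
 * @return a ReplayResult carrying whether replay completed without being halted and the
 *         0-based index of the last line that was read, so a later call can resume from it.
 */
```
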
@@ -68,25 +69,29 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
       lines: Iterator[String],
       sourceName: String,
       maybeTruncated: Boolean,
-      eventsFilter: ReplayEventsFilter): Boolean = {
+      eventsFilter: ReplayEventsFilter,
+      linesToSkip: Int): ReplayResult = {
     var currentLine: String = null
     var lineNumber: Int = 0
+    var lastLine = linesToSkip
     val unrecognizedEvents = new scala.collection.mutable.HashSet[String]
     val unrecognizedProperties = new scala.collection.mutable.HashSet[String]

     try {
       val lineEntries = lines
         .zipWithIndex
-        .filter { case (line, _) => eventsFilter(line) }
+        .filter { case (line, index) =>
+          index > linesToSkip && eventsFilter(line)
+        }

       while (lineEntries.hasNext) {
         try {
           val entry = lineEntries.next()

           currentLine = entry._1
           lineNumber = entry._2 + 1

Review comment: nit: I feel it's better to have an empty line between the two.

           postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
+          lastLine = entry._2
         } catch {
           case e: ClassNotFoundException =>
             // Ignore unknown events, parse through the event log file.

@@ -116,18 +121,19 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
           }
         }
       }
-      true
+      ReplayResult(success = true, lastLine)
     } catch {
       case e: HaltReplayException =>
         // Just stop replay.
-        false
-      case _: EOFException if maybeTruncated => false
+        ReplayResult(success = false, lastLine)
+      case _: EOFException if maybeTruncated =>
+        ReplayResult(success = false, lastLine)
       case ioe: IOException =>
         throw ioe
       case e: Exception =>
         logError(s"Exception parsing Spark event log: $sourceName", e)
         logError(s"Malformed line #$lineNumber: $currentLine\n")
-        false
+        ReplayResult(success = false, lastLine)
     }
   }

@@ -137,6 +143,8 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
 }

+private[spark] case class ReplayResult(success: Boolean, linesRead: Int)

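To make the resume contract concrete, here is a small self-contained illustration (a toy model, not Spark code) of how `linesToSkip` and the returned last-read index compose across two passes, mirroring the filter and `lastLine` bookkeeping in the replay method above:

```scala
// Toy model of the skip/resume behaviour implemented by ReplayListenerBus.replay.
object ReplayResumeExample extends App {
  val lines = Seq("event-0", "event-1", "event-2", "event-3")

  // Returns the 0-based index of the last line consumed, like ReplayResult.linesRead.
  def replayFrom(linesToSkip: Int): Int = {
    var lastLine = linesToSkip
    lines.iterator.zipWithIndex
      .filter { case (_, index) => index > linesToSkip }
      .foreach { case (line, index) =>
        println(s"replaying $line")
        lastLine = index
      }
    lastLine
  }

  val firstPass = replayFrom(-1)          // replays all four lines, returns 3
  val secondPass = replayFrom(firstPass)  // nothing left to replay, still returns 3
  assert(firstPass == 3 && secondPass == 3)
}
```
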
 /**
  * Exception that can be thrown by listeners to halt replay. This is handled by ReplayListenerBus
  * only, and will cause errors if thrown when using other bus implementations.

@@ -52,6 +52,8 @@ private[spark] class AppStatusListener(
   private var appInfo: v1.ApplicationInfo = null
   private var appSummary = new AppSummary(0, 0)
   private var coresPerTask: Int = 1
+  private var appId: String = _
+  private var attemptId: Option[String] = None

   // How often to update live entities. -1 means "never update" when replaying applications,
   // meaning only the last write will happen. For live applications, this avoids a few

@@ -100,6 +102,41 @@ private[spark] class AppStatusListener(
     if (!live) {
       val now = System.nanoTime()
       flush(update(_, now))
+      if (appId != null) {

Review comment: This should be placed before …

+        // If incremental parsing is enabled, write the listener data to the store
+        val data = new AppStatusListenerData(appId, attemptId, liveStages, liveJobs,
+          liveExecutors, deadExecutors, liveTasks, liveRDDs,
+          pools, appInfo, coresPerTask, appSummary, activeExecutorCount)
+        kvstore.write(data)
+      }
     }
   }

+  def initialize(appId: String, attemptId: Option[String]): Unit = {
+    if (!live) {
+      // If incremental parsing is enabled, read and update the listener data
+      // from store
+      this.appId = appId

Review comment: I'd rather have some case class (or simply a tuple) to store the information together, and name it so that it clearly represents that incremental parsing is used. Checking whether appId is available or not to determine whether incremental parsing is used wouldn't be easy for a newcomer to the code to understand.

+      this.attemptId = attemptId
+      try {
+        val listenerData = kvstore.read(classOf[AppStatusListenerData],
+          Array(Some(appId), attemptId))
+        listenerData.liveStages.entrySet().asScala.foreach { entry =>
+          liveStages.put(entry.getKey, entry.getValue)
+        }
+        listenerData.liveJobs.map{entry => liveJobs.put(entry._1, entry._2)}
+        listenerData.liveExecutors.map{entry => liveExecutors.put(entry._1, entry._2)}
+        listenerData.deadExecutors.map{entry => deadExecutors.put(entry._1, entry._2)}
+        listenerData.liveTasks.map{entry => liveTasks.put(entry._1, entry._2)}
+        listenerData.liveRDDs.map{entry => liveRDDs.put(entry._1, entry._2)}
+        listenerData.pools.map{entry => pools.put(entry._1, entry._2)}
+        appInfo = listenerData.appInfo
+        appSummary = listenerData.appSummary
+        coresPerTask = listenerData.coresPerTask
+        activeExecutorCount = listenerData.activeExecutorCount
+      } catch {
+        case _: NoSuchElementException =>
+      }
+    }
+  }

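A sketch of the case-class suggestion above, with hypothetical names that are not part of the PR: carrying the identifiers in one optional value makes "incremental parsing is in use" explicit, instead of testing `appId != null`.

```scala
// Hypothetical names, illustrating the reviewer's suggestion.
case class IncrementalParsingContext(appId: String, attemptId: Option[String])

class ExampleStatusListener {
  // None means incremental parsing is not in use for this replay.
  private var incremental: Option[IncrementalParsingContext] = None

  def initialize(appId: String, attemptId: Option[String]): Unit = {
    incremental = Some(IncrementalParsingContext(appId, attemptId))
    // ... restore previously checkpointed listener state here, as in the PR ...
  }

  def onApplicationEnd(): Unit = {
    // Checkpoint listener state only when incremental parsing is active.
    incremental.foreach { ctx =>
      println(s"would write AppStatusListenerData for ${ctx.appId} / ${ctx.attemptId}")
    }
  }
}
```
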
Review comment: `incremental parsing support configuration` seems odd. IMHO, just `incremental parsing` would work. `underlying APP kvstore` or some better words?