Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,25 @@ private[spark] class HistoryAppStatusStore(
}
}

val STDOUT_URL_GROUPS = conf.get(LIVE_EXECUTOR_LOR_URL_CAPTURING_GROUPS)
.map(_.split(","))
.getOrElse(Array.empty[String])

val STDOUT_URL_REGEX = conf.get(LIVE_EXECUTOR_LOG_URL_REGEX)
.map(r => new Regex(r, STDOUT_URL_GROUPS: _*))

private def replaceLogUrls(exec: v1.ExecutorSummary, urlPattern: String): v1.ExecutorSummary = {
val attributes = exec.attributes
val attributes = STDOUT_URL_REGEX.flatMap { regEx =>
exec.executorLogs
.get("stdout")
.flatMap(regEx.findFirstMatchIn)
.map { urlMatch =>
STDOUT_URL_GROUPS.foldLeft(exec.attributes) { (attribs, groupName) =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this block should be executed at all when exec.attributes is not empty; IMO this should only apply to legacy event logs that don't record that information.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 Let's just use attributes if it exists. I can see the case this helps even if attributes are presented: the number of attributes are updated which new attributes can be extracted from URL, but at least with current YARN URL we have extracted almost everything so it will unlikely happen.

attribs.updated(groupName, urlMatch.group(groupName))
}
}
}
.getOrElse(exec.attributes)

// Relation between pattern {{FILE_NAME}} and attribute {{LOG_FILES}}
// Given that HistoryAppStatusStore don't know which types of log files can be provided
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/scala/org/apache/spark/internal/config/History.scala
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,16 @@ private[spark] object History {
"not running spark applications.")
.booleanConf
.createWithDefault(true)

val LIVE_EXECUTOR_LOG_URL_REGEX =
ConfigBuilder("spark.history.executor.log.url.regex")
.doc("Regex to extract node and container information from log urls")
.stringConf
.createOptional

val LIVE_EXECUTOR_LOR_URL_CAPTURING_GROUPS =
ConfigBuilder("spark.history.executor.log.url.regex.groups")
.doc("CSV of group names that can later be used to build post runtime URL")
.stringConf
.createOptional
}