From 9bcb02da53cbe8a402960ce812c0fdaa1faa375b Mon Sep 17 00:00:00 2001 From: Alex Bozarth Date: Fri, 26 Aug 2016 13:12:50 -0700 Subject: [PATCH 1/6] Added limit param to applications list api and maxApplications conf to HistoryServer --- .../apache/spark/deploy/history/HistoryServer.scala | 5 ++++- .../spark/status/api/v1/ApplicationListResource.scala | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index d821474bdb59..c51bdf5cb15a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -55,6 +55,9 @@ class HistoryServer( // How many applications to retain private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50) + // How many applications are available + private val maxApplications = conf.getInt("spark.history.maxApplications", Integer.MAX_VALUE) + // application private val appCache = new ApplicationCache(this, retainedApplications, new SystemClock()) @@ -171,7 +174,7 @@ class HistoryServer( * @return List of all known applications. 
*/ def getApplicationList(): Iterable[ApplicationHistoryInfo] = { - provider.getListing() + provider.getListing().take(maxApplications) } def getApplicationInfoList: Iterator[ApplicationInfo] = { diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala index 02fd2985fa20..075b9ba37dc8 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala @@ -29,7 +29,8 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) { def appList( @QueryParam("status") status: JList[ApplicationStatus], @DefaultValue("2010-01-01") @QueryParam("minDate") minDate: SimpleDateParam, - @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam) + @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam, + @QueryParam("limit") limit: Integer) : Iterator[ApplicationInfo] = { val allApps = uiRoot.getApplicationInfoList val adjStatus = { @@ -41,7 +42,7 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) { } val includeCompleted = adjStatus.contains(ApplicationStatus.COMPLETED) val includeRunning = adjStatus.contains(ApplicationStatus.RUNNING) - allApps.filter { app => + val appList = allApps.filter { app => val anyRunning = app.attempts.exists(!_.completed) // if any attempt is still running, we consider the app to also still be running val statusOk = (!anyRunning && includeCompleted) || @@ -53,6 +54,11 @@ private[v1] class ApplicationListResource(uiRoot: UIRoot) { } statusOk && dateOk } + if (limit != null) { + appList.take(limit) + } else { + appList + } } } From fab32f980d44ae42b690453dcfb192dd83d05e34 Mon Sep 17 00:00:00 2001 From: Alex Bozarth Date: Fri, 26 Aug 2016 13:21:05 -0700 Subject: [PATCH 2/6] Switched to use ConfigBuilder --- .../scala/org/apache/spark/deploy/history/HistoryServer.scala | 3 ++- 
.../main/scala/org/apache/spark/internal/config/package.scala | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index c51bdf5cb15a..63192243e83b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -28,6 +28,7 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, ApplicationsListResource, UIRoot} import org.apache.spark.ui.{SparkUI, UIUtils, WebUI} import org.apache.spark.ui.JettyUtils._ @@ -56,7 +57,7 @@ class HistoryServer( private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50) // How many applications are available - private val maxApplications = conf.getInt("spark.history.maxApplications", Integer.MAX_VALUE) + private val maxApplications = conf.get(HISTORY_MAX_APPS); // application private val appCache = new ApplicationCache(this, retainedApplications, new SystemClock()) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 47174e4efee8..3bafbd991734 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -119,4 +119,8 @@ package object config { private[spark] val UI_RETAINED_TASKS = ConfigBuilder("spark.ui.retainedTasks") .intConf .createWithDefault(100000) + + // To limit how many applications are available in the History Server + private val HISTORY_MAX_APPS = + 
ConfigBuilder("spark.history.maxApplications").intConf.createWithDefault(Integer.MAX_VALUE) } From 6163f132f686571707262eee2141f890a1de05d4 Mon Sep 17 00:00:00 2001 From: Alex Bozarth Date: Fri, 26 Aug 2016 13:35:32 -0700 Subject: [PATCH 3/6] Added tests and fixed a typos --- .../spark/internal/config/package.scala | 2 +- .../limit_app_list_json_expectation.json | 67 +++++++++++++++++++ .../deploy/history/HistoryServerSuite.scala | 1 + dev/.rat-excludes | 1 + 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 3bafbd991734..1e7e866d376e 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -121,6 +121,6 @@ package object config { .createWithDefault(100000) // To limit how many applications are available in the History Server - private val HISTORY_MAX_APPS = + private[spark] val HISTORY_MAX_APPS = ConfigBuilder("spark.history.maxApplications").intConf.createWithDefault(Integer.MAX_VALUE) } diff --git a/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json new file mode 100644 index 000000000000..9165f549d7d2 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json @@ -0,0 +1,67 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:11.398GMT", + "lastUpdated" : "", + "duration" : 10505, + "sparkUser" : "irashid", + "completed" : true, + "startTimeEpoch" : 1430917380893, + "endTimeEpoch" : 1430917391398, + "lastUpdatedEpoch" : 0 + } ] +}, 
{ + "id" : "local-1430917381535", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:00.950GMT", + "lastUpdated" : "", + "duration" : 57, + "sparkUser" : "irashid", + "completed" : true, + "startTimeEpoch" : 1430917380893, + "endTimeEpoch" : 1430917380950, + "lastUpdatedEpoch" : 0 + }, { + "attemptId" : "1", + "startTime" : "2015-05-06T13:03:00.880GMT", + "endTime" : "2015-05-06T13:03:00.890GMT", + "lastUpdated" : "", + "duration" : 10, + "sparkUser" : "irashid", + "completed" : true, + "startTimeEpoch" : 1430917380880, + "endTimeEpoch" : 1430917380890, + "lastUpdatedEpoch" : 0 + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "lastUpdated" : "", + "duration" : 34935, + "sparkUser" : "irashid", + "completed" : true, + "startTimeEpoch" : 1426633910242, + "endTimeEpoch" : 1426633945177, + "lastUpdatedEpoch" : 0 + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "lastUpdated" : "", + "duration" : 34935, + "sparkUser" : "irashid", + "completed" : true, + "startTimeEpoch" : 1426533910242, + "endTimeEpoch" : 1426533945177, + "lastUpdatedEpoch" : 0 + } ] +} ] diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 631a7cd9d5d7..ae3f5d9c012e 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -100,6 +100,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers "minDate app list json" -> "applications?minDate=2015-02-10", "maxDate app list json" -> "applications?maxDate=2015-02-10", "maxDate2 app 
list json" -> "applications?maxDate=2015-02-03T16:42:40.000GMT", + "limit app list json" -> "applications?limit=3", "one app json" -> "applications/local-1422981780767", "one app multi-attempt json" -> "applications/local-1426533911241", "job list json" -> "applications/local-1422981780767/jobs", diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 9171f3806e42..a3efddeaa515 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -101,3 +101,4 @@ org.apache.spark.scheduler.ExternalClusterManager .*\.sql .Rbuildignore org.apache.spark.deploy.yarn.security.ServiceCredentialProvider +spark-warehouse From 172dc56858ae35e70950aad78675dd932a9e4e93 Mon Sep 17 00:00:00 2001 From: Alex Bozarth Date: Fri, 26 Aug 2016 16:23:36 -0700 Subject: [PATCH 4/6] Switched to js solution and updated docs --- .../org/apache/spark/ui/static/historypage.js | 8 +++++++- .../org/apache/spark/deploy/history/HistoryPage.scala | 3 ++- .../apache/spark/deploy/history/HistoryServer.scala | 6 +++--- .../org/apache/spark/internal/config/package.scala | 6 +++--- dev/.rat-excludes | 1 - docs/monitoring.md | 11 ++++++++++- 6 files changed, 25 insertions(+), 10 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index 5b9afb59ef8e..c8094005c65d 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -15,6 +15,12 @@ * limitations under the License. */ +var appLimit = -1; + +function setAppLimit(val) { + appLimit = val; +} + function makeIdNumeric(id) { var strs = id.split("_"); if (strs.length < 3) { @@ -89,7 +95,7 @@ $(document).ready(function() { requestedIncomplete = getParameterByName("showIncomplete", searchString); requestedIncomplete = (requestedIncomplete == "true" ? 
true : false); - $.getJSON("api/v1/applications", function(response,status,jqXHR) { + $.getJSON("api/v1/applications?limit=" + appLimit, function(response,status,jqXHR) { var array = []; var hasMultipleAttempts = false; for (i in response) { diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index 74f78021ed6c..b4f5a6114f3d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -45,7 +45,8 @@ private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("") ++ ++ ++ - + ++ + } else if (requestedIncomplete) {

No incomplete applications found!

} else { diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 63192243e83b..c178917d8da3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -56,8 +56,8 @@ class HistoryServer( // How many applications to retain private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50) - // How many applications are available - private val maxApplications = conf.get(HISTORY_MAX_APPS); + // How many applications the summary ui displays + private[history] val maxApplications = conf.get(HISTORY_UI_MAX_APPS); // application private val appCache = new ApplicationCache(this, retainedApplications, new SystemClock()) @@ -175,7 +175,7 @@ class HistoryServer( * @return List of all known applications. */ def getApplicationList(): Iterable[ApplicationHistoryInfo] = { - provider.getListing().take(maxApplications) + provider.getListing() } def getApplicationInfoList: Iterator[ApplicationInfo] = { diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 1e7e866d376e..54bb6cd3a7a3 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -120,7 +120,7 @@ package object config { .intConf .createWithDefault(100000) - // To limit how many applications are available in the History Server - private[spark] val HISTORY_MAX_APPS = - ConfigBuilder("spark.history.maxApplications").intConf.createWithDefault(Integer.MAX_VALUE) + // To limit how many applications are shown in the History Server summary ui + private[spark] val HISTORY_UI_MAX_APPS = + ConfigBuilder("spark.history.ui.maxApplications").intConf.createWithDefault(Integer.MAX_VALUE) } diff --git 
a/dev/.rat-excludes b/dev/.rat-excludes index a3efddeaa515..9171f3806e42 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -101,4 +101,3 @@ org.apache.spark.scheduler.ExternalClusterManager .*\.sql .Rbuildignore org.apache.spark.deploy.yarn.security.ServiceCredentialProvider -spark-warehouse diff --git a/docs/monitoring.md b/docs/monitoring.md index 6fdf87b4be57..aaca3ed485c4 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -118,6 +118,14 @@ The history server can be configured as follows: applications will be removed. + + spark.history.ui.maxApplications + Int.MaxValue + + The number of applications to display on the history summary page. Application UIs are + still available even if they are not displayed on the history summary page. + + spark.history.ui.port 18080 @@ -242,7 +250,8 @@ can be identified by their `[attempt-id]`. In the API listed below, when running
Examples:
?minDate=2015-02-10
?minDate=2015-02-03T16:42:40.000GMT -
?maxDate=[date] latest date/time to list; uses same format as minDate. +
?maxDate=[date] latest date/time to list; uses same format as minDate. +
?limit=[limit] limits the number of applications listed. /applications/[app-id]/jobs From bbb971826538487ca1058da084b03a1056cfe5dc Mon Sep 17 00:00:00 2001 From: Alex Bozarth Date: Mon, 29 Aug 2016 12:23:39 -0700 Subject: [PATCH 5/6] Fixed some wording in the docs --- docs/monitoring.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/monitoring.md b/docs/monitoring.md index aaca3ed485c4..53166f211be3 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -114,16 +114,16 @@ The history server can be configured as follows: spark.history.retainedApplications 50 - The number of application UIs to retain. If this cap is exceeded, then the oldest - applications will be removed. + The number of application UIs to retain in the cache. If this cap is exceeded, then the oldest + applications will be removed from the cache. spark.history.ui.maxApplications Int.MaxValue - The number of applications to display on the history summary page. Application UIs are - still available even if they are not displayed on the history summary page. + The number of applications to display on the history summary page. Application UIs are still + available by accessing their URLs directly even if they are not displayed on the history summary page. From 8da22e30415a6f6baa536002f137eb0862a52d44 Mon Sep 17 00:00:00 2001 From: Alex Bozarth Date: Tue, 30 Aug 2016 11:36:27 -0700 Subject: [PATCH 6/6] updated doc wording --- docs/monitoring.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/monitoring.md b/docs/monitoring.md index 53166f211be3..5804e4f26cd9 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -114,8 +114,9 @@ The history server can be configured as follows: spark.history.retainedApplications 50 - The number of application UIs to retain in the cache. If this cap is exceeded, then the oldest - applications will be removed from the cache. + The number of applications to retain UI data for in the cache. 
If this cap is exceeded, then + the oldest applications will be removed from the cache. If an application is not in the cache, + it will have to be loaded from disk if it's accessed from the UI. 