diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 0e36a30c933d..b277da2d6cc2 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -512,7 +512,9 @@ class SparkContext(config: SparkConf) extends Logging { _applicationAttemptId = taskScheduler.applicationAttemptId() _conf.set("spark.app.id", _applicationId) if (_conf.getBoolean("spark.ui.reverseProxy", false)) { - System.setProperty("spark.ui.proxyBase", "/proxy/" + _applicationId) + val proxyUrl = _conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") + + "/proxy/" + _applicationId + System.setProperty("spark.ui.proxyBase", proxyUrl) } _ui.foreach(_.setAppId(_applicationId)) _env.blockManager.initialize(_applicationId) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 816bf37e39fe..9785a9581d7c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -132,7 +132,13 @@ private[deploy] class Master( webUi.bind() masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort if (reverseProxy) { - masterWebUiUrl = conf.get("spark.ui.reverseProxyUrl", masterWebUiUrl) + conf.getOption("spark.ui.reverseProxyUrl") map { reverseProxyUrl => + val proxyUrlNoSlash = reverseProxyUrl.stripSuffix("/") + System.setProperty("spark.ui.proxyBase", proxyUrlNoSlash) + // If the master URL has a path component, it must end with a slash. + // Otherwise the browser generates incorrect relative links + masterWebUiUrl = proxyUrlNoSlash + "/" + } logInfo(s"Spark Master is acting as a reverse proxy. 
Master, Workers and " + s"Applications UIs are available at $masterWebUiUrl") } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index d4d8521cc820..f5c50029963e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -157,7 +157,8 @@ private[deploy] class ExecutorRunner( // Add webUI log urls val baseUrl = if (conf.getBoolean("spark.ui.reverseProxy", false)) { - s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" + conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") + + s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" } else { s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType=" } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 00b9d1af373d..923759b404ab 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -203,7 +203,13 @@ private[deploy] class Worker( master = Some(masterRef) connected = true if (conf.getBoolean("spark.ui.reverseProxy", false)) { - logInfo(s"WorkerWebUI is available at $activeMasterWebUiUrl/proxy/$workerId") + logInfo("WorkerWebUI is available at %s/proxy/%s".format( + activeMasterWebUiUrl.stripSuffix("/"), workerId)) + // if reverseProxyUrl is not set, then we continue to generate relative URLs + // starting with "/" throughout the UI and do not use activeMasterWebUiUrl + val proxyUrl = conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") + + "/proxy/" + workerId + System.setProperty("spark.ui.proxyBase", proxyUrl) } // Cancel any outstanding re-registration attempts because we found a new master cancelLastRegistrationRetry() diff --git 
a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index d161843dd223..6e739b706061 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -519,7 +519,7 @@ private[spark] object UIUtils extends Logging { */ def makeHref(proxy: Boolean, id: String, origHref: String): String = { if (proxy) { - s"/proxy/$id" + prependBaseUri("/proxy/", id) } else { origHref } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 2127da48ece4..b6a025a1f3e5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -39,6 +39,10 @@ import org.apache.spark.rpc.{RpcEndpoint, RpcEnv} class MasterSuite extends SparkFunSuite with Matchers with Eventually with PrivateMethodTester with BeforeAndAfter { + // regex to extract worker links from the master webui HTML + // groups represent URL and worker ID + val WORKER_LINK_RE = """\s*(worker-.+?)\s*""".r + private var _master: Master = _ after { @@ -139,9 +143,10 @@ class MasterSuite extends SparkFunSuite val conf = new SparkConf() val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5 seconds), interval(100 milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") + val json = Source.fromURL(s"$masterUrl/json") .getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) @@ -151,6 +156,16 @@ class MasterSuite extends SparkFunSuite .getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) } + + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include 
("Spark Master at spark://") + val workerLinks = (WORKER_LINK_RE findAllMatchIn html).toList + workerLinks.size should be (2) + workerLinks foreach { case WORKER_LINK_RE(workerUrl, workerId) => + val workerHtml = Source.fromURL(workerUrl).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + } } } finally { localCluster.stop() @@ -159,31 +174,111 @@ class MasterSuite extends SparkFunSuite test("master/worker web ui available with reverseProxy") { implicit val formats = org.json4s.DefaultFormats - val reverseProxyUrl = "http://localhost:8080" + val conf = new SparkConf() + conf.set("spark.ui.reverseProxy", "true") + val localCluster = new LocalSparkCluster(2, 2, 512, conf) + localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" + try { + eventually(timeout(5 seconds), interval(100 milliseconds)) { + val json = Source.fromURL(s"$masterUrl/json") + .getLines().mkString("\n") + val JArray(workers) = (parse(json) \ "workers") + workers.size should be (2) + workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. + // explicitly construct reverse proxy url targeting the master + val JString(workerId) = workerSummaryJson \ "id" + val url = s"$masterUrl/proxy/${workerId}/json" + val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) + (workerResponse \ "cores").extract[Int] should be (2) + } + + // with LocalCluster, we have masters and workers in the same JVM, each overwriting + // system property spark.ui.proxyBase. 
+ // so we need to manage this property explicitly for test + System.getProperty("spark.ui.proxyBase") should startWith ("/proxy/worker-") + System.getProperties().remove("spark.ui.proxyBase") + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + html should include ("""href="/static""") + html should include ("""src="/static""") + verifyWorkerUI(html, masterUrl) + } + } finally { + localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") + } + } + + test("master/worker web ui available behind front-end reverseProxy") { + implicit val formats = org.json4s.DefaultFormats + val reverseProxyUrl = "http://proxyhost:8080/path/to/spark" val conf = new SparkConf() conf.set("spark.ui.reverseProxy", "true") conf.set("spark.ui.reverseProxyUrl", reverseProxyUrl) val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5 seconds), interval(100 milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") + val json = Source.fromURL(s"$masterUrl/json") .getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. 
+ // explicitly construct reverse proxy url targeting the master val JString(workerId) = workerSummaryJson \ "id" - val url = s"http://localhost:${localCluster.masterWebUIPort}/proxy/${workerId}/json" + val url = s"$masterUrl/proxy/${workerId}/json" val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) - (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl) + (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl + "/") } + + // with LocalCluster, we have masters and workers in the same JVM, each overwriting + // system property spark.ui.proxyBase. + // so we need to manage this property explicitly for test + System.getProperty("spark.ui.proxyBase") should startWith + (s"$reverseProxyUrl/proxy/worker-") + System.setProperty("spark.ui.proxyBase", reverseProxyUrl) + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + verifyStaticResourcesServedByProxy(html, reverseProxyUrl) + verifyWorkerUI(html, masterUrl, reverseProxyUrl) } } finally { localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") } } + private def verifyWorkerUI(masterHtml: String, masterUrl: String, + reverseProxyUrl: String = ""): Unit = { + val workerLinks = (WORKER_LINK_RE findAllMatchIn masterHtml).toList + workerLinks.size should be (2) + workerLinks foreach { + case WORKER_LINK_RE(workerUrl, workerId) => + workerUrl should be (s"$reverseProxyUrl/proxy/$workerId") + // there is no real front-end proxy as defined in $reverseProxyUrl + // construct url directly targeting the master + val url = s"$masterUrl/proxy/$workerId/" + System.setProperty("spark.ui.proxyBase", workerUrl) + val workerHtml = Source.fromURL(url).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + verifyStaticResourcesServedByProxy(workerHtml, 
workerUrl) + case _ => fail // make sure we don't accidentally skip the tests + } + } + + private def verifyStaticResourcesServedByProxy(html: String, proxyUrl: String): Unit = { + html should not include ("""href="/static""") + html should include (s"""href="$proxyUrl/static""") + html should not include ("""src="/static""") + html should include (s"""src="$proxyUrl/static""") + } + test("basic scheduling - spread out") { basicScheduling(spreadOut = true) } diff --git a/docs/configuration.md b/docs/configuration.md index 4729f1b0404c..97359ed67f97 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -708,14 +708,42 @@ Apart from these, the following properties are also available, and may be useful
`spark.ui.reverseProxy` / `spark.ui.reverseProxyUrl`: the URL may contain a path prefix, like `http://mydomain.com/path/to/spark/`, allowing you to serve the
+ UI for multiple Spark clusters and other web applications through the same virtual host and
+ port.
+ Normally, this should be an absolute URL including scheme (http/https), host and port.
+ It is possible to specify a relative URL starting with "/" here. In this case, all URLs
+ generated by the Spark UI and Spark REST APIs will be server-relative links -- this will still
+ work, as the entire Spark UI is served through the same host and port.
+ Requests for `http://mydomain.com/path/to/spark` must be redirected to
+ `http://mydomain.com/path/to/spark/` (trailing slash after path prefix); otherwise
+ relative links on the master page do not work correctly. This setting only takes effect when `spark.ui.reverseProxy` is turned on. This setting is not needed when the Spark
+ master web UI is directly reachable.