diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 0e36a30c933d..b277da2d6cc2 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -512,7 +512,9 @@ class SparkContext(config: SparkConf) extends Logging { _applicationAttemptId = taskScheduler.applicationAttemptId() _conf.set("spark.app.id", _applicationId) if (_conf.getBoolean("spark.ui.reverseProxy", false)) { - System.setProperty("spark.ui.proxyBase", "/proxy/" + _applicationId) + val proxyUrl = _conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") + + "/proxy/" + _applicationId + System.setProperty("spark.ui.proxyBase", proxyUrl) } _ui.foreach(_.setAppId(_applicationId)) _env.blockManager.initialize(_applicationId) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 816bf37e39fe..9785a9581d7c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -132,7 +132,13 @@ private[deploy] class Master( webUi.bind() masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort if (reverseProxy) { - masterWebUiUrl = conf.get("spark.ui.reverseProxyUrl", masterWebUiUrl) + conf.getOption("spark.ui.reverseProxyUrl") map { reverseProxyUrl => + val proxyUrlNoSlash = reverseProxyUrl.stripSuffix("/") + System.setProperty("spark.ui.proxyBase", proxyUrlNoSlash) + // If the master URL has a path component, it must end with a slash. + // Otherwise the browser generates incorrect relative links + masterWebUiUrl = proxyUrlNoSlash + "/" + } logInfo(s"Spark Master is acting as a reverse proxy. 
Master, Workers and " + s"Applications UIs are available at $masterWebUiUrl") } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index d4d8521cc820..f5c50029963e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -157,7 +157,8 @@ private[deploy] class ExecutorRunner( // Add webUI log urls val baseUrl = if (conf.getBoolean("spark.ui.reverseProxy", false)) { - s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" + conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") + + s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" } else { s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType=" } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 00b9d1af373d..923759b404ab 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -203,7 +203,13 @@ private[deploy] class Worker( master = Some(masterRef) connected = true if (conf.getBoolean("spark.ui.reverseProxy", false)) { - logInfo(s"WorkerWebUI is available at $activeMasterWebUiUrl/proxy/$workerId") + logInfo("WorkerWebUI is available at %s/proxy/%s".format( + activeMasterWebUiUrl.stripSuffix("/"), workerId)) + // if reverseProxyUrl is not set, then we continue to generate relative URLs + // starting with "/" throughout the UI and do not use activeMasterWebUiUrl + val proxyUrl = conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") + + "/proxy/" + workerId + System.setProperty("spark.ui.proxyBase", proxyUrl) } // Cancel any outstanding re-registration attempts because we found a new master cancelLastRegistrationRetry() diff --git 
a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index d161843dd223..6e739b706061 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -519,7 +519,7 @@ private[spark] object UIUtils extends Logging { */ def makeHref(proxy: Boolean, id: String, origHref: String): String = { if (proxy) { - s"/proxy/$id" + prependBaseUri("/proxy/", id) } else { origHref } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 2127da48ece4..b6a025a1f3e5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -39,6 +39,10 @@ import org.apache.spark.rpc.{RpcEndpoint, RpcEnv} class MasterSuite extends SparkFunSuite with Matchers with Eventually with PrivateMethodTester with BeforeAndAfter { + // regex to extract worker links from the master webui HTML + // groups represent URL and worker ID + val WORKER_LINK_RE = """\s*(worker-.+?)\s*""".r + private var _master: Master = _ after { @@ -139,9 +143,10 @@ class MasterSuite extends SparkFunSuite val conf = new SparkConf() val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5 seconds), interval(100 milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") + val json = Source.fromURL(s"$masterUrl/json") .getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) @@ -151,6 +156,16 @@ class MasterSuite extends SparkFunSuite .getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) } + + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include 
("Spark Master at spark://") + val workerLinks = (WORKER_LINK_RE findAllMatchIn html).toList + workerLinks.size should be (2) + workerLinks foreach { case WORKER_LINK_RE(workerUrl, workerId) => + val workerHtml = Source.fromURL(workerUrl).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + } } } finally { localCluster.stop() @@ -159,31 +174,111 @@ class MasterSuite extends SparkFunSuite test("master/worker web ui available with reverseProxy") { implicit val formats = org.json4s.DefaultFormats - val reverseProxyUrl = "http://localhost:8080" + val conf = new SparkConf() + conf.set("spark.ui.reverseProxy", "true") + val localCluster = new LocalSparkCluster(2, 2, 512, conf) + localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" + try { + eventually(timeout(5 seconds), interval(100 milliseconds)) { + val json = Source.fromURL(s"$masterUrl/json") + .getLines().mkString("\n") + val JArray(workers) = (parse(json) \ "workers") + workers.size should be (2) + workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. + // explicitly construct reverse proxy url targeting the master + val JString(workerId) = workerSummaryJson \ "id" + val url = s"$masterUrl/proxy/${workerId}/json" + val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) + (workerResponse \ "cores").extract[Int] should be (2) + } + + // with LocalCluster, we have masters and workers in the same JVM, each overwriting + // system property spark.ui.proxyBase. 
+ // so we need to manage this property explicitly for test + System.getProperty("spark.ui.proxyBase") should startWith ("/proxy/worker-") + System.getProperties().remove("spark.ui.proxyBase") + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + html should include ("""href="/static""") + html should include ("""src="/static""") + verifyWorkerUI(html, masterUrl) + } + } finally { + localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") + } + } + + test("master/worker web ui available behind front-end reverseProxy") { + implicit val formats = org.json4s.DefaultFormats + val reverseProxyUrl = "http://proxyhost:8080/path/to/spark" val conf = new SparkConf() conf.set("spark.ui.reverseProxy", "true") conf.set("spark.ui.reverseProxyUrl", reverseProxyUrl) val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5 seconds), interval(100 milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") + val json = Source.fromURL(s"$masterUrl/json") .getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. 
+ // explicitly construct reverse proxy url targeting the master val JString(workerId) = workerSummaryJson \ "id" - val url = s"http://localhost:${localCluster.masterWebUIPort}/proxy/${workerId}/json" + val url = s"$masterUrl/proxy/${workerId}/json" val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) - (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl) + (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl + "/") } + + // with LocalCluster, we have masters and workers in the same JVM, each overwriting + // system property spark.ui.proxyBase. + // so we need to manage this property explicitly for test + System.getProperty("spark.ui.proxyBase") should startWith + (s"$reverseProxyUrl/proxy/worker-") + System.setProperty("spark.ui.proxyBase", reverseProxyUrl) + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + verifyStaticResourcesServedByProxy(html, reverseProxyUrl) + verifyWorkerUI(html, masterUrl, reverseProxyUrl) } } finally { localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") } } + private def verifyWorkerUI(masterHtml: String, masterUrl: String, + reverseProxyUrl: String = ""): Unit = { + val workerLinks = (WORKER_LINK_RE findAllMatchIn masterHtml).toList + workerLinks.size should be (2) + workerLinks foreach { + case WORKER_LINK_RE(workerUrl, workerId) => + workerUrl should be (s"$reverseProxyUrl/proxy/$workerId") + // there is no real front-end proxy as defined in $reverseProxyUrl + // construct url directly targeting the master + val url = s"$masterUrl/proxy/$workerId/" + System.setProperty("spark.ui.proxyBase", workerUrl) + val workerHtml = Source.fromURL(url).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + verifyStaticResourcesServedByProxy(workerHtml, 
workerUrl) + case _ => fail // make sure we don't accidentally skip the tests + } + } + + private def verifyStaticResourcesServedByProxy(html: String, proxyUrl: String): Unit = { + html should not include ("""href="/static""") + html should include (s"""href="$proxyUrl/static""") + html should not include ("""src="/static""") + html should include (s"""src="$proxyUrl/static""") + } + test("basic scheduling - spread out") { basicScheduling(spreadOut = true) } diff --git a/docs/configuration.md b/docs/configuration.md index 4729f1b0404c..97359ed67f97 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -708,14 +708,42 @@ Apart from these, the following properties are also available, and may be useful spark.ui.reverseProxy false - Enable running Spark Master as reverse proxy for worker and application UIs. In this mode, Spark master will reverse proxy the worker and application UIs to enable access without requiring direct access to their hosts. Use it with caution, as worker and application UI will not be accessible directly, you will only be able to access them through spark master/proxy public URL. This setting affects all the workers and application UIs running in the cluster and must be set on all the workers, drivers and masters. + Enable running Spark Master as reverse proxy for worker and application UIs. In this mode, + the Spark master will reverse proxy the worker and application UIs to enable access without + requiring direct access to their hosts. + Use it with caution, as worker and application UI will not be accessible directly, you will + only be able to access them through the spark master/proxy public URL. + This setting affects all the workers and application UIs running in the cluster and must be + set on all the workers, drivers and masters. spark.ui.reverseProxyUrl - This is the URL where your proxy is running. This URL is for proxy which is running in front of Spark Master. This is useful when running proxy for authentication e.g. 
OAuth proxy. Make sure this is a complete URL including scheme (http/https) and port to reach your proxy. + If the Spark UI should be served through another front-end reverse proxy, this is the URL + for accessing the Spark master UI through that reverse proxy. + This is useful when running proxy for authentication e.g. an OAuth proxy. The URL may contain + a path prefix, like http://mydomain.com/path/to/spark/, allowing you to serve the + UI for multiple Spark clusters and other web applications through the same virtual host and + port. + Normally, this should be an absolute URL including scheme (http/https), host and port. + It is possible to specify a relative URL starting with "/" here. In this case, all URLs + generated by the Spark UI and Spark REST APIs will be server-relative links -- this will still + work, as the entire Spark UI is served through the same host and port. +
The setting affects link generation in the Spark UI, but the front-end reverse proxy + is responsible for stripping the path prefix before forwarding requests to the Spark master, and for + serving the master UI at a URL that ends with a trailing slash (otherwise relative links on the master page do not resolve correctly). + This setting affects all the workers and application UIs running in the cluster and must be set + identically on all the workers, drivers and masters. It is only effective when + spark.ui.reverseProxy is turned on. This setting is not needed when the Spark + master web UI is directly reachable. diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala index 5f0d92bccd80..7d3200d6c8d5 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala @@ -126,10 +126,10 @@ private[repl] trait SparkILoopInit { val _sc = spark.sparkContext if (_sc.getConf.getBoolean("spark.ui.reverseProxy", false)) { val proxyUrl = _sc.getConf.get("spark.ui.reverseProxyUrl", null) - if (proxyUrl != null) { + if (proxyUrl != null && proxyUrl.startsWith("http")) { println(s"Spark Context Web UI is available at ${proxyUrl}/proxy/${_sc.applicationId}") } else { - println(s"Spark Context Web UI is available at Spark Master Public URL") + println(s"Spark Context Web UI is available at Spark Master under proxy/${_sc.applicationId}") } } else { _sc.uiWebUrl.foreach { diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 76a66c1beada..342a5eb366a2 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -45,10 +45,10 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) val _sc = spark.sparkContext if (_sc.getConf.getBoolean("spark.ui.reverseProxy", false)) { val proxyUrl = _sc.getConf.get("spark.ui.reverseProxyUrl", null) - if (proxyUrl != null) { + if (proxyUrl 
!= null && proxyUrl.startsWith("http")) { println(s"Spark Context Web UI is available at ${proxyUrl}/proxy/${_sc.applicationId}") } else { - println(s"Spark Context Web UI is available at Spark Master Public URL") + println(s"Spark Context Web UI is available at Spark Master under proxy/${_sc.applicationId}") } } else { _sc.uiWebUrl.foreach {