Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion core/src/main/scala/org/apache/spark/SparkContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,9 @@ class SparkContext(config: SparkConf) extends Logging {
_applicationAttemptId = taskScheduler.applicationAttemptId()
_conf.set("spark.app.id", _applicationId)
if (_conf.getBoolean("spark.ui.reverseProxy", false)) {
System.setProperty("spark.ui.proxyBase", "/proxy/" + _applicationId)
val proxyUrl = _conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") +
"/proxy/" + _applicationId
System.setProperty("spark.ui.proxyBase", proxyUrl)
}
_ui.foreach(_.setAppId(_applicationId))
_env.blockManager.initialize(_applicationId)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,13 @@ private[deploy] class Master(
webUi.bind()
masterWebUiUrl = "http://" + masterPublicAddress + ":" + webUi.boundPort
if (reverseProxy) {
masterWebUiUrl = conf.get("spark.ui.reverseProxyUrl", masterWebUiUrl)
conf.getOption("spark.ui.reverseProxyUrl") map { reverseProxyUrl =>
val proxyUrlNoSlash = reverseProxyUrl.stripSuffix("/")
System.setProperty("spark.ui.proxyBase", proxyUrlNoSlash)
// If the master URL has a path component, it must end with a slash.
// Otherwise the browser generates incorrect relative links
masterWebUiUrl = proxyUrlNoSlash + "/"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This section of code seems like more work than necessary. Can't you just set masterWebUiUrl = reverseProxyUrl here and then move reverseProxyUrl.stripSuffix("/") into System.setProperty making this a two line function?

Copy link
Contributor Author

@okoethibm okoethibm Apr 4, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have a front-end reverse proxy path like mydomain.com:80/path/to/spark, then the spark.ui.proxyBase property (prefix for URL generation) must not include a trailing slash, the way it's used in UiUtils, like prependBaseUri("/static/bootstrap.min.css").
However, the explicit URL address pointing to the master UI page (e.g. the back-link from workers to master, which masterWebUiUrl feeds into) must include a trailing slash, if it has a path component, because the master UI page contains relative links like "app?...".
Without a path component in the master URL (mydomain.com:8080), the trailing slash did not matter for resolving these links, but with a path component, they must resolve to mydomain.com:80/path/to/spark/app (not mydomain.com:80/path/to/app), therefore the base URL must have a trailing slash.

The code is intended to work regardless of whether spark.ui.reverseProxyUrl was specified with or without a trailing slash, so the safe way to ensure a single trailing slash was to first strip an optional slash and then add one. Your suggestion would omit the slash if there is none specified in the config.
If there's a clean way to move the stripSuffix handling into the config itself, that would make the code prettier, though

}
logInfo(s"Spark Master is acting as a reverse proxy. Master, Workers and " +
s"Applications UIs are available at $masterWebUiUrl")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ private[deploy] class ExecutorRunner(
// Add webUI log urls
val baseUrl =
if (conf.getBoolean("spark.ui.reverseProxy", false)) {
s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType="
conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") +
s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType="
} else {
s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,13 @@ private[deploy] class Worker(
master = Some(masterRef)
connected = true
if (conf.getBoolean("spark.ui.reverseProxy", false)) {
logInfo(s"WorkerWebUI is available at $activeMasterWebUiUrl/proxy/$workerId")
logInfo("WorkerWebUI is available at %s/proxy/%s".format(
activeMasterWebUiUrl.stripSuffix("/"), workerId))
// if reverseProxyUrl is not set, then we continue to generate relative URLs
// starting with "/" throughout the UI and do not use activeMasterWebUiUrl
val proxyUrl = conf.get("spark.ui.reverseProxyUrl", "").stripSuffix("/") +
"/proxy/" + workerId
System.setProperty("spark.ui.proxyBase", proxyUrl)
}
// Cancel any outstanding re-registration attempts because we found a new master
cancelLastRegistrationRetry()
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/ui/UIUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ private[spark] object UIUtils extends Logging {
*/
def makeHref(proxy: Boolean, id: String, origHref: String): String = {
if (proxy) {
s"/proxy/$id"
prependBaseUri("/proxy/", id)
} else {
origHref
}
Expand Down
105 changes: 100 additions & 5 deletions core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ import org.apache.spark.rpc.{RpcEndpoint, RpcEnv}
class MasterSuite extends SparkFunSuite
with Matchers with Eventually with PrivateMethodTester with BeforeAndAfter {

// regex to extract worker links from the master webui HTML
// groups represent URL and worker ID
val WORKER_LINK_RE = """<a href="(.+?)">\s*(worker-.+?)\s*</a>""".r

private var _master: Master = _

after {
Expand Down Expand Up @@ -139,9 +143,10 @@ class MasterSuite extends SparkFunSuite
val conf = new SparkConf()
val localCluster = new LocalSparkCluster(2, 2, 512, conf)
localCluster.start()
val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}"
try {
eventually(timeout(5 seconds), interval(100 milliseconds)) {
val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json")
val json = Source.fromURL(s"$masterUrl/json")
.getLines().mkString("\n")
val JArray(workers) = (parse(json) \ "workers")
workers.size should be (2)
Expand All @@ -151,6 +156,16 @@ class MasterSuite extends SparkFunSuite
.getLines().mkString("\n"))
(workerResponse \ "cores").extract[Int] should be (2)
}

val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n")
html should include ("Spark Master at spark://")
val workerLinks = (WORKER_LINK_RE findAllMatchIn html).toList
workerLinks.size should be (2)
workerLinks foreach { case WORKER_LINK_RE(workerUrl, workerId) =>
val workerHtml = Source.fromURL(workerUrl).getLines().mkString("\n")
workerHtml should include ("Spark Worker at")
workerHtml should include ("Running Executors (0)")
}
}
} finally {
localCluster.stop()
Expand All @@ -159,31 +174,111 @@ class MasterSuite extends SparkFunSuite

// Starts a two-worker local cluster with spark.ui.reverseProxy enabled and no
// spark.ui.reverseProxyUrl, then checks that:
//  - worker JSON is reachable through the master under /proxy/<workerId>/json,
//  - spark.ui.proxyBase was set to a "/proxy/worker-..." value,
//  - the master page links static resources with server-relative "/static" URLs,
//  - the worker pages linked from the master page pass verifyWorkerUI.
test("master/worker web ui available with reverseProxy") {
  implicit val formats = org.json4s.DefaultFormats
  val conf = new SparkConf()
  conf.set("spark.ui.reverseProxy", "true")
  val localCluster = new LocalSparkCluster(2, 2, 512, conf)
  localCluster.start()
  val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}"
  try {
    eventually(timeout(5 seconds), interval(100 milliseconds)) {
      val json = Source.fromURL(s"$masterUrl/json")
        .getLines().mkString("\n")
      val JArray(workers) = (parse(json) \ "workers")
      workers.size should be (2)
      workers.foreach { workerSummaryJson =>
        // the webuiaddress intentionally points to the local web ui.
        // explicitly construct reverse proxy url targeting the master
        val JString(workerId) = workerSummaryJson \ "id"
        val url = s"$masterUrl/proxy/${workerId}/json"
        val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n"))
        (workerResponse \ "cores").extract[Int] should be (2)
      }

      // with LocalCluster, we have masters and workers in the same JVM, each overwriting
      // system property spark.ui.proxyBase.
      // so we need to manage this property explicitly for test
      System.getProperty("spark.ui.proxyBase") should startWith ("/proxy/worker-")
      // clear the worker-specific base so the master page is rendered without a prefix
      System.getProperties().remove("spark.ui.proxyBase")
      val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n")
      html should include ("Spark Master at spark://")
      html should include ("""href="/static""")
      html should include ("""src="/static""")
      verifyWorkerUI(html, masterUrl)
    }
  } finally {
    localCluster.stop()
    // do not leak the JVM-wide property into other tests
    System.getProperties().remove("spark.ui.proxyBase")
  }
}

// Starts a two-worker local cluster with spark.ui.reverseProxy enabled and
// spark.ui.reverseProxyUrl pointing at a front-end proxy with a path prefix, then checks that:
//  - worker JSON is reachable through the master under /proxy/<workerId>/json,
//  - each worker reports the proxy URL (with a trailing slash) as its master web UI URL,
//  - spark.ui.proxyBase was set to "<reverseProxyUrl>/proxy/worker-...",
//  - master and worker pages reference static resources through the proxy URL.
test("master/worker web ui available behind front-end reverseProxy") {
  implicit val formats = org.json4s.DefaultFormats
  val reverseProxyUrl = "http://proxyhost:8080/path/to/spark"
  val conf = new SparkConf()
  conf.set("spark.ui.reverseProxy", "true")
  conf.set("spark.ui.reverseProxyUrl", reverseProxyUrl)
  val localCluster = new LocalSparkCluster(2, 2, 512, conf)
  localCluster.start()
  val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}"
  try {
    eventually(timeout(5 seconds), interval(100 milliseconds)) {
      val json = Source.fromURL(s"$masterUrl/json")
        .getLines().mkString("\n")
      val JArray(workers) = (parse(json) \ "workers")
      workers.size should be (2)
      workers.foreach { workerSummaryJson =>
        // the webuiaddress intentionally points to the local web ui.
        // explicitly construct reverse proxy url targeting the master
        val JString(workerId) = workerSummaryJson \ "id"
        val url = s"$masterUrl/proxy/${workerId}/json"
        val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n"))
        (workerResponse \ "cores").extract[Int] should be (2)
        (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl + "/")
      }

      // with LocalCluster, we have masters and workers in the same JVM, each overwriting
      // system property spark.ui.proxyBase.
      // so we need to manage this property explicitly for test
      //
      // Keep the matcher and its argument on ONE line: a line break between `startWith`
      // and the parenthesised prefix ends the statement, which silently turns the
      // assertion into a no-op.
      val proxyBase = System.getProperty("spark.ui.proxyBase")
      proxyBase should startWith (s"$reverseProxyUrl/proxy/worker-")
      // render the master page with the master's own base (the proxy URL without "/proxy/...")
      System.setProperty("spark.ui.proxyBase", reverseProxyUrl)
      val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n")
      html should include ("Spark Master at spark://")
      verifyStaticResourcesServedByProxy(html, reverseProxyUrl)
      verifyWorkerUI(html, masterUrl, reverseProxyUrl)
    }
  } finally {
    localCluster.stop()
    // do not leak the JVM-wide property into other tests
    System.getProperties().remove("spark.ui.proxyBase")
  }
}

/**
 * Follows the worker links found on the master page and checks each worker page.
 *
 * Exactly two links matching WORKER_LINK_RE are expected. Each link must equal
 * `$reverseProxyUrl/proxy/$workerId`. The worker page itself is fetched through the master
 * at `$masterUrl/proxy/$workerId/`, with the system property spark.ui.proxyBase set to the
 * link target beforehand, and must contain the worker heading, an empty executor list and
 * static resource references prefixed with the link target.
 *
 * @param masterHtml HTML of the master page to scan for worker links
 * @param masterUrl base URL used to reach the master directly
 * @param reverseProxyUrl expected prefix of the generated worker links; empty by default
 */
private def verifyWorkerUI(masterHtml: String, masterUrl: String,
    reverseProxyUrl: String = ""): Unit = {
  val linkMatches = WORKER_LINK_RE.findAllMatchIn(masterHtml).toList
  linkMatches.size should be (2)
  for (linkMatch <- linkMatches) {
    linkMatch match {
      case WORKER_LINK_RE(linkTarget, linkedWorkerId) =>
        linkTarget should be (s"$reverseProxyUrl/proxy/$linkedWorkerId")
        System.setProperty("spark.ui.proxyBase", linkTarget)
        // No real front-end proxy is listening at $reverseProxyUrl, so the page is
        // requested from the master directly.
        val directUrl = s"$masterUrl/proxy/$linkedWorkerId/"
        val workerPage = Source.fromURL(directUrl).getLines().mkString("\n")
        workerPage should include ("Spark Worker at")
        workerPage should include ("Running Executors (0)")
        verifyStaticResourcesServedByProxy(workerPage, linkTarget)
      case _ =>
        // a link that cannot be destructured must fail the test instead of being skipped
        fail()
    }
  }
}

/**
 * Asserts that the page references static resources only through the proxy.
 *
 * For both the `href` and the `src` attribute, the page must not contain a value starting
 * with "/static" and must contain one starting with `$proxyUrl/static`.
 *
 * @param html page content to inspect
 * @param proxyUrl prefix expected in front of "/static"
 */
private def verifyStaticResourcesServedByProxy(html: String, proxyUrl: String): Unit = {
  for (attribute <- Seq("href", "src")) {
    html should not include (s"""$attribute="/static""")
    html should include (s"""$attribute="$proxyUrl/static""")
  }
}

test("basic scheduling - spread out") {
basicScheduling(spreadOut = true)
}
Expand Down
32 changes: 30 additions & 2 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -708,14 +708,42 @@ Apart from these, the following properties are also available, and may be useful
<td><code>spark.ui.reverseProxy</code></td>
<td>false</td>
<td>
Enable running Spark Master as reverse proxy for worker and application UIs. In this mode, Spark master will reverse proxy the worker and application UIs to enable access without requiring direct access to their hosts. Use it with caution, as worker and application UI will not be accessible directly, you will only be able to access them through spark master/proxy public URL. This setting affects all the workers and application UIs running in the cluster and must be set on all the workers, drivers and masters.
Enable running Spark Master as reverse proxy for worker and application UIs. In this mode,
the Spark master will reverse proxy the worker and application UIs to enable access without
requiring direct access to their hosts.
Use it with caution, as worker and application UI will not be accessible directly; you will
only be able to access them through the spark master/proxy public URL.
This setting affects all the workers and application UIs running in the cluster and must be
set on all the workers, drivers and masters.
</td>
</tr>
<tr>
<td><code>spark.ui.reverseProxyUrl</code></td>
<td></td>
<td>
This is the URL where your proxy is running. This URL is for proxy which is running in front of Spark Master. This is useful when running proxy for authentication e.g. OAuth proxy. Make sure this is a complete URL including scheme (http/https) and port to reach your proxy.
If the Spark UI should be served through another front-end reverse proxy, this is the URL
for accessing the Spark master UI through that reverse proxy.
This is useful when running a proxy for authentication, e.g. an OAuth proxy. The URL may contain
a path prefix, like <code>http://mydomain.com/path/to/spark/</code>, allowing you to serve the
UI for multiple Spark clusters and other web applications through the same virtual host and
port.
Normally, this should be an absolute URL including scheme (http/https), host and port.
It is possible to specify a relative URL starting with "/" here. In this case, all URLs
generated by the Spark UI and Spark REST APIs will be server-relative links -- this will still
work, as the entire Spark UI is served through the same host and port.
<br/>The setting affects link generation in the Spark UI, but the front-end reverse proxy
is responsible for
<ul>
<li>stripping a path prefix before forwarding the request,</li>
<li>rewriting redirects which point directly to the Spark master,</li>
<li>redirecting access from <code>http://mydomain.com/path/to/spark</code> to
<code>http://mydomain.com/path/to/spark/</code> (trailing slash after path prefix); otherwise
relative links on the master page do not work correctly.</li>
</ul>
This setting affects all the workers and application UIs running in the cluster and must be set
identically on all the workers, drivers and masters. It is only effective when
<code>spark.ui.reverseProxy</code> is turned on. This setting is not needed when the Spark
master web UI is directly reachable.
</td>
</tr>
<tr>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ private[repl] trait SparkILoopInit {
val _sc = spark.sparkContext
if (_sc.getConf.getBoolean("spark.ui.reverseProxy", false)) {
val proxyUrl = _sc.getConf.get("spark.ui.reverseProxyUrl", null)
if (proxyUrl != null) {
if (proxyUrl != null && proxyUrl.startsWith("http")) {
println(s"Spark Context Web UI is available at ${proxyUrl}/proxy/${_sc.applicationId}")
} else {
println(s"Spark Context Web UI is available at Spark Master Public URL")
println(s"Spark Context Web UI is available at Spark Master under proxy/${_sc.applicationId}")
}
} else {
_sc.uiWebUrl.foreach {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter)
val _sc = spark.sparkContext
if (_sc.getConf.getBoolean("spark.ui.reverseProxy", false)) {
val proxyUrl = _sc.getConf.get("spark.ui.reverseProxyUrl", null)
if (proxyUrl != null) {
if (proxyUrl != null && proxyUrl.startsWith("http")) {
println(s"Spark Context Web UI is available at ${proxyUrl}/proxy/${_sc.applicationId}")
} else {
println(s"Spark Context Web UI is available at Spark Master Public URL")
println(s"Spark Context Web UI is available at Spark Master under proxy/${_sc.applicationId}")
}
} else {
_sc.uiWebUrl.foreach {
Expand Down