From 6d4f47e9ded7254dffec13c4776056eef4441bed Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 17 Nov 2014 14:08:35 -0800 Subject: [PATCH 1/6] Added a test case as a regression check for SPARK-4434 --- core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala index 4161aede1d1d0..0a1eb8c9c8740 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala @@ -24,6 +24,7 @@ class ClientSuite extends FunSuite with Matchers { test("correctly validates driver jar URL's") { ClientArguments.isValidJarUrl("http://someHost:8080/foo.jar") should be (true) ClientArguments.isValidJarUrl("file://some/path/to/a/jarFile.jar") should be (true) + ClientArguments.isValidJarUrl("file:///some/path/to/a/jarFile.jar") should be (true) ClientArguments.isValidJarUrl("hdfs://someHost:1234/foo.jar") should be (true) ClientArguments.isValidJarUrl("hdfs://someHost:1234/foo") should be (false) From d4b99efc04f2cf6aa22f1e19bd6fc4d511e18fe4 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 24 Oct 2014 13:08:21 -0700 Subject: [PATCH 2/6] [SPARK-4075] [Deploy] Jar url validation is not enough for Jar file In deploy.ClientArguments.isValidJarUrl, the url is checked as follows. def isValidJarUrl(s: String): Boolean = s.matches("(.+):(.+)jar") So, it allows like 'hdfs:file.jar' (no authority). Author: Kousuke Saruta Closes #2925 from sarutak/uri-syntax-check-improvement and squashes the following commits: cf06173 [Kousuke Saruta] Improved URI syntax checking --- .../org/apache/spark/deploy/ClientArguments.scala | 11 ++++++++++- .../scala/org/apache/spark/deploy/ClientSuite.scala | 6 ++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala index 39150deab863c..4e802e02c4149 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala @@ -17,6 +17,8 @@ package org.apache.spark.deploy +import java.net.{URI, URISyntaxException} + import scala.collection.mutable.ListBuffer import org.apache.log4j.Level @@ -114,5 +116,12 @@ private[spark] class ClientArguments(args: Array[String]) { } object ClientArguments { - def isValidJarUrl(s: String): Boolean = s.matches("(.+):(.+)jar") + def isValidJarUrl(s: String): Boolean = { + try { + val uri = new URI(s) + uri.getScheme != null && uri.getAuthority != null && s.endsWith("jar") + } catch { + case _: URISyntaxException => false + } + } } diff --git a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala index 0a1eb8c9c8740..b350e05a3a627 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala @@ -30,6 +30,12 @@ class ClientSuite extends FunSuite with Matchers { ClientArguments.isValidJarUrl("hdfs://someHost:1234/foo") should be (false) ClientArguments.isValidJarUrl("/missing/a/protocol/jarfile.jar") should be (false) ClientArguments.isValidJarUrl("not-even-a-path.jar") should be (false) + + // No authority + ClientArguments.isValidJarUrl("hdfs:someHost:1234/jarfile.jar") should be (false) + + // Invalid syntax + ClientArguments.isValidJarUrl("hdfs:") should be (false) } } From 9e09da2f58a9559735e3622d25f9f6213453ee01 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 17 Nov 2014 16:54:51 -0800 Subject: [PATCH 3/6] Fixed URI validation for jar file --- .../scala/org/apache/spark/deploy/ClientArguments.scala | 4 ++-- .../test/scala/org/apache/spark/deploy/ClientSuite.scala | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala index 4e802e02c4149..6b8e1fe93c733 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala @@ -75,7 +75,7 @@ private[spark] class ClientArguments(args: Array[String]) { if (!ClientArguments.isValidJarUrl(_jarUrl)) { println(s"Jar url '${_jarUrl}' is not in valid format.") - println(s"Must be a jar file path in URL format (e.g. hdfs://XX.jar, file://XX.jar)") + println(s"Must be a jar file path in URL format (e.g. hdfs://XX.jar, file:///XX.jar)") printUsageAndExit(-1) } @@ -119,7 +119,7 @@ object ClientArguments { def isValidJarUrl(s: String): Boolean = { try { val uri = new URI(s) - uri.getScheme != null && uri.getAuthority != null && s.endsWith("jar") + uri.getScheme != null && uri.getPath != null && uri.getPath.endsWith(".jar") } catch { case _: URISyntaxException => false } diff --git a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala index b350e05a3a627..1eac749bdf6fa 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala @@ -23,7 +23,14 @@ import org.scalatest.Matchers class ClientSuite extends FunSuite with Matchers { test("correctly validates driver jar URL's") { ClientArguments.isValidJarUrl("http://someHost:8080/foo.jar") should be (true) - ClientArguments.isValidJarUrl("file://some/path/to/a/jarFile.jar") should be (true) + + // file scheme with authority is valid. + ClientArguments.isValidJarUrl("file://somehost/path/to/a/jarFile.jar") should be (true) + + // file scheme without authority is not valid. + ClientArguments.isValidJarUrl("file://jarFile.jar") should be (false) + + // file scheme without authority but with triple slash is valid. ClientArguments.isValidJarUrl("file:///some/path/to/a/jarFile.jar") should be (true) ClientArguments.isValidJarUrl("hdfs://someHost:1234/foo.jar") should be (true) From 4f302101049f346cc7287950fa4dc6dac04fcc19 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 17 Nov 2014 22:01:16 -0800 Subject: [PATCH 4/6] Modified comments --- .../scala/org/apache/spark/deploy/ClientArguments.scala | 2 +- .../test/scala/org/apache/spark/deploy/ClientSuite.scala | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala index 6b8e1fe93c733..898a8cfa42620 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala @@ -75,7 +75,7 @@ private[spark] class ClientArguments(args: Array[String]) { if (!ClientArguments.isValidJarUrl(_jarUrl)) { println(s"Jar url '${_jarUrl}' is not in valid format.") - println(s"Must be a jar file path in URL format (e.g. hdfs://XX.jar, file:///XX.jar)") + println(s"Must be a jar file path in URL format (e.g. hdfs://host:port/XX.jar, file:///XX.jar)") printUsageAndExit(-1) } diff --git a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala index 1eac749bdf6fa..9bd7ed7fe148c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala @@ -24,10 +24,11 @@ class ClientSuite extends FunSuite with Matchers { test("correctly validates driver jar URL's") { ClientArguments.isValidJarUrl("http://someHost:8080/foo.jar") should be (true) - // file scheme with authority is valid. + // file scheme with authority and path is valid. ClientArguments.isValidJarUrl("file://somehost/path/to/a/jarFile.jar") should be (true) - // file scheme without authority is not valid. + // file scheme without path is not valid. + // In this case, jarFile.jar is recognized as authority. ClientArguments.isValidJarUrl("file://jarFile.jar") should be (false) // file scheme without authority but with triple slash is valid. @@ -38,10 +39,10 @@ class ClientSuite extends FunSuite with Matchers { ClientArguments.isValidJarUrl("/missing/a/protocol/jarfile.jar") should be (false) ClientArguments.isValidJarUrl("not-even-a-path.jar") should be (false) - // No authority + // This URI don't have authority and path. ClientArguments.isValidJarUrl("hdfs:someHost:1234/jarfile.jar") should be (false) - // Invalid syntax + // Invalid syntax. ClientArguments.isValidJarUrl("hdfs:") should be (false) } From c1c80ca3689775c2d50647193f40e1e76f6a92f6 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 17 Nov 2014 22:08:42 -0800 Subject: [PATCH 5/6] Fixed style --- .../main/scala/org/apache/spark/deploy/ClientArguments.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala index 898a8cfa42620..2e1e52906ceeb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala @@ -75,7 +75,8 @@ private[spark] class ClientArguments(args: Array[String]) { if (!ClientArguments.isValidJarUrl(_jarUrl)) { println(s"Jar url '${_jarUrl}' is not in valid format.") - println(s"Must be a jar file path in URL format (e.g. hdfs://host:port/XX.jar, file:///XX.jar)") + println(s"Must be a jar file path in URL format " + + "(e.g. hdfs://host:port/XX.jar, file:///XX.jar)") printUsageAndExit(-1) } From 82bc9cc2d96aa70b786dcd0f39fbe93f8abb4ecc Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 18 Nov 2014 10:45:32 -0800 Subject: [PATCH 6/6] Fixed wrong grammar in comment --- core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala index 9bd7ed7fe148c..d2dae34be7bfb 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ClientSuite.scala @@ -39,7 +39,7 @@ class ClientSuite extends FunSuite with Matchers { ClientArguments.isValidJarUrl("/missing/a/protocol/jarfile.jar") should be (false) ClientArguments.isValidJarUrl("not-even-a-path.jar") should be (false) - // This URI don't have authority and path. + // This URI doesn't have authority and path. ClientArguments.isValidJarUrl("hdfs:someHost:1234/jarfile.jar") should be (false) // Invalid syntax.