From 0aee8faad9cb60721b153c9bc2187f87a4036b9e Mon Sep 17 00:00:00 2001
From: pgandhi
Date: Thu, 31 May 2018 09:36:13 -0500
Subject: [PATCH 1/5] [SPARK-22151] : PYTHONPATH not picked up from the spark.yarn.appMasterEnv properly

Running in YARN cluster mode and trying to set the Python path via
spark.yarn.appMasterEnv.PYTHONPATH doesn't work. The YARN Client code
looks at the env variables:

  val pythonPathStr = (sys.env.get("PYTHONPATH") ++ pythonPath)

But when you set spark.yarn.appMasterEnv, the value is put into the
local env, so the Python path set in spark.yarn.appMasterEnv isn't
properly picked up. You can work around it in cluster mode by setting
the variable on the client, like:

  PYTHONPATH=./addon/python/ spark-submit

In Client.scala, PYTHONPATH was being overridden, so the code is
changed to append values to PYTHONPATH instead of replacing them.
---
 .../scala/org/apache/spark/deploy/yarn/Client.scala | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 7225ff03dc34e..2fe761dd9da90 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -813,8 +813,14 @@ private[spark] class Client(
     if (pythonPath.nonEmpty) {
       val pythonPathStr = (sys.env.get("PYTHONPATH") ++ pythonPath)
         .mkString(ApplicationConstants.CLASS_PATH_SEPARATOR)
-      env("PYTHONPATH") = pythonPathStr
-      sparkConf.setExecutorEnv("PYTHONPATH", pythonPathStr)
+      val newValue =
+        if (env.contains("PYTHONPATH")) {
+          env("PYTHONPATH") + ApplicationConstants.CLASS_PATH_SEPARATOR + pythonPathStr
+        } else {
+          pythonPathStr
+        }
+      env("PYTHONPATH") = newValue
+      sparkConf.setExecutorEnv("PYTHONPATH", newValue)
     }
 
     if (isClusterMode) {
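Note on the change above: the appended-not-overwritten behavior hinges on combining an Option with a sequence before mkString. The sketch below is a minimal, standalone illustration of that logic, not the Client.scala code itself; the env map, the pythonPath entries, and the ":" separator (standing in for ApplicationConstants.CLASS_PATH_SEPARATOR) are all hypothetical stand-ins.

  import scala.collection.mutable

  object PythonPathAppendSketch {
    val sep = ":" // stand-in for ApplicationConstants.CLASS_PATH_SEPARATOR

    def main(args: Array[String]): Unit = {
      // Simulated AM launch env, already holding a value placed there by
      // spark.yarn.appMasterEnv.PYTHONPATH.
      val env = mutable.HashMap("PYTHONPATH" -> "./addon/python/")
      // Hypothetical paths collected for --py-files / pyspark archives.
      val pythonPath = Seq("pyspark.zip", "py4j-src.zip")

      // Same shape as the patched logic: client-side PYTHONPATH (if any) plus
      // the collected paths...
      val pythonPathStr = (sys.env.get("PYTHONPATH") ++ pythonPath).mkString(sep)
      // ...appended to an existing env("PYTHONPATH") instead of overwriting it.
      val newValue =
        if (env.contains("PYTHONPATH")) env("PYTHONPATH") + sep + pythonPathStr
        else pythonPathStr
      env("PYTHONPATH") = newValue

      println(env("PYTHONPATH")) // e.g. ./addon/python/:pyspark.zip:py4j-src.zip
    }
  }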
From 5e733aeee66e7aedcccd9eed76f539ce77919e78 Mon Sep 17 00:00:00 2001
From: pgandhi
Date: Fri, 8 Jun 2018 11:15:17 -0500
Subject: [PATCH 2/5] [SPARK-22151] : Made changes as per comments

- spark.executorEnv should not be overridden by appMasterEnv
- Replacing ++ with ++=:
---
 .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 2fe761dd9da90..8659b1d70107a 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -811,7 +811,7 @@ private[spark] class Client(
     // Finally, update the Spark config to propagate PYTHONPATH to the AM and executors.
     if (pythonPath.nonEmpty) {
-      val pythonPathStr = (sys.env.get("PYTHONPATH") ++ pythonPath)
+      val pythonPathStr = (sys.env.get("PYTHONPATH") ++=: pythonPath)
         .mkString(ApplicationConstants.CLASS_PATH_SEPARATOR)
       val newValue =
         if (env.contains("PYTHONPATH")) {
@@ -820,7 +820,9 @@ private[spark] class Client(
           pythonPathStr
         }
       env("PYTHONPATH") = newValue
-      sparkConf.setExecutorEnv("PYTHONPATH", newValue)
+      if (!sparkConf.getExecutorEnv.toMap.contains("PYTHONPATH")) {
+        sparkConf.setExecutorEnv("PYTHONPATH", newValue)
+      }
     }
 
     if (isClusterMode) {

From 6ba543e9ac994a6940b71463c1ab6867166a13ef Mon Sep 17 00:00:00 2001
From: pgandhi
Date: Thu, 28 Jun 2018 13:09:43 -0500
Subject: [PATCH 3/5] [SPARK-22151] : Appending original pythonpath value to ExecutorEnv.PYTHONPATH

Instead of overriding the executorEnv PYTHONPATH, append the existing
PYTHONPATH string to it, and avoid passing the value of
appMasterEnv.PYTHONPATH to the executorEnv variable.
---
 .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 8659b1d70107a..8108f334a5631 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -811,7 +811,7 @@ private[spark] class Client(
     // Finally, update the Spark config to propagate PYTHONPATH to the AM and executors.
     if (pythonPath.nonEmpty) {
-      val pythonPathStr = (sys.env.get("PYTHONPATH") ++=: pythonPath)
+      val pythonPathStr = (sys.env.get("PYTHONPATH") ++ pythonPath)
         .mkString(ApplicationConstants.CLASS_PATH_SEPARATOR)
       val newValue =
         if (env.contains("PYTHONPATH")) {
@@ -821,7 +821,11 @@ private[spark] class Client(
         }
       env("PYTHONPATH") = newValue
       if (!sparkConf.getExecutorEnv.toMap.contains("PYTHONPATH")) {
-        sparkConf.setExecutorEnv("PYTHONPATH", newValue)
+        sparkConf.setExecutorEnv("PYTHONPATH", pythonPathStr)
+      } else {
+        val pythonPathExecutorEnv = sparkConf.getExecutorEnv.toMap.get("PYTHONPATH").get +
+          ApplicationConstants.CLASS_PATH_SEPARATOR + pythonPathStr
+        sparkConf.setExecutorEnv("PYTHONPATH", pythonPathExecutorEnv)
       }
     }
 
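Patches 3 and 4 converge on the same executor-side rule: a user-supplied spark.executorEnv.PYTHONPATH is preserved and the computed path is appended after it. Below is a minimal sketch of that rule, assuming a plain Map in place of sparkConf.getExecutorEnv and ":" in place of the YARN separator; the pattern match is just an equivalent form of the if/else in the patch.

  object ExecutorPythonPathSketch {
    val sep = ":" // stand-in for ApplicationConstants.CLASS_PATH_SEPARATOR

    // Keep any user-set spark.executorEnv.PYTHONPATH and append the computed
    // path after it; otherwise use the computed path alone.
    def executorPythonPath(executorEnv: Map[String, String], pythonPathStr: String): String =
      executorEnv.get("PYTHONPATH") match {
        case Some(existing) => existing + sep + pythonPathStr
        case None => pythonPathStr
      }

    def main(args: Array[String]): Unit = {
      val computed = Seq("pyspark.zip", "py4j-src.zip").mkString(sep) // hypothetical paths
      println(executorPythonPath(Map.empty, computed))
      println(executorPythonPath(Map("PYTHONPATH" -> "/opt/libs/python"), computed))
    }
  }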
From 5423befa2c27affc0a5a54f02144a34a77af34c4 Mon Sep 17 00:00:00 2001
From: pgandhi
Date: Mon, 16 Jul 2018 13:39:28 -0500
Subject: [PATCH 4/5] [SPARK-22151] : Addressing comments on setting executorEnv config

---
 .../org/apache/spark/deploy/yarn/Client.scala | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 8108f334a5631..8fd32c2338a05 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -820,13 +820,14 @@ private[spark] class Client(
           pythonPathStr
         }
       env("PYTHONPATH") = newValue
-      if (!sparkConf.getExecutorEnv.toMap.contains("PYTHONPATH")) {
-        sparkConf.setExecutorEnv("PYTHONPATH", pythonPathStr)
-      } else {
-        val pythonPathExecutorEnv = sparkConf.getExecutorEnv.toMap.get("PYTHONPATH").get +
-          ApplicationConstants.CLASS_PATH_SEPARATOR + pythonPathStr
-        sparkConf.setExecutorEnv("PYTHONPATH", pythonPathExecutorEnv)
-      }
+      val pythonPathExecutorEnv =
+        if (!sparkConf.getExecutorEnv.toMap.contains("PYTHONPATH")) {
+          pythonPathStr
+        } else {
+          sparkConf.getExecutorEnv.toMap.get("PYTHONPATH").get +
+            ApplicationConstants.CLASS_PATH_SEPARATOR + pythonPathStr
+        }
+      sparkConf.setExecutorEnv("PYTHONPATH", pythonPathExecutorEnv)
     }
 
     if (isClusterMode) {

From 49f37a80bb274efa50f2ec295d6e9009eeb7b24a Mon Sep 17 00:00:00 2001
From: pgandhi
Date: Mon, 16 Jul 2018 16:55:41 -0500
Subject: [PATCH 5/5] [SPARK-22151] : Reducing number of lines by using ++ operator

---
 .../org/apache/spark/deploy/yarn/Client.scala | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
index 8fd32c2338a05..dcc9db06f879e 100644
--- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
+++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala
@@ -811,22 +811,11 @@ private[spark] class Client(
     // Finally, update the Spark config to propagate PYTHONPATH to the AM and executors.
     if (pythonPath.nonEmpty) {
-      val pythonPathStr = (sys.env.get("PYTHONPATH") ++ pythonPath)
+      val pythonPathList = (sys.env.get("PYTHONPATH") ++ pythonPath)
+      env("PYTHONPATH") = (env.get("PYTHONPATH") ++ pythonPathList)
         .mkString(ApplicationConstants.CLASS_PATH_SEPARATOR)
-      val newValue =
-        if (env.contains("PYTHONPATH")) {
-          env("PYTHONPATH") + ApplicationConstants.CLASS_PATH_SEPARATOR + pythonPathStr
-        } else {
-          pythonPathStr
-        }
-      env("PYTHONPATH") = newValue
-      val pythonPathExecutorEnv =
-        if (!sparkConf.getExecutorEnv.toMap.contains("PYTHONPATH")) {
-          pythonPathStr
-        } else {
-          sparkConf.getExecutorEnv.toMap.get("PYTHONPATH").get +
-            ApplicationConstants.CLASS_PATH_SEPARATOR + pythonPathStr
-        }
+      val pythonPathExecutorEnv = (sparkConf.getExecutorEnv.toMap.get("PYTHONPATH") ++
+        pythonPathList).mkString(ApplicationConstants.CLASS_PATH_SEPARATOR)
       sparkConf.setExecutorEnv("PYTHONPATH", pythonPathExecutorEnv)
     }
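The final version leans on the fact that concatenating an Option into a collection simply drops the element when the Option is empty, which is what makes the earlier if/else branches unnecessary. A small standalone sketch with hypothetical path values, using ":" in place of ApplicationConstants.CLASS_PATH_SEPARATOR:

  object OptionConcatSketch {
    def main(args: Array[String]): Unit = {
      val sep = ":" // stand-in for ApplicationConstants.CLASS_PATH_SEPARATOR
      val pythonPathList = Option("client-side/path") ++ Seq("pyspark.zip", "py4j-src.zip")

      // With an existing value the prefix ends up first; with an empty Option it drops out.
      println((Option("am-env/path") ++ pythonPathList).mkString(sep))
      // am-env/path:client-side/path:pyspark.zip:py4j-src.zip
      println((Option.empty[String] ++ pythonPathList).mkString(sep))
      // client-side/path:pyspark.zip:py4j-src.zip
    }
  }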