File tree Expand file tree Collapse file tree 3 files changed +13
-8
lines changed
core/src/main/scala/org/apache/spark/deploy
yarn/src/main/scala/org/apache/spark/deploy/yarn Expand file tree Collapse file tree 3 files changed +13
-8
lines changed Original file line number Diff line number Diff line change @@ -328,8 +328,12 @@ object SparkSubmit {
328328 }
329329 }
330330
331- if (args.isPython && System .getenv(" PYSPARK_ARCHIVES_PATH" ) != null ) {
332- args.files = mergeFileLists(args.files, System .getenv(" PYSPARK_ARCHIVES_PATH" ))
331+ // In yarn mode for a python app, if PYSPARK_ARCHIVES_PATH is in the user environment
332+ // add pyspark archives to files that can be distributed with the job
333+ if (args.isPython && clusterManager == YARN ){
334+ sys.env.get(" PYSPARK_ARCHIVES_PATH" ).map { archives =>
335+ args.files = mergeFileLists(args.files, Utils .resolveURIs(archives))
336+ }
333337 }
334338
335339 // If we're running a R app, set the main class to our specific R runner
Original file line number Diff line number Diff line change @@ -326,10 +326,15 @@ private[spark] class Client(
326326 distCacheMgr.setDistFilesEnv(env)
327327 distCacheMgr.setDistArchivesEnv(env)
328328
329- if (System .getenv(" PYSPARK_ARCHIVES_PATH" ) != null ) {
330- val pythonPath = System .getenv(" PYSPARK_ARCHIVES_PATH" ).split(" ," ).map(
329+ // If PYSPARK_ARCHIVES_PATH is in the user environment, set PYTHONPATH to be passed
330+ // on to the ApplicationMaster and the executors.
331+ sys.env.get(" PYSPARK_ARCHIVES_PATH" ).map { archives =>
332+ // archives will be distributed to each machine's working directory, so strip the
333+ // path prefix
334+ val pythonPath = archives.split(" ," ).map(
331335 p => (new Path (p)).getName).mkString(" :" )
332336 env(" PYTHONPATH" ) = pythonPath
337+ sparkConf.setExecutorEnv(" PYTHONPATH" , pythonPath)
333338 }
334339
335340 // Pick up any environment variables for the AM provided through spark.yarn.appMasterEnv.*
Original file line number Diff line number Diff line change @@ -285,10 +285,6 @@ class ExecutorRunnable(
285285 YarnSparkHadoopUtil .addPathToEnvironment(env, key, value)
286286 }
287287
288- if (System .getenv(" PYTHONPATH" ) != null ) {
289- env(" PYTHONPATH" ) = System .getenv(" PYTHONPATH" )
290- }
291-
292288 // Keep this for backwards compatibility but users should move to the config
293289 sys.env.get(" SPARK_YARN_USER_ENV" ).foreach { userEnvs =>
294290 YarnSparkHadoopUtil .setEnvFromInputString(env, userEnvs)
You can’t perform that action at this time.
0 commit comments