
. "$FWDIR"/bin/load-spark-env.sh

# Figure out which Python executable to use.
#
# In Spark <= 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython`
# executable, while the worker would still be launched using PYSPARK_PYTHON.
#
# In Spark 1.2, we removed the documentation of the IPYTHON and IPYTHON_OPTS variables and added
# PYSPARK_DRIVER_PYTHON and PYSPARK_DRIVER_PYTHON_OPTS to allow IPython to be used for the driver.
# Now, users can simply set PYSPARK_DRIVER_PYTHON=ipython to use IPython and set
# PYSPARK_DRIVER_PYTHON_OPTS to pass options when starting the Python driver
# (e.g. PYSPARK_DRIVER_PYTHON_OPTS='notebook'). This supports full customization of the IPython
# and executor Python executables.
#
# For backwards-compatibility, we retain the old IPYTHON and IPYTHON_OPTS variables.

# Determine the Python executable to use if PYSPARK_PYTHON or PYSPARK_DRIVER_PYTHON isn't set:
if hash python2.7 2>/dev/null; then
  # Attempt to use Python 2.7, if installed:
  DEFAULT_PYTHON="python2.7"
else
  DEFAULT_PYTHON="python"
fi

# Determine the Python executable to use for the driver:
if [[ -n "$IPYTHON_OPTS" || "$IPYTHON" == "1" ]]; then
  # If IPython options are specified, assume the user wants to run IPython
  # (for backwards-compatibility with the Spark <= 1.1 IPYTHON/IPYTHON_OPTS variables).
  PYSPARK_DRIVER_PYTHON_OPTS="$PYSPARK_DRIVER_PYTHON_OPTS $IPYTHON_OPTS"
  PYSPARK_DRIVER_PYTHON="ipython"
elif [[ -z "$PYSPARK_DRIVER_PYTHON" ]]; then
  # Default the driver to the executor Python, falling back to the system default.
  PYSPARK_DRIVER_PYTHON="${PYSPARK_PYTHON:-"$DEFAULT_PYTHON"}"
fi

# Determine the Python executable to use for the executors:
if [[ -z "$PYSPARK_PYTHON" ]]; then
  # IPython needs Python 2.7+, so refuse to silently fall back to an older `python`
  # when the driver is (any flavor of) ipython but python2.7 is not installed.
  if [[ $PYSPARK_DRIVER_PYTHON == *ipython* && $DEFAULT_PYTHON != "python2.7" ]]; then
    echo "IPython requires Python 2.7+; please install python2.7 or set PYSPARK_PYTHON" 1>&2
    exit 1
  else
    PYSPARK_PYTHON="$DEFAULT_PYTHON"
  fi
fi
export PYSPARK_PYTHON

# Add the PySpark classes to the Python path:
export PYTHONPATH="$SPARK_HOME/python/:$PYTHONPATH"
export PYTHONPATH="$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH"
# Test-harness entry point: when SPARK_TESTING is set, run the given file directly
# with the driver Python (as doctests if PYSPARK_DOC_TEST is also set) and exit,
# bypassing the normal spark-submit launch path.
if [[ -n "$SPARK_TESTING" ]]; then
  # Clear Hadoop/YARN config so tests run against local defaults.
  unset YARN_CONF_DIR
  unset HADOOP_CONF_DIR
  if [[ -n "$PYSPARK_DOC_TEST" ]]; then
    # NOTE: $1 is intentionally unquoted here (upstream behavior) — an absent
    # argument must expand to nothing, not to an empty-string argument.
    exec "$PYSPARK_DRIVER_PYTHON" -m doctest $1
  else
    exec "$PYSPARK_DRIVER_PYTHON" $1
  fi
  exit
fi
@@ -111,5 +136,5 @@ if [[ "$1" =~ \.py$ ]]; then
111136else
112137 # PySpark shell requires special handling downstream
113138 export PYSPARK_SHELL=1
114- exec " $PYSPARK_PYTHON " $PYSPARK_PYTHON_OPTS
139+ exec " $PYSPARK_DRIVER_PYTHON " $PYSPARK_DRIVER_PYTHON_OPTS
115140fi
0 commit comments