-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-21094][PYTHON] Add popen_kwargs to launch_gateway #18339
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -37,15 +37,21 @@ | |
| from pyspark.util import _exception_message | ||
|
|
||
|
|
||
| def launch_gateway(conf=None): | ||
| def launch_gateway(conf=None, popen_kwargs=None): | ||
| """ | ||
| launch jvm gateway | ||
| :param conf: spark configuration passed to spark-submit | ||
| :param popen_kwargs: Dictionary of kwargs to pass to Popen when spawning | ||
|
||
| the py4j JVM. This is a developer feature intended for use in | ||
| customizing how pyspark interacts with the py4j JVM (e.g., capturing | ||
| stdout/stderr). | ||
| :return: | ||
| """ | ||
| if "PYSPARK_GATEWAY_PORT" in os.environ: | ||
| gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"]) | ||
| gateway_secret = os.environ["PYSPARK_GATEWAY_SECRET"] | ||
| # Process already exists | ||
| proc = None | ||
| else: | ||
| SPARK_HOME = _find_spark_home() | ||
| # Launch the Py4j gateway using Spark's run command so that we pick up the | ||
|
|
@@ -76,15 +82,20 @@ def launch_gateway(conf=None): | |
| env["_PYSPARK_DRIVER_CONN_INFO_PATH"] = conn_info_file | ||
|
|
||
| # Launch the Java gateway. | ||
| popen_kwargs = {} if popen_kwargs is None else popen_kwargs | ||
| # We open a pipe to stdin so that the Java gateway can die when the pipe is broken | ||
| popen_kwargs['stdin'] = PIPE | ||
| # We always set the necessary environment variables. | ||
| popen_kwargs['env'] = env | ||
| if not on_windows: | ||
| # Don't send ctrl-c / SIGINT to the Java gateway: | ||
| def preexec_func(): | ||
| signal.signal(signal.SIGINT, signal.SIG_IGN) | ||
| proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env) | ||
| popen_kwargs['preexec_fn'] = preexec_func | ||
| proc = Popen(command, **popen_kwargs) | ||
| else: | ||
| # preexec_fn not supported on Windows | ||
| proc = Popen(command, stdin=PIPE, env=env) | ||
| proc = Popen(command, **popen_kwargs) | ||
|
|
||
| # Wait for the file to appear, or for the process to exit, whichever happens first. | ||
| while not proc.poll() and not os.path.isfile(conn_info_file): | ||
|
|
@@ -119,6 +130,8 @@ def killChild(): | |
| gateway = JavaGateway( | ||
| gateway_parameters=GatewayParameters(port=gateway_port, auth_token=gateway_secret, | ||
| auto_convert=True)) | ||
| # Store a reference to the Popen object for use by the caller (e.g., in reading stdout/stderr) | ||
| gateway.proc = proc | ||
|
|
||
| # Import the classes used by PySpark | ||
| java_import(gateway.jvm, "org.apache.spark.SparkConf") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd make this _popen_kwargs to indicate its usage is possibly not super supported.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would a comment in the docstring to that effect be better? I haven't seen
`_var_name` used in Python projects to indicate a developer feature. (But of course, maybe I've just not seen it yet!)