From 21c9d7c5af9d1647b496734dcd8fa3901bf8b19a Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Sun, 22 Jun 2014 21:10:04 -0700 Subject: [PATCH 1/5] Do not mask stderr from output --- python/pyspark/java_gateway.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 19235d5f79f85..0b8f33117c9c6 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -43,10 +43,10 @@ def launch_gateway(): # Don't send ctrl-c / SIGINT to the Java gateway: def preexec_func(): signal.signal(signal.SIGINT, signal.SIG_IGN) - proc = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE, preexec_fn=preexec_func) + proc = Popen(command, stdout=PIPE, stdin=PIPE, preexec_fn=preexec_func) else: # preexec_fn not supported on Windows - proc = Popen(command, stdout=PIPE, stdin=PIPE, stderr=PIPE) + proc = Popen(command, stdout=PIPE, stdin=PIPE) try: # Determine which ephemeral port the server started on: From 0d1861f008e0d84d429a2991f82240639f33ea7b Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 23 Jun 2014 14:01:17 -0700 Subject: [PATCH 2/5] Provide more helpful output if stdout is garbled Before: ValueError: invalid literal for int() with base 10 After: Launching GatewayServer failed because of stdout interference. Silence the following and try again. --- python/pyspark/java_gateway.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 0b8f33117c9c6..530d5fb751c88 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -47,14 +47,20 @@ def preexec_func(): else: # preexec_fn not supported on Windows proc = Popen(command, stdout=PIPE, stdin=PIPE) - + try: # Determine which ephemeral port the server started on: - gateway_port = int(proc.stdout.readline()) + gateway_port = proc.stdout.readline() + gateway_port = int(gateway_port) + except ValueError: + error_msg = "Launching GatewayServer failed because of stdout interference. " + error_msg += "Silence the following and try again:\n\n" + error_msg += " %s" % gateway_port + raise Exception(error_msg) except: - error_code = proc.poll() - raise Exception("Launching GatewayServer failed with exit code %d: %s" % - (error_code, "".join(proc.stderr.readlines()))) + exit_code = proc.poll() + exit_code_msg = " Exit code was %d." % exit_code if exit_code else "" + raise Exception("Launching GatewayServer failed!" + exit_code_msg) # Create a thread to echo output from the GatewayServer, which is required # for Java log output to show up: From 6dfbd1ed861ff8679738ba95a3651e42aab32842 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 23 Jun 2014 14:12:39 -0700 Subject: [PATCH 3/5] Don't swallow original exception --- python/pyspark/java_gateway.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 530d5fb751c88..2591aa1e77359 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -57,10 +57,10 @@ def preexec_func(): error_msg += "Silence the following and try again:\n\n" error_msg += " %s" % gateway_port raise Exception(error_msg) - except: + except Exception as e: exit_code = proc.poll() exit_code_msg = " Exit code was %d." % exit_code if exit_code else "" - raise Exception("Launching GatewayServer failed!" + exit_code_msg) + raise Exception("Launching GatewayServer failed!" + exit_code_msg, e) # Create a thread to echo output from the GatewayServer, which is required # for Java log output to show up: From a09805b38a715211f9065a7ae9bb563206b57323 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Mon, 23 Jun 2014 16:44:28 -0700 Subject: [PATCH 4/5] Return more than 1 line of error message to user --- python/pyspark/java_gateway.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 2591aa1e77359..bb746f1ee2499 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -53,14 +53,11 @@ def preexec_func(): gateway_port = proc.stdout.readline() gateway_port = int(gateway_port) except ValueError: + (stdout, _) = proc.communicate() error_msg = "Launching GatewayServer failed because of stdout interference. " error_msg += "Silence the following and try again:\n\n" - error_msg += " %s" % gateway_port + error_msg += gateway_port + stdout raise Exception(error_msg) - except Exception as e: - exit_code = proc.poll() - exit_code_msg = " Exit code was %d." % exit_code if exit_code else "" - raise Exception("Launching GatewayServer failed!" + exit_code_msg, e) # Create a thread to echo output from the GatewayServer, which is required # for Java log output to show up: From 20849a8fffb64ebb4bf4f27611f371db22c01c66 Mon Sep 17 00:00:00 2001 From: Andrew Or Date: Tue, 24 Jun 2014 12:34:38 -0700 Subject: [PATCH 5/5] Tone down stdout interference message --- python/pyspark/java_gateway.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index bb746f1ee2499..0dbead4415b02 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -54,8 +54,10 @@ def preexec_func(): gateway_port = int(gateway_port) except ValueError: (stdout, _) = proc.communicate() - error_msg = "Launching GatewayServer failed because of stdout interference. " - error_msg += "Silence the following and try again:\n\n" + exit_code = proc.poll() + error_msg = "Launching GatewayServer failed" + error_msg += " with exit code %d!" % exit_code if exit_code else "! " + error_msg += "(Warning: unexpected output detected.)\n\n" error_msg += gateway_port + stdout raise Exception(error_msg)