From 2d3c321f8be2de4cdca5a654b3f0a6a20a29dc2c Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 27 Feb 2018 00:47:57 +0900 Subject: [PATCH 1/2] Make pyspark.util._exception_message produce the trace from Java side for Py4JJavaError --- python/pyspark/tests.py | 11 +++++++++++ python/pyspark/util.py | 3 +++ 2 files changed, 14 insertions(+) diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 511585763cb0..9111dbbed592 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -2293,6 +2293,17 @@ def set(self, x=None, other=None, other_x=None): self.assertEqual(b._x, 2) +class UtilTests(PySparkTestCase): + def test_py4j_exception_message(self): + from pyspark.util import _exception_message + + with self.assertRaises(Py4JJavaError) as context: + # This attempts java.lang.String(null) which throws an NPE. + self.sc._jvm.java.lang.String(None) + + self.assertTrue('NullPointerException' in _exception_message(context.exception)) + + @unittest.skipIf(not _have_scipy, "SciPy not installed") class SciPyTests(PySparkTestCase): diff --git a/python/pyspark/util.py b/python/pyspark/util.py index e5d332ce5442..2f792690b30f 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -15,6 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from py4j.protocol import Py4JJavaError __all__ = [] @@ -33,6 +34,8 @@ def _exception_message(excp): >>> msg == _exception_message(excp) True """ + if isinstance(excp, Py4JJavaError): + return excp.__str__() if hasattr(excp, "message"): return excp.message return str(excp) From f2c884f49e579dafbe4189d4e566c1164ffb4b85 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Tue, 27 Feb 2018 12:15:24 +0900 Subject: [PATCH 2/2] Add a comment to explain the workaround --- python/pyspark/util.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 2f792690b30f..ad4a0bc68ef4 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -35,6 +35,10 @@ def _exception_message(excp): True """ if isinstance(excp, Py4JJavaError): + # 'Py4JJavaError' doesn't contain the stack trace available on the Java side in 'message' + # attribute in Python 2. We should call 'str' function on this exception in general but + # 'Py4JJavaError' has an issue about addressing non-ascii strings. So, here we work + # around by the direct call, '__str__()'. Please see SPARK-23517. return excp.__str__() if hasattr(excp, "message"): return excp.message