From 4e8df6c64754c0567a532e587a554d67053d2aeb Mon Sep 17 00:00:00 2001 From: Xusen Yin Date: Fri, 15 Jan 2016 22:05:22 +0800 Subject: [PATCH 1/2] change ser/de of JavaArray and JavaList --- python/pyspark/mllib/common.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 9fda1b1682f57..08ab625ce4bc9 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -107,10 +107,7 @@ def _java2py(sc, r, encoding="bytes"): if clsName in _picklable_classes: r = sc._jvm.SerDe.dumps(r) elif isinstance(r, (JavaArray, JavaList)): - try: - r = sc._jvm.SerDe.dumps(r) - except Py4JJavaError: - pass # not pickable + r = list(r) if isinstance(r, (bytearray, bytes)): r = PickleSerializer().loads(bytes(r), encoding=encoding) From 8903447f2f9d61448f0cd7dc425d1d3b5e119933 Mon Sep 17 00:00:00 2001 From: Xusen Yin Date: Sat, 16 Jan 2016 10:31:43 +0800 Subject: [PATCH 2/2] change to original fix --- .../org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 6 +++++- python/pyspark/mllib/common.py | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 061db56c74938..bdf6ed9eb5b5d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -1473,7 +1473,11 @@ private[spark] object SerDe extends Serializable { initialize() def dumps(obj: AnyRef): Array[Byte] = { - new Pickler().dumps(obj) + obj match { + // Pickler in Python side cannot deserialize Scala Array normally. See SPARK-12834. + case array: Array[_] => new Pickler().dumps(array.toSeq.asJava) + case _ => new Pickler().dumps(obj) + } } def loads(bytes: Array[Byte]): AnyRef = { diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 08ab625ce4bc9..9fda1b1682f57 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -107,7 +107,10 @@ def _java2py(sc, r, encoding="bytes"): if clsName in _picklable_classes: r = sc._jvm.SerDe.dumps(r) elif isinstance(r, (JavaArray, JavaList)): - r = list(r) + try: + r = sc._jvm.SerDe.dumps(r) + except Py4JJavaError: + pass # not pickable if isinstance(r, (bytearray, bytes)): r = PickleSerializer().loads(bytes(r), encoding=encoding)