From 993dc9c5c6af025ee9757f205c6463c30ab37e46 Mon Sep 17 00:00:00 2001
From: Xusen Yin
Date: Wed, 13 Jan 2016 01:32:22 +0800
Subject: [PATCH 1/5] change labels

---
 python/pyspark/ml/feature.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index b02d41b52ab25..ea874dcd8f5ee 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -1263,7 +1263,7 @@ class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol, HasHandleInvalid):
     >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]),
     ...     key=lambda x: x[0])
     [(0, 0.0), (1, 2.0), (2, 1.0), (3, 0.0), (4, 0.0), (5, 1.0)]
-    >>> inverter = IndexToString(inputCol="indexed", outputCol="label2", labels=model.labels())
+    >>> inverter = IndexToString(inputCol="indexed", outputCol="label2", labels=model.labels)
     >>> itd = inverter.transform(td)
     >>> sorted(set([(i[0], str(i[1])) for i in itd.select(itd.id, itd.label2).collect()]),
     ...     key=lambda x: x[0])
@@ -1305,13 +1305,14 @@ class StringIndexerModel(JavaModel):
 
     .. versionadded:: 1.4.0
     """
 
+    @property
     @since("1.5.0")
     def labels(self):
         """
         Ordered list of labels, corresponding to indices to be assigned.
         """
-        return self._java_obj.labels
+        return self._call_java("labels")
 
 
 @inherit_doc

From c5a170da04f9941f4515f3263939bab9886c2c85 Mon Sep 17 00:00:00 2001
From: Xusen Yin
Date: Wed, 13 Jan 2016 19:16:24 +0800
Subject: [PATCH 2/5] change Array to JavaList when encountering Pickle

---
 .../org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 061db56c74938..55ac2f9fc646b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -1473,7 +1473,10 @@ private[spark] object SerDe extends Serializable {
   initialize()
 
   def dumps(obj: AnyRef): Array[Byte] = {
-    new Pickler().dumps(obj)
+    obj match {
+      case array: Array[_] => new Pickler().dumps(array.toSeq.asJava)
+      case _ => new Pickler().dumps(obj)
+    }
   }
 
   def loads(bytes: Array[Byte]): AnyRef = {

From becc49c2fd137a77836b58730f98f986e2aea36d Mon Sep 17 00:00:00 2001
From: Xusen Yin
Date: Wed, 13 Jan 2016 19:20:15 +0800
Subject: [PATCH 3/5] add comment

---
 .../scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 55ac2f9fc646b..32c3929c15287 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -1474,6 +1474,7 @@ private[spark] object SerDe extends Serializable {
 
   def dumps(obj: AnyRef): Array[Byte] = {
     obj match {
+      // Pickler in Python side cannot deserialize Scala Array normally. See SPARK-12780.
       case array: Array[_] => new Pickler().dumps(array.toSeq.asJava)
       case _ => new Pickler().dumps(obj)
     }

From f5623b661404b461a6cc4fb1178ae246afa23a43 Mon Sep 17 00:00:00 2001
From: Xusen Yin
Date: Fri, 15 Jan 2016 17:12:08 +0800
Subject: [PATCH 4/5] remove code in Scala

---
 .../org/apache/spark/mllib/api/python/PythonMLLibAPI.scala | 6 +-----
 python/pyspark/ml/feature.py                               | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
index 32c3929c15287..061db56c74938 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala
@@ -1473,11 +1473,7 @@ private[spark] object SerDe extends Serializable {
   initialize()
 
   def dumps(obj: AnyRef): Array[Byte] = {
-    obj match {
-      // Pickler in Python side cannot deserialize Scala Array normally. See SPARK-12780.
-      case array: Array[_] => new Pickler().dumps(array.toSeq.asJava)
-      case _ => new Pickler().dumps(obj)
-    }
+    new Pickler().dumps(obj)
   }
 
   def loads(bytes: Array[Byte]): AnyRef = {
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index ea874dcd8f5ee..1c2609a2404f3 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -1312,7 +1312,7 @@ def labels(self):
         """
         Ordered list of labels, corresponding to indices to be assigned.
         """
-        return self._call_java("labels")
+        return list(self._java_obj.labels())
 
 
 @inherit_doc

From 293efba4f3e6df0b1ac590e16e4b78c00bb15e0e Mon Sep 17 00:00:00 2001
From: Xusen Yin
Date: Tue, 26 Jan 2016 08:53:36 -0800
Subject: [PATCH 5/5] change to call java

---
 python/pyspark/ml/feature.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 1c2609a2404f3..ea874dcd8f5ee 100644
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -1312,7 +1312,7 @@ def labels(self):
         """
         Ordered list of labels, corresponding to indices to be assigned.
         """
-        return list(self._java_obj.labels())
+        return self._call_java("labels")
 
 
 @inherit_doc
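
Taken together, the series ends with StringIndexerModel.labels exposed as a read-only property that returns a plain Python list, so it can be passed straight to IndexToString, as the updated doctest in PATCH 1/5 shows. A minimal end-to-end sketch of that usage, assuming a local Spark 1.6-era installation; the sc/sqlContext setup and the input rows are illustrative (chosen to be consistent with the doctest's expected output), not taken from the patches:

    from pyspark import SparkContext
    from pyspark.sql import SQLContext
    from pyspark.ml.feature import StringIndexer, IndexToString

    # Illustrative setup; the feature.py doctests assume sc/sqlContext already exist.
    sc = SparkContext(appName="string-indexer-labels-sketch")
    sqlContext = SQLContext(sc)

    # Rows consistent with the expected doctest output in PATCH 1/5:
    # "a" is most frequent (index 0.0), then "c" (1.0), then "b" (2.0).
    df = sqlContext.createDataFrame(
        [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")],
        ["id", "label"])

    model = StringIndexer(inputCol="label", outputCol="indexed").fit(df)
    td = model.transform(df)

    # After this series, labels is a property (no call parentheses) holding a
    # Python list of label strings ordered by index, ready for IndexToString.
    inverter = IndexToString(inputCol="indexed", outputCol="label2",
                             labels=model.labels)
    inverter.transform(td).select("id", "indexed", "label2").show()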
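
The churn between PATCH 2/5 and PATCH 5/5 comes down to where the JVM-to-Python conversion of the labels array should happen: PATCH 2/5 special-cased Scala Arrays in the shared SerDe.dumps before pickling, PATCH 4/5 removed that and converted by hand in the Python wrapper, and PATCH 5/5 settled on the shared _call_java helper. A rough wrapper-side sketch of the last two variants; the function names are hypothetical, and model stands for a fitted StringIndexerModel from pyspark.ml.feature:

    # Hypothetical helpers, only to contrast the two wrapper-side approaches
    # seen in PATCH 4/5 and PATCH 5/5; neither name is a pyspark API.

    def labels_by_hand(model):
        # PATCH 4/5 style: call the JVM method through the py4j proxy and
        # convert its array result into a Python list explicitly.
        return list(model._java_obj.labels())

    def labels_via_call_java(model):
        # PATCH 5/5 (final) style: the shared _call_java helper performs the
        # JVM-to-Python conversion, as elsewhere in the pyspark.ml wrappers.
        return model._call_java("labels")

Keeping the conversion on the Python wrapper side is what lets PATCH 4/5 drop the Array special case from the Scala SerDe again.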