From cb4915e0d4c532494f913b4f4f5b82198fb71796 Mon Sep 17 00:00:00 2001
From: Davies Liu
Date: Tue, 17 Feb 2015 16:06:54 -0800
Subject: [PATCH 1/2] fix repartition

---
 python/pyspark/sql/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 841724095f693..7dc7bfd28a755 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -418,7 +418,7 @@ def repartition(self, numPartitions):
         """
         Return a new :class:`DataFrame` that has exactly `numPartitions` partitions.
         """
-        return DataFrame(self._jdf.repartition(numPartitions, None), self.sql_ctx)
+        return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx)
 
     def distinct(self):
         """

From 79059fde59f6af3dae31ad93726dd95f4fda3939 Mon Sep 17 00:00:00 2001
From: Davies Liu
Date: Tue, 17 Feb 2015 16:46:43 -0800
Subject: [PATCH 2/2] add test

---
 python/pyspark/sql/dataframe.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 7dc7bfd28a755..065c9874263aa 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -417,12 +417,18 @@ def unpersist(self, blocking=True):
     def repartition(self, numPartitions):
         """
         Return a new :class:`DataFrame` that has exactly `numPartitions` partitions.
+
+        >>> df.repartition(10).rdd.getNumPartitions()
+        10
         """
         return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx)
 
     def distinct(self):
         """
         Return a new :class:`DataFrame` containing the distinct rows in this DataFrame.
+
+        >>> df.distinct().count()
+        2L
         """
         return DataFrame(self._jdf.distinct(), self.sql_ctx)
 