From 9f2de76e6c0eda72ca650735e79feaa1d9e288e9 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 9 Jan 2020 10:49:21 +0900 Subject: [PATCH 1/2] Explicitly note that we don't add "pandas compatible" aliases --- python/pyspark/sql/dataframe.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 8f4454a08d57d..5ebef0db77876 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2135,10 +2135,16 @@ def transform(self, func): "should have been DataFrame." % type(result) return result - ########################################################################################## - # Pandas compatibility - ########################################################################################## + where = copy_func( + filter, + sinceversion=1.3, + doc=":func:`where` is an alias for :func:`filter`.") + # Two aliases below were added for pandas compatibility many years ago. + # There are too many differences compared to pandas and we cannot just + # make it "compatible" by adding aliases. Therefore, we stop adding such + # aliases as of SPARK-30464 at this moment. Two methods below remain just + # for legacy users currently. groupby = copy_func( groupBy, sinceversion=1.4, @@ -2149,11 +2155,6 @@ def transform(self, func): sinceversion=1.4, doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.") - where = copy_func( - filter, - sinceversion=1.3, - doc=":func:`where` is an alias for :func:`filter`.") - def _to_scala_map(sc, jm): """ From 1aebf05ecaee7bd746d40dfec990bef4e9ddf2d8 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 9 Jan 2020 11:00:44 +0900 Subject: [PATCH 2/2] Address comments --- python/pyspark/sql/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 5ebef0db77876..84fee0816d824 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -2143,7 +2143,7 @@ def transform(self, func): # Two aliases below were added for pandas compatibility many years ago. # There are too many differences compared to pandas and we cannot just # make it "compatible" by adding aliases. Therefore, we stop adding such - # aliases as of SPARK-30464 at this moment. Two methods below remain just + # aliases as of Spark 3.0. Two methods below remain just # for legacy users currently. groupby = copy_func( groupBy,