From 1e9ec27ec511f9d661c4c262c55ebb50275f6942 Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Fri, 15 Mar 2019 02:07:43 -0400 Subject: [PATCH 1/6] Dropna argument is now respected when false in pivot_table --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/reshape/pivot.py | 3 ++- pandas/tests/reshape/test_pivot.py | 27 +++++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index d186fdfe0f322..1f66581f44abf 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -245,6 +245,7 @@ Reshaping - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`) - Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`) +- Bug in :func:`pivot_table` where columns with NaN values are dropped even if ``dropna`` argument is ``False`` (:issue:`22159`) Sparse diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 8d7616c4b6b61..eddf9a8b316b5 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -35,7 +35,8 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = pivot_table(data, values=values, index=index, columns=columns, fill_value=fill_value, aggfunc=func, - margins=margins, margins_name=margins_name) + margins=margins, dropna=dropna, + margins_name=margins_name) pieces.append(table) keys.append(getattr(func, '__name__', func)) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e4fbb204af533..3fd134d0b07a6 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1289,6 +1289,33 @@ def test_pivot_number_of_levels_larger_than_int32(self): 
df.pivot_table(index='ind1', columns='ind2', values='count', aggfunc='count') + def test_pivot_table_aggfunc_dropna(self): + # GH 22159 + df = pd.DataFrame({'fruit': ['apple', 'peach', 'apple'], + 'size': [1, 1, 2], + 'taste': [7, 6, 6]}) + + def ret_one(x): + return 1 + + def ret_sum(x): + return sum(x) + + def ret_none(x): + return None + + df2 = pd.pivot_table(df, columns='fruit', + aggfunc=[ret_sum, ret_none, ret_one], + dropna=False) + + data = [[3, 1, None, None, 1, 1], [13, 6, None, None, 1, 1]] + col = pd.MultiIndex.from_product([['ret_sum', 'ret_none', 'ret_one'], + ['apple', 'peach']], + names=[None, 'fruit']) + df3 = pd.DataFrame(data, index=['size', 'taste'], columns=col) + + tm.assert_frame_equal(df2, df3) + class TestCrosstab(object): From 2372dbc7c5e9cf9e0fc1b44ac4c05371e9dce311 Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Mon, 18 Mar 2019 13:52:26 -0400 Subject: [PATCH 2/6] Parametrize test --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/tests/reshape/test_pivot.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1f66581f44abf..35d46fdf5a047 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -245,7 +245,7 @@ Reshaping - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`) - Bug in :func:`concat` where order of ``OrderedDict`` (and ``dict`` in Python 3.6+) is not respected, when passed in as ``objs`` argument (:issue:`21510`) -- Bug in :func:`pivot_table` where columns with NaN values are dropped even if ``dropna`` argument is ``False`` (:issue:`22159`) +- Bug in :func:`pivot_table` where columns with ``NaN`` values are dropped even if ``dropna`` argument is ``False``, when the ``aggfunc`` argument contains a ``list`` (:issue:`22159`) Sparse diff 
--git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3fd134d0b07a6..3610a43fa119a 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1289,7 +1289,10 @@ def test_pivot_number_of_levels_larger_than_int32(self): df.pivot_table(index='ind1', columns='ind2', values='count', aggfunc='count') - def test_pivot_table_aggfunc_dropna(self): + @pytest.mark.parametrize('input_vals', [ + (True), (False) + ]) + def test_pivot_table_aggfunc_dropna(self, input_vals): # GH 22159 df = pd.DataFrame({'fruit': ['apple', 'peach', 'apple'], 'size': [1, 1, 2], @@ -1306,7 +1309,7 @@ def ret_none(x): df2 = pd.pivot_table(df, columns='fruit', aggfunc=[ret_sum, ret_none, ret_one], - dropna=False) + dropna=input_vals) data = [[3, 1, None, None, 1, 1], [13, 6, None, None, 1, 1]] col = pd.MultiIndex.from_product([['ret_sum', 'ret_none', 'ret_one'], @@ -1314,6 +1317,9 @@ def ret_none(x): names=[None, 'fruit']) df3 = pd.DataFrame(data, index=['size', 'taste'], columns=col) + if input_vals == True: + df3 = df3.iloc[:, df3.columns.get_level_values(0)!='ret_none'] + tm.assert_frame_equal(df2, df3) From 00c8e33bf3d5843cbaccb95ff2b3ce1434eda223 Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Mon, 18 Mar 2019 13:55:13 -0400 Subject: [PATCH 3/6] Fixed pep8 issues --- pandas/tests/reshape/test_pivot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 3610a43fa119a..9c464cc29279b 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1317,8 +1317,8 @@ def ret_none(x): names=[None, 'fruit']) df3 = pd.DataFrame(data, index=['size', 'taste'], columns=col) - if input_vals == True: - df3 = df3.iloc[:, df3.columns.get_level_values(0)!='ret_none'] + if input_vals is True: + df3 = df3.iloc[:, df3.columns.get_level_values(0) != 'ret_none'] tm.assert_frame_equal(df2, df3) From 
851797682c785aaddbf0d8b6e603ade37603fd44 Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Fri, 5 Apr 2019 12:38:47 -0400 Subject: [PATCH 4/6] Fix up the test --- pandas/tests/reshape/test_pivot.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9c464cc29279b..0f83cfb9b35fd 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1289,10 +1289,10 @@ def test_pivot_number_of_levels_larger_than_int32(self): df.pivot_table(index='ind1', columns='ind2', values='count', aggfunc='count') - @pytest.mark.parametrize('input_vals', [ - (True), (False) + @pytest.mark.parametrize('dropna', [ + True, False ]) - def test_pivot_table_aggfunc_dropna(self, input_vals): + def test_pivot_table_aggfunc_dropna(self, dropna): # GH 22159 df = pd.DataFrame({'fruit': ['apple', 'peach', 'apple'], 'size': [1, 1, 2], @@ -1305,22 +1305,22 @@ def ret_sum(x): return sum(x) def ret_none(x): - return None + return np.nan - df2 = pd.pivot_table(df, columns='fruit', - aggfunc=[ret_sum, ret_none, ret_one], - dropna=input_vals) + result = pd.pivot_table(df, columns='fruit', + aggfunc=[ret_sum, ret_none, ret_one], + dropna=dropna) - data = [[3, 1, None, None, 1, 1], [13, 6, None, None, 1, 1]] + data = [[3, 1, np.nan, np.nan, 1, 1], [13, 6, np.nan, np.nan, 1, 1]] col = pd.MultiIndex.from_product([['ret_sum', 'ret_none', 'ret_one'], ['apple', 'peach']], names=[None, 'fruit']) - df3 = pd.DataFrame(data, index=['size', 'taste'], columns=col) + expected = pd.DataFrame(data, index=['size', 'taste'], columns=col) - if input_vals is True: - df3 = df3.iloc[:, df3.columns.get_level_values(0) != 'ret_none'] + if dropna: + expected = expected.dropna(axis='columns') - tm.assert_frame_equal(df2, df3) + tm.assert_frame_equal(result, expected) class TestCrosstab(object): From 0f6ea456b180a6eddccf6216764fa8c7e55ee0a1 Mon Sep 17 00:00:00 2001 From: 
Alexander Ponomaroff Date: Tue, 9 Apr 2019 14:55:54 -0400 Subject: [PATCH 5/6] Removed parametrize --- pandas/tests/reshape/test_pivot.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index eb06700258ea6..508f4fb3628c1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1288,9 +1288,6 @@ def test_pivot_number_of_levels_larger_than_int32(self): df.pivot_table(index='ind1', columns='ind2', values='count', aggfunc='count') - @pytest.mark.parametrize('dropna', [ - True, False - ]) def test_pivot_table_aggfunc_dropna(self, dropna): # GH 22159 df = pd.DataFrame({'fruit': ['apple', 'peach', 'apple'], From 121d45f1dc629a0455219dabf99bff200aea5a60 Mon Sep 17 00:00:00 2001 From: Alexander Ponomaroff Date: Tue, 9 Apr 2019 15:44:42 -0400 Subject: [PATCH 6/6] Added test with scalar aggfunc --- pandas/tests/reshape/test_pivot.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 508f4fb3628c1..1ee2ebf5de34e 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1318,6 +1318,25 @@ def ret_none(x): tm.assert_frame_equal(result, expected) + def test_pivot_table_aggfunc_scalar_dropna(self, dropna): + # GH 22159 + df = pd.DataFrame({'A': ['one', 'two', 'one'], + 'x': [3, np.nan, 2], + 'y': [1, np.nan, np.nan]}) + + result = pd.pivot_table(df, columns='A', + aggfunc=np.mean, + dropna=dropna) + + data = [[2.5, np.nan], [1, np.nan]] + col = pd.Index(['one', 'two'], name='A') + expected = pd.DataFrame(data, index=['x', 'y'], columns=col) + + if dropna: + expected = expected.dropna(axis='columns') + + tm.assert_frame_equal(result, expected) + class TestCrosstab(object):