diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index b89646d465fff..81651fcf9053d 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -444,9 +444,9 @@ Groupby/Resample/Rolling - Bug in :meth:`pandas.core.groupby.GroupBy.cumsum`, :meth:`pandas.core.groupby.GroupBy.cumprod`, :meth:`pandas.core.groupby.GroupBy.cummin` and :meth:`pandas.core.groupby.GroupBy.cummax` with categorical column having absent categories, would return incorrect result or segfault (:issue:`16771`) - Bug in :meth:`pandas.core.groupby.GroupBy.nth` where NA values in the grouping would return incorrect results (:issue:`26011`) - Bug in :meth:`pandas.core.groupby.SeriesGroupBy.transform` where transforming an empty group would raise error (:issue:`26208`) +- Bug in :meth:`pandas.core.frame.DataFrame.groupby` where passing a :class:`pandas.core.groupby.grouper.Grouper` would return incorrect groups when using the ``.groups`` accessor (:issue:`26326`) - Bug in :meth:`pandas.core.groupby.GroupBy.agg` where incorrect results are returned for uint64 columns. 
(:issue:`26310`) - Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 63931dda6acb2..04d407ebc670d 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -280,7 +280,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None, if self.name is None: self.name = grouper.result_index.name self.obj = self.grouper.obj - self.grouper = grouper + self.grouper = grouper._get_grouper() else: if self.grouper is None and self.name is not None: diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 4c6796fbc4ac8..ee9d57a537340 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -150,6 +150,15 @@ def _get_splitter(self, data, axis=0): comp_ids, _, ngroups = self.group_info return get_splitter(data, comp_ids, ngroups, axis=axis) + def _get_grouper(self): + """ + We are a grouper as part of another's groupings. + + We have a specific method of grouping, so cannot + convert to an Index for our grouper. + """ + return self.groupings[0].grouper + def _get_group_keys(self): if len(self.groupings) == 1: return self.levels[0] @@ -707,6 +716,15 @@ def groups(self): def nkeys(self): return 1 + def _get_grouper(self): + """ + We are a grouper as part of another's groupings. + + We have a specific method of grouping, so cannot + convert to an Index for our grouper. 
+ """ + return self + def get_iterator(self, data, axis=0): """ Groupby iterator diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2511063110f92..b7abef9357072 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1736,3 +1736,19 @@ def test_groupby_multiindex_series_keys_len_equal_group_axis(): expected = pd.Series([3], index=ei) assert_series_equal(result, expected) + + +def test_groupby_groups_in_BaseGrouper(): + # GH 26326 + # Test if DataFrame grouped with a pandas.Grouper has correct groups + mi = pd.MultiIndex.from_product([['A', 'B'], + ['C', 'D']], names=['alpha', 'beta']) + df = pd.DataFrame({'foo': [1, 2, 1, 2], 'bar': [1, 2, 3, 4]}, + index=mi) + result = df.groupby([pd.Grouper(level='alpha'), 'beta']) + expected = df.groupby(['alpha', 'beta']) + assert(result.groups == expected.groups) + + result = df.groupby(['beta', pd.Grouper(level='alpha')]) + expected = df.groupby(['beta', 'alpha']) + assert(result.groups == expected.groups)