Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
- Bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` where, for object dtype, a group containing only NaN values incorrectly summed to ``0`` instead of ``NaN``. (:issue:`60229`)
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
Expand Down
14 changes: 11 additions & 3 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -725,8 +725,12 @@ def group_sum(
raise ValueError("len(index) != len(labels)")

nobs = np.zeros((<object>out).shape, dtype=np.int64)
# the below is equivalent to `np.zeros_like(out)` but faster
sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
if sum_t is object:
# For object dtype, fill value should not be 0 (#60229)
sumx = np.full((<object>out).shape, NAN, dtype=object)
else:
# the below is equivalent to `np.zeros_like(out)` but faster
sumx = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)
compensation = np.zeros((<object>out).shape, dtype=(<object>out).base.dtype)

N, K = (<object>values).shape
Expand Down Expand Up @@ -760,7 +764,10 @@ def group_sum(
if uses_mask:
isna_result = result_mask[lab, j]
else:
isna_result = _treat_as_na(sumx[lab, j], is_datetimelike)
isna_result = (
_treat_as_na(sumx[lab, j], is_datetimelike) and
nobs[lab, j] > 0
)

if isna_result:
# If sum is already NA, don't add to it. This is important for
Expand Down Expand Up @@ -795,6 +802,7 @@ def group_sum(
compensation[lab, j] = 0
sumx[lab, j] = t
elif not skipna:
nobs[lab, j] += 1
if uses_mask:
result_mask[lab, j] = True
else:
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ def test_observed(request, using_infer_string, observed):
expected = cartesian_product_for_groupers(
expected, [cat1, cat2], list("AB"), fill_value=0
)
expected.loc[expected.C == 0, "C"] = np.nan

tm.assert_frame_equal(result, expected)

Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/groupby/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,21 @@ def test_sum_skipna_object(skipna):
tm.assert_series_equal(result, expected)


def test_sum_allnan_object(skipna):
    """Check that summing an all-NaN object column per group gives NaN."""
    # GH#60229
    frame = DataFrame({"val": [np.nan] * 10, "cat": ["A", "B"] * 5})
    frame = frame.astype({"val": object})
    result = frame.groupby("cat")["val"].sum(skipna=skipna)

    # One NaN per group, keeping the object dtype of the input column.
    group_index = pd.Index(["A", "B"], name="cat")
    expected = Series([np.nan, np.nan], index=group_index, name="val").astype(object)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
"func, values, dtype, result_dtype",
[
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_timegrouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def test_groupby_with_timegrouper(self):
unit=df.index.unit,
)
expected = DataFrame(
{"Buyer": 0, "Quantity": 0},
{"Buyer": np.nan, "Quantity": 0},
index=exp_dti,
)
# Cast to object to avoid implicit cast when setting entry to "CarlCarlCarl"
Expand Down
Loading