@@ -160,6 +160,33 @@ def test_agg_apply_corner(ts, tsframe):
160160 tm .assert_frame_equal (res , exp_df )
161161
162162
def test_with_na_groups(any_real_numpy_dtype):
    """Aggregate a Series grouped by labels that contain NaN entries.

    Ten ones are grouped by a label Series holding four NaN, four "bar" and
    two "foo" entries; the expected results only carry the "bar" and "foo"
    groups.
    """
    positions = Index(np.arange(10))
    values = Series(np.ones(10), positions, dtype=any_real_numpy_dtype)
    raw_labels = [
        np.nan, "foo", "bar", "bar", np.nan,
        np.nan, "bar", "bar", np.nan, "foo",
    ]
    labels = Series(raw_labels, index=positions)

    grouped = values.groupby(labels)

    # The counts ought to come back as integers, but the dtype is
    # intentionally left out of the comparison (check_dtype=False).
    counts = grouped.agg(len)
    tm.assert_series_equal(
        counts,
        Series([4, 2], index=["bar", "foo"]),
        check_dtype=False,
    )

    # An aggregator that explicitly returns a float; here the dtype is compared.
    def float_len(group):
        return float(len(group))

    float_counts = grouped.agg(float_len)
    tm.assert_series_equal(
        float_counts,
        Series([4.0, 2.0], index=["bar", "foo"]),
    )
188+
189+
163190def test_agg_grouping_is_list_tuple (ts ):
164191 df = DataFrame (
165192 np .random .default_rng (2 ).standard_normal ((30 , 4 )),
@@ -1049,6 +1076,73 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
10491076 tm .assert_frame_equal (result , expected )
10501077
10511078
def test_groupby_as_index_agg(df):
    """Compare ``agg`` against the named reductions for ``as_index`` groupbys.

    Covers a single grouping key, two grouping keys, dict-based renaming on a
    selected column, and grouping by an external Series (GH7115, GH8112,
    GH8582).
    """
    # ---- single key -------------------------------------------------------
    by_a = df.groupby("A", as_index=False)

    tm.assert_frame_equal(
        by_a[["C", "D"]].agg("mean"),
        by_a.mean(numeric_only=True),
    )

    per_column = by_a.agg({"C": "mean", "D": "sum"})
    per_column_expected = by_a.mean(numeric_only=True)
    per_column_expected["D"] = by_a.sum()["D"]
    tm.assert_frame_equal(per_column, per_column_expected)

    # A renaming dict on a selected column raises when as_index=True.
    by_a_indexed = df.groupby("A", as_index=True)
    with pytest.raises(
        SpecificationError, match=r"nested renamer is not supported"
    ):
        by_a_indexed["C"].agg({"Q": "sum"})

    # ---- multi key --------------------------------------------------------
    by_ab = df.groupby(["A", "B"], as_index=False)

    tm.assert_frame_equal(by_ab.agg("mean"), by_ab.mean())

    per_column = by_ab.agg({"C": "mean", "D": "sum"})
    per_column_expected = by_ab.mean()
    per_column_expected["D"] = by_ab.sum()["D"]
    tm.assert_frame_equal(per_column, per_column_expected)

    renamed_expected = DataFrame(by_ab["C"].sum()).rename(columns={"C": "Q"})
    with tm.assert_produces_warning(
        FutureWarning,
        match="Passing a dictionary to SeriesGroupBy.agg is deprecated",
    ):
        renamed = by_ab["C"].agg({"Q": "sum"})
    tm.assert_frame_equal(renamed, renamed_expected)

    # ---- GH7115 & GH8112 & GH8582 -----------------------------------------
    frame = DataFrame(
        np.random.default_rng(2).integers(0, 100, (50, 3)),
        columns=["jim", "joe", "jolie"],
    )
    keys = Series(np.random.default_rng(2).integers(5, 10, 50), name="jim")

    by_keys = frame.groupby(keys)
    by_keys.nth(0)  # invokes set_selection_from_grouper internally

    sum_msg = "The behavior of DataFrame.sum with axis=None is deprecated"
    with tm.assert_produces_warning(
        FutureWarning, match=sum_msg, check_stacklevel=False
    ):
        reused = by_keys.apply(sum)
    with tm.assert_produces_warning(
        FutureWarning, match=sum_msg, check_stacklevel=False
    ):
        fresh = frame.groupby(keys).apply(sum)
    tm.assert_frame_equal(reused, fresh)

    # Grouping by the Series with as_index=False must match grouping by its
    # raw values with the group index dropped afterwards.
    for attr in ("mean", "max", "count", "idxmax", "cumsum", "all"):
        unindexed = frame.groupby(keys, as_index=False)
        left = getattr(unindexed, attr)()

        indexed = frame.groupby(keys.values, as_index=True)
        right = getattr(indexed, attr)().reset_index(drop=True)

        tm.assert_frame_equal(left, right)
1144+
1145+
10521146@pytest .mark .parametrize (
10531147 "func" , [lambda s : s .mean (), lambda s : np .mean (s ), lambda s : np .nanmean (s )]
10541148)
@@ -1252,6 +1346,28 @@ def test_agg_multiple_lambda(self):
12521346 tm .assert_frame_equal (result2 , expected )
12531347
12541348
def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
    """Forward positional and keyword args through ``agg`` with duplicate columns.

    Groups ``tsframe`` by the month of its index and aggregates with
    ``np.percentile(..., 80, axis=0)``, comparing against the per-month 0.8
    quantile.
    """
    # go through _aggregate_frame with self.axis == 0 and duplicate columns
    tsframe.columns = ["A", "B", "A", "C"]
    by_month = tsframe.groupby(lambda stamp: stamp.month, as_index=as_index)

    # A FutureWarning is only expected when as_index is falsy.
    if as_index:
        expected_warning = None
    else:
        expected_warning = FutureWarning
    with tm.assert_produces_warning(
        expected_warning, match="A grouping .* was excluded from the result"
    ):
        res = by_month.agg(np.percentile, 80, axis=0)

    month_quantiles = {
        month: tsframe[tsframe.index.month == month].quantile(0.8)
        for month in (1, 2)
    }
    expected = DataFrame(month_quantiles).T
    if not as_index:
        # TODO: try to get this more consistent?
        expected.index = Index(range(2))

    tm.assert_frame_equal(res, expected)
1369+
1370+
12551371def test_groupby_get_by_index ():
12561372 # GH 33439
12571373 df = DataFrame ({"A" : ["S" , "W" , "W" ], "B" : [1.0 , 1.0 , 2.0 ]})
0 commit comments