@@ -1259,45 +1259,40 @@ def test_get_nonexistent_category():
12591259 )
12601260
12611261
1262- def test_dataframe_groupby_on_2_categoricals_when_observed_is_true (
1263- reduction_func :str ):
1262+ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true (reduction_func : str ):
12641263
1265- if reduction_func == 'ngroup' :
1264+ if reduction_func == "ngroup" :
12661265 pytest .skip ("ngroup does not return the Categories on the index" )
12671266
12681267 res , unobserved_cats = _dataframe_groupby_on_2_categoricals (
1269- reduction_func , observed = True )
1270-
1268+ reduction_func , observed = True
1269+ )
1270+
12711271 for cat in unobserved_cats :
1272- assert cat not in res .index
1273-
1274-
1275- def _dataframe_groupby_on_2_categoricals (reduction_func :str , observed :bool ):
1276-
1277- df = pd .DataFrame ({
1278- "cat_1" : pd .Categorical (list ("AABB" ), categories = list ("ABC" )),
1279- "cat_2" : pd .Categorical (list ("1111" ), categories = list ("12" )),
1280- "value" : [.1 , .1 , .1 , .1 ]
1281- })
1282- unobserved_cats = [
1283- ('A' , '2' ),
1284- ('B' , '2' ),
1285- ('C' , '1' ),
1286- ('C' , '2' )
1287- ]
1288-
1289- df_grp = df .groupby (['cat_1' , 'cat_2' ], observed = observed )
1290-
1291- args = {
1292- 'nth' : [0 ],
1293- 'corrwith' : [df ]
1294- }.get (reduction_func , [])
1272+ assert cat not in res .index
1273+
1274+
1275+ def _dataframe_groupby_on_2_categoricals (reduction_func : str , observed : bool ):
1276+
1277+ df = pd .DataFrame (
1278+ {
1279+ "cat_1" : pd .Categorical (list ("AABB" ), categories = list ("ABC" )),
1280+ "cat_2" : pd .Categorical (list ("1111" ), categories = list ("12" )),
1281+ "value" : [0.1 , 0.1 , 0.1 , 0.1 ],
1282+ }
1283+ )
1284+ unobserved_cats = [("A" , "2" ), ("B" , "2" ), ("C" , "1" ), ("C" , "2" )]
1285+
1286+ df_grp = df .groupby (["cat_1" , "cat_2" ], observed = observed )
1287+
1288+ args = {"nth" : [0 ], "corrwith" : [df ]}.get (reduction_func , [])
12951289 res = getattr (df_grp , reduction_func )(* args )
1296-
1290+
12971291 return res , unobserved_cats
12981292
12991293
1300- _results_for_groupbys_with_missing_categories = dict ([
1294+ _results_for_groupbys_with_missing_categories = dict (
1295+ [
13011296 ("all" , np .NaN ),
13021297 ("any" , np .NaN ),
13031298 ("count" , 0 ),
@@ -1321,33 +1316,38 @@ def _dataframe_groupby_on_2_categoricals(reduction_func:str, observed:bool):
13211316 ("std" , np .NaN ),
13221317 ("sum" , np .NaN ),
13231318 ("var" , np .NaN ),
1324- ])
1319+ ]
1320+ )
13251321
13261322
1327- @pytest .mark .parametrize ('observed' , [False , None ])
1323+ @pytest .mark .parametrize ("observed" , [False , None ])
13281324def test_dataframe_groupby_on_2_categoricals_when_observed_is_false (
1329- reduction_func :str , observed :bool , request ):
1330-
1331- if reduction_func == 'ngroup' :
1325+ reduction_func : str , observed : bool , request
1326+ ):
1327+
1328+ if reduction_func == "ngroup" :
13321329 pytest .skip ("ngroup does not return the Categories on the index" )
1333-
1334- if reduction_func == 'count' :
1330+
1331+ if reduction_func == "count" :
13351332 mark = pytest .mark .xfail (
1336- reason = ("DataFrameGroupBy.count returns np.NaN for missing "
1337- "categories, when it should return 0" ))
1333+ reason = (
1334+ "DataFrameGroupBy.count returns np.NaN for missing "
1335+ "categories, when it should return 0"
1336+ )
1337+ )
13381338 request .node .add_marker (mark )
13391339
13401340 res , unobserved_cats = _dataframe_groupby_on_2_categoricals (
1341- reduction_func , observed )
1342-
1341+ reduction_func , observed
1342+ )
1343+
13431344 expected = _results_for_groupbys_with_missing_categories [reduction_func ]
1344-
1345+
13451346 if expected is np .nan :
13461347 assert res .loc [unobserved_cats ].isnull ().all ().all ()
13471348 else :
13481349 assert (res .loc [unobserved_cats ] == expected ).all ().all ()
1349-
1350-
1350+
13511351
13521352def test_series_groupby_on_2_categoricals_unobserved (
13531353 reduction_func : str , observed : bool , request
@@ -1379,17 +1379,18 @@ def test_series_groupby_on_2_categoricals_unobserved(
13791379
13801380
13811381def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans (
1382- reduction_func :str , request ):
1382+ reduction_func : str , request
1383+ ):
13831384 # GH 17605
13841385 # Tests whether the unobserved categories in the result contain 0 or NaN
1385-
1386+
13861387 if reduction_func == "ngroup" :
13871388 pytest .skip ("ngroup is not truly a reduction" )
1388-
1389+
13891390 if reduction_func == "corrwith" : # GH 32293
13901391 mark = pytest .mark .xfail (reason = "TODO: implemented SeriesGroupBy.corrwith" )
13911392 request .node .add_marker (mark )
1392-
1393+
13931394 df = pd .DataFrame (
13941395 {
13951396 "cat_1" : pd .Categorical (list ("AABB" ), categories = list ("ABC" )),
@@ -1403,7 +1404,7 @@ def test_series_groupby_on_2_categoricals_unobserved_zeroes_or_nans(
14031404 series_groupby = df .groupby (["cat_1" , "cat_2" ], observed = False )["value" ]
14041405 agg = getattr (series_groupby , reduction_func )
14051406 result = agg (* args )
1406-
1407+
14071408 zero_or_nan = _results_for_groupbys_with_missing_categories [reduction_func ]
14081409
14091410 for idx in unobserved :
0 commit comments