@@ -1373,7 +1373,22 @@ def test_groupby_multidim_map(self):
13731373
13741374 @pytest .mark .parametrize ("use_flox" , [True , False ])
13751375 @pytest .mark .parametrize ("coords" , [np .arange (4 ), np .arange (4 )[::- 1 ], [2 , 0 , 3 , 1 ]])
1376- def test_groupby_bins (self , coords : np .typing .ArrayLike , use_flox : bool ) -> None :
1376+ @pytest .mark .parametrize (
1377+ "cut_kwargs" ,
1378+ (
1379+ {"labels" : None , "include_lowest" : True },
1380+ {"labels" : None , "include_lowest" : False },
1381+ {"labels" : ["a" , "b" ]},
1382+ {"labels" : [1.2 , 3.5 ]},
1383+ {"labels" : ["b" , "a" ]},
1384+ ),
1385+ )
1386+ def test_groupby_bins (
1387+ self ,
1388+ coords : np .typing .ArrayLike ,
1389+ use_flox : bool ,
1390+ cut_kwargs : dict ,
1391+ ) -> None :
13771392 array = DataArray (
13781393 np .arange (4 ), dims = "dim_0" , coords = {"dim_0" : coords }, name = "a"
13791394 )
@@ -1384,11 +1399,10 @@ def test_groupby_bins(self, coords: np.typing.ArrayLike, use_flox: bool) -> None
13841399 bins = [0 , 1.5 , 5 ]
13851400
13861401 df = array .to_dataframe ()
1387- df ["dim_0_bins" ] = pd .cut (array ["dim_0" ], bins )
1402+ df ["dim_0_bins" ] = pd .cut (array ["dim_0" ], bins , ** cut_kwargs )
13881403
13891404 expected_df = df .groupby ("dim_0_bins" ).sum ()
13901405 # TODO: can't convert df with IntervalIndex to Xarray
1391-
13921406 expected = (
13931407 expected_df .reset_index (drop = True )
13941408 .to_xarray ()
@@ -1397,25 +1411,55 @@ def test_groupby_bins(self, coords: np.typing.ArrayLike, use_flox: bool) -> None
13971411 )
13981412
13991413 with xr .set_options (use_flox = use_flox ):
1400- actual = array .groupby_bins ("dim_0" , bins = bins ).sum ()
1414+ actual = array .groupby_bins ("dim_0" , bins = bins , ** cut_kwargs ).sum ()
14011415 assert_identical (expected , actual )
14021416
1403- actual = array .groupby_bins ("dim_0" , bins = bins , labels = [1.2 , 3.5 ]).sum ()
1404- assert_identical (expected .assign_coords (dim_0_bins = [1.2 , 3.5 ]), actual )
1405-
1406- actual = array .groupby_bins ("dim_0" , bins = bins ).map (lambda x : x .sum ())
1417+ actual = array .groupby_bins ("dim_0" , bins = bins , ** cut_kwargs ).map (
1418+ lambda x : x .sum ()
1419+ )
14071420 assert_identical (expected , actual )
14081421
14091422 # make sure original array dims are unchanged
14101423 assert len (array .dim_0 ) == 4
14111424
1412- da = xr .DataArray (np .ones ((2 , 3 , 4 )))
1413- bins = [- 1 , 0 , 1 , 2 ]
1414- with xr .set_options (use_flox = False ):
1415- actual = da .groupby_bins ("dim_0" , bins ).mean (...)
1416- with xr .set_options (use_flox = True ):
1417- expected = da .groupby_bins ("dim_0" , bins ).mean (...)
1418- assert_allclose (actual , expected )
def test_groupby_bins_ellipsis(self) -> None:
    """Check that a binned mean over ``...`` agrees with and without flox.

    The same ``groupby_bins(...).mean(...)`` reduction is evaluated once with
    ``use_flox=False`` and once with ``use_flox=True``; the two results must
    be numerically close.
    """
    da = xr.DataArray(np.ones((2, 3, 4)))
    bins = [-1, 0, 1, 2]

    # Result computed with the flox code path disabled.
    with xr.set_options(use_flox=False):
        without_flox = da.groupby_bins("dim_0", bins).mean(...)

    # Same reduction with the flox code path enabled.
    with xr.set_options(use_flox=True):
        with_flox = da.groupby_bins("dim_0", bins).mean(...)

    assert_allclose(without_flox, with_flox)
1433+
@pytest.mark.parametrize("use_flox", [True, False])
def test_groupby_bins_gives_correct_subset(self, use_flox: bool) -> None:
    """Count samples per labelled bin and compare with fixed counts (GH7766).

    A seeded random sample is used both as the data and as its own
    coordinate, binned along log-spaced edges with string labels, and the
    per-bin ``count()`` is compared against hard-coded expected values.
    The test runs once with ``use_flox=True`` and once with ``use_flox=False``.
    """
    # GH7766
    # Seeded generator so the sample (and hence the counts) is reproducible.
    samples = np.random.default_rng(42).normal(5, 5, 1000)
    bin_edges = np.logspace(-4, 1, 10)
    bin_names = [
        "one", "two", "three",
        "four", "five", "six",
        "seven", "eight", "nine",
    ]

    # Mock DataArray whose values double as the "coords" coordinate.
    darr = xr.DataArray(samples, coords=[samples], dims=["coords"])

    # Expected per-bin counts; the first two bins are expected to be NaN.
    expected_counts = [np.nan, np.nan, 1, 1, 1, 8, 31, 104, 542]
    expected = xr.DataArray(
        expected_counts,
        dims="coords_bins",
        coords={"coords_bins": bin_names},
    )

    # The grouping is built outside the options context; only the
    # reduction itself runs under the requested ``use_flox`` setting.
    grouped = darr.groupby_bins("coords", bin_edges, labels=bin_names)
    with xr.set_options(use_flox=use_flox):
        actual = grouped.count()

    assert_identical(actual, expected)
14191463
14201464 def test_groupby_bins_empty (self ):
14211465 array = DataArray (np .arange (4 ), [("x" , range (4 ))])
0 commit comments