@@ -308,6 +308,156 @@ def test_data_frame_value_counts_dropna(
308308 tm .assert_series_equal (result_frame_groupby , expected )
309309
310310
def _test_categorical_single_grouper(
    education_df, as_index, observed, expected_index, normalize, expected_data
):
    """Check ``value_counts`` when grouping by a single categorical column.

    Every column of a copy of ``education_df`` is cast to ``category``, so the
    non-grouping columns are categorical as well, and an extra "ASIA" category
    with no rows is added to the "country" grouper. The result of
    ``groupby("country").value_counts`` is then compared with the expectation
    built from ``expected_index`` (tuples of country, gender, education) and
    ``expected_data``: as a Series when ``as_index`` is true, otherwise as the
    frame produced by resetting that Series' index.
    """
    frame = education_df.copy().astype("category")

    # Add a grouping category that never occurs in the data
    frame["country"] = frame["country"].cat.add_categories(["ASIA"])
    country_categories = frame["country"].cat.categories

    result = frame.groupby(
        "country", as_index=as_index, observed=observed
    ).value_counts(normalize=normalize)

    expected_series = Series(
        data=expected_data,
        index=MultiIndex.from_tuples(
            expected_index, names=["country", "gender", "education"]
        ),
    )

    # Convert every index level to categorical; only the grouper level
    # (level 0) is widened to carry the full category set.
    for level in range(3):
        categorical_level = CategoricalIndex(expected_series.index.levels[level])
        if level == 0:
            categorical_level = categorical_level.set_categories(country_categories)
        expected_series.index = expected_series.index.set_levels(
            categorical_level, level=level
        )

    if not as_index:
        value_column = "proportion" if normalize else "count"
        expected = expected_series.reset_index(name=value_column)
        tm.assert_frame_equal(result, expected)
        return

    tm.assert_series_equal(result, expected_series)
345+
346+
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize(
    "normalize, expected_data",
    [
        # Six FR entries followed by six US entries
        (
            False,
            np.array([2, 1, 1, 0, 0, 0] + [1, 1, 0, 0, 0, 0], dtype=np.int64),
        ),
        (
            True,
            np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0] + [0.5, 0.5, 0.0, 0.0, 0.0, 0.0]),
        ),
    ],
)
def test_categorical_single_grouper_observed_true(
    education_df, as_index, normalize, expected_data
):
    """GH#46357: single categorical grouper with ``observed=True``.

    The expected index lists only the "FR" and "US" groups, each paired with
    every gender/education combination.
    """
    france_rows = [
        ("FR", "male", "low"),
        ("FR", "female", "high"),
        ("FR", "male", "medium"),
        ("FR", "female", "low"),
        ("FR", "female", "medium"),
        ("FR", "male", "high"),
    ]
    us_rows = [
        ("US", "female", "high"),
        ("US", "male", "low"),
        ("US", "female", "low"),
        ("US", "female", "medium"),
        ("US", "male", "high"),
        ("US", "male", "medium"),
    ]

    _test_categorical_single_grouper(
        education_df,
        as_index,
        observed=True,
        expected_index=france_rows + us_rows,
        normalize=normalize,
        expected_data=expected_data,
    )
386+
387+
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize(
    "normalize, expected_data",
    [
        # Six FR entries, six US entries, then six all-zero ASIA entries
        (
            False,
            np.array(
                [2, 1, 1, 0, 0, 0] + [1, 1, 0, 0, 0, 0] + [0] * 6,
                dtype=np.int64,
            ),
        ),
        (
            True,
            np.array(
                [0.5, 0.25, 0.25, 0.0, 0.0, 0.0]
                + [0.5, 0.5, 0.0, 0.0, 0.0, 0.0]
                + [0.0] * 6
            ),
        ),
    ],
)
def test_categorical_single_grouper_observed_false(
    education_df, as_index, normalize, expected_data
):
    """GH#46357: single categorical grouper with ``observed=False``.

    Besides the "FR" and "US" groups, the expected index also lists the
    "ASIA" category that the helper adds without any rows.
    """
    france_rows = [
        ("FR", "male", "low"),
        ("FR", "female", "high"),
        ("FR", "male", "medium"),
        ("FR", "female", "low"),
        ("FR", "male", "high"),
        ("FR", "female", "medium"),
    ]
    us_rows = [
        ("US", "female", "high"),
        ("US", "male", "low"),
        ("US", "male", "medium"),
        ("US", "male", "high"),
        ("US", "female", "medium"),
        ("US", "female", "low"),
    ]
    asia_rows = [
        ("ASIA", "male", "low"),
        ("ASIA", "male", "high"),
        ("ASIA", "female", "medium"),
        ("ASIA", "female", "low"),
        ("ASIA", "female", "high"),
        ("ASIA", "male", "medium"),
    ]

    _test_categorical_single_grouper(
        education_df,
        as_index,
        observed=False,
        expected_index=france_rows + us_rows + asia_rows,
        normalize=normalize,
        expected_data=expected_data,
    )
459+
460+
311461@pytest .mark .parametrize ("as_index" , [True , False ])
312462@pytest .mark .parametrize (
313463 "observed, expected_index" ,
@@ -348,15 +498,16 @@ def test_data_frame_value_counts_dropna(
348498 (
349499 True ,
350500 # Non-observed groups get a proportion of 0.0 rather than NaN
351- np .array (
352- [1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , np .nan , np .nan ]
353- ),
501+ np .array ([1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 1.0 , 0.0 , 0.0 , 0.0 ]),
354502 ),
355503 ],
356504)
357- def test_categorical_groupers (
505+ def test_categorical_multiple_groupers (
358506 education_df , as_index , observed , expected_index , normalize , expected_data
359507):
508+ # GH#46357
509+
510+ # Test multiple categorical groupers when non-groupers are non-categorical
360511 education_df = education_df .copy ()
361512 education_df ["country" ] = education_df ["country" ].astype ("category" )
362513 education_df ["education" ] = education_df ["education" ].astype ("category" )
@@ -400,8 +551,10 @@ def test_categorical_groupers(
400551 ),
401552 ],
402553)
403- def test_categorical_values (education_df , as_index , observed , normalize , expected_data ):
404- # Test non-observed categories are included in the result,
554+ def test_categorical_non_groupers (
555+ education_df , as_index , observed , normalize , expected_data
556+ ):
557+ # GH#46357 Test non-observed categories are included in the result,
405558 # regardless of `observed`
406559 education_df = education_df .copy ()
407560 education_df ["gender" ] = education_df ["gender" ].astype ("category" )
0 commit comments