1111 List ,
1212 Optional ,
1313 Sequence ,
14+ Tuple ,
1415 Type ,
1516 TypeVar ,
1617 Union ,
@@ -2642,13 +2643,11 @@ def recode_for_categories(
26422643 return new_codes
26432644
26442645
2645- def factorize_from_iterable (values ):
2646+ def factorize_from_iterable (values ) -> Tuple [ np . ndarray , Index ] :
26462647 """
26472648 Factorize an input `values` into `categories` and `codes`. Preserves
26482649 categorical dtype in `categories`.
26492650
2650- *This is an internal function*
2651-
26522651 Parameters
26532652 ----------
26542653 values : list-like
@@ -2660,6 +2659,8 @@ def factorize_from_iterable(values):
26602659 If `values` has a categorical dtype, then `categories` is
26612660 a CategoricalIndex keeping the categories and order of `values`.
26622661 """
2662+ from pandas import CategoricalIndex
2663+
26632664 if not is_list_like (values ):
26642665 raise TypeError ("Input must be list-like" )
26652666
@@ -2668,7 +2669,8 @@ def factorize_from_iterable(values):
26682669 # The Categorical we want to build has the same categories
26692670 # as values but its codes are by def [0, ..., len(n_categories) - 1]
26702671 cat_codes = np .arange (len (values .categories ), dtype = values .codes .dtype )
2671- categories = Categorical .from_codes (cat_codes , dtype = values .dtype )
2672+ cat = Categorical .from_codes (cat_codes , dtype = values .dtype )
2673+ categories = CategoricalIndex (cat )
26722674 codes = values .codes
26732675 else :
26742676 # The value of ordered is irrelevant since we don't use cat as such,
@@ -2680,26 +2682,26 @@ def factorize_from_iterable(values):
26802682 return codes , categories
26812683
26822684
def factorize_from_iterables(iterables) -> Tuple[List[np.ndarray], List[Index]]:
    """
    A higher-level wrapper over `factorize_from_iterable`.

    Applies `factorize_from_iterable` to every element of `iterables` and
    regroups the per-element ``(codes, categories)`` pairs into two parallel
    lists.

    Parameters
    ----------
    iterables : list-like of list-likes

    Returns
    -------
    codes : list of ndarrays
    categories : list of Indexes

    Notes
    -----
    See `factorize_from_iterable` for more info.
    """
    if len(iterables) == 0:
        # For consistency, it should return two empty lists.
        # This must be handled up front: with no inputs, ``zip(*...)`` below
        # yields nothing and could not be unpacked into two names.
        return [], []

    # Transpose [(codes_0, cats_0), (codes_1, cats_1), ...] into
    # (codes_0, codes_1, ...) and (cats_0, cats_1, ...).
    codes, categories = zip(*(factorize_from_iterable(it) for it in iterables))
    return list(codes), list(categories)
0 commit comments