From b9c32a130db110fce8de308cf56d02b85d5917ad Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Mon, 11 May 2020 20:07:46 -0500 Subject: [PATCH 1/6] CLN: Move _convert_to_list_like to common --- pandas/core/arrays/categorical.py | 15 +-------------- pandas/core/common.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index dd5ff7781e463..737c130161246 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -26,11 +26,9 @@ is_dtype_equal, is_extension_array_dtype, is_integer_dtype, - is_iterator, is_list_like, is_object_dtype, is_scalar, - is_sequence, is_timedelta64_dtype, needs_i8_conversion, ) @@ -324,7 +322,7 @@ def __init__( # of numpy values = maybe_infer_to_datetimelike(values, convert_dates=True) if not isinstance(values, np.ndarray): - values = _convert_to_list_like(values) + values = com.convert_to_list_like(values) # By convention, empty lists result in object dtype: sanitize_dtype = np.dtype("O") if len(values) == 0 else None @@ -2647,17 +2645,6 @@ def recode_for_categories(codes: np.ndarray, old_categories, new_categories): return new_codes -def _convert_to_list_like(list_like): - if hasattr(list_like, "dtype"): - return list_like - if isinstance(list_like, list): - return list_like - if is_sequence(list_like) or isinstance(list_like, tuple) or is_iterator(list_like): - return list(list_like) - - return [list_like] - - def factorize_from_iterable(values): """ Factorize an input `values` into `categories` and `codes`. Preserves diff --git a/pandas/core/common.py b/pandas/core/common.py index bb911c0617242..57e3f97918798 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -23,6 +23,8 @@ is_bool_dtype, is_extension_array_dtype, is_integer, + is_iterator, + is_sequence, ) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries from pandas.core.dtypes.inference import _iterable_not_string @@ -473,3 +475,14 @@ def f(x): f = mapper return f + + +def convert_to_list_like(values): + if hasattr(values, "dtype"): + return values + if isinstance(values, list): + return values + if is_sequence(values) or isinstance(values, tuple) or is_iterator(values): + return list(values) + + return [values] From fd7aee0a02e3733d7ddae429b04d5478f2e386df Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 12 May 2020 12:18:04 -0500 Subject: [PATCH 2/6] Type --- pandas/core/common.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 57e3f97918798..70f02bb619f86 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -4,17 +4,16 @@ Note: pandas.core.common is *not* part of the public API. """ -import collections -from collections import abc +from collections import abc, defaultdict from datetime import datetime, timedelta from functools import partial import inspect -from typing import Any, Collection, Iterable, Union +from typing import Any, Collection, Iterable, List, Union import numpy as np from pandas._libs import lib, tslibs -from pandas._typing import T +from pandas._typing import AnyArrayLike, Scalar, T from pandas.compat.numpy import _np_version_under1p17 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -23,10 +22,13 @@ is_bool_dtype, is_extension_array_dtype, is_integer, - is_iterator, - is_sequence, ) -from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ( + ABCExtensionArray, + ABCIndex, + ABCIndexClass, + ABCSeries, +) from pandas.core.dtypes.inference import _iterable_not_string from pandas.core.dtypes.missing import isna, isnull, notnull # noqa @@ -369,12 +371,12 @@ def standardize_mapping(into): Series.to_dict """ if not inspect.isclass(into): - if isinstance(into, collections.defaultdict): - return partial(collections.defaultdict, into.default_factory) + if isinstance(into, defaultdict): + return partial(defaultdict, into.default_factory) into = type(into) if not issubclass(into, abc.Mapping): raise TypeError(f"unsupported type: {into}") - elif into == collections.defaultdict: + elif into == defaultdict: raise TypeError("to_dict() only accepts initialized defaultdicts") return into @@ -477,12 +479,14 @@ def f(x): return f -def convert_to_list_like(values): - if hasattr(values, "dtype"): - return values +def convert_to_list_like( + values: Union[Scalar, Iterable, AnyArrayLike] +) -> Union[List, AnyArrayLike]: if isinstance(values, list): return values - if is_sequence(values) or isinstance(values, tuple) or is_iterator(values): + elif isinstance(values, abc.Iterable) and not isinstance(values, str): return list(values) + elif isinstance(values, (np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)): + return values return [values] From dbae893843e107b5547b326730f6dcd5ab93a45d Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 12 May 2020 12:22:18 -0500 Subject: [PATCH 3/6] Docstring --- pandas/core/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/common.py b/pandas/core/common.py index 70f02bb619f86..2e8ccf398313c 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -482,6 +482,9 @@ def f(x): def convert_to_list_like( values: Union[Scalar, Iterable, AnyArrayLike] ) -> Union[List, AnyArrayLike]: + """ + Convert scalar or list-like input to np.array, pd.array, or list. + """ if isinstance(values, list): return values elif isinstance(values, abc.Iterable) and not isinstance(values, str): From 0a10a8d41be9d5e2625a714a0f5b1dc431f51c70 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 12 May 2020 15:24:17 -0500 Subject: [PATCH 4/6] Fixup --- pandas/core/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 2e8ccf398313c..d80d828e39da2 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -485,11 +485,9 @@ def convert_to_list_like( """ Convert scalar or list-like input to np.array, pd.array, or list. """ - if isinstance(values, list): + if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)): return values elif isinstance(values, abc.Iterable) and not isinstance(values, str): return list(values) - elif isinstance(values, (np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)): - return values return [values] From 6ff0316d47933cad743c3b635e15323041a10bbc Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 12 May 2020 17:09:18 -0500 Subject: [PATCH 5/6] Update --- pandas/core/common.py | 3 ++- pandas/core/tools/timedeltas.py | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index d80d828e39da2..1ccca5193ab46 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -483,7 +483,8 @@ def convert_to_list_like( values: Union[Scalar, Iterable, AnyArrayLike] ) -> Union[List, AnyArrayLike]: """ - Convert scalar or list-like input to np.array, pd.array, or list. + Convert list-like or scalar input to list-like. List, numpy and pandas array-like + inputs are returned unmodified whereas others are converted to list. """ if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)): return values diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 51b404b46f321..85ce639dbced8 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -8,6 +8,7 @@ from pandas._libs.tslibs.timedeltas import Timedelta, parse_timedelta_unit from pandas.core.dtypes.common import is_list_like +from pandas.core.common import convert_to_list_like from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.arrays.timedeltas import sequence_to_td64ns @@ -126,12 +127,11 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): def _convert_listlike(arg, unit="ns", errors="raise", name=None): """Convert a list of objects to a timedelta index object.""" - if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): - # This is needed only to ensure that in the case where we end up - # returning arg (errors == "ignore"), and where the input is a - # generator, we return a useful list-like instead of a - # used-up generator - arg = np.array(list(arg), dtype=object) + # This is needed only to ensure that in the case where we end up + # returning arg (errors == "ignore"), and where the input is a + # generator, we return a useful list-like instead of a + # used-up generator + arg = convert_to_list_like(arg) try: value = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] From f48dff9765040ba8a961aeb6c6ca12659932a122 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 12 May 2020 19:12:10 -0500 Subject: [PATCH 6/6] Revert --- pandas/core/tools/timedeltas.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 85ce639dbced8..51b404b46f321 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -8,7 +8,6 @@ from pandas._libs.tslibs.timedeltas import Timedelta, parse_timedelta_unit from pandas.core.dtypes.common import is_list_like -from pandas.core.common import convert_to_list_like from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries from pandas.core.arrays.timedeltas import sequence_to_td64ns @@ -127,11 +126,12 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): def _convert_listlike(arg, unit="ns", errors="raise", name=None): """Convert a list of objects to a timedelta index object.""" - # This is needed only to ensure that in the case where we end up - # returning arg (errors == "ignore"), and where the input is a - # generator, we return a useful list-like instead of a - # used-up generator - arg = convert_to_list_like(arg) + if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"): + # This is needed only to ensure that in the case where we end up + # returning arg (errors == "ignore"), and where the input is a + # generator, we return a useful list-like instead of a + # used-up generator + arg = np.array(list(arg), dtype=object) try: value = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]