Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ Conversion
^^^^^^^^^^
- Bug in :class:`UInt64Index` constructor when passing a list containing both positive integers small enough to cast to int64 and integers too large to hold in int64 (:issue:`42201`)
- Bug in :class:`Series` constructor returning 0 for missing values with dtype ``int64`` and ``False`` for dtype ``bool`` (:issue:`43017`, :issue:`43018`)
- Bug in :class:`IntegerDtype` not allowing coercion from string dtype (:issue:`25472`)
-

Strings
Expand Down
15 changes: 10 additions & 5 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
is_integer_dtype,
is_list_like,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -124,12 +125,10 @@ def safe_cast(values, dtype, copy: bool):
Safely cast the values to the dtype if they
are equivalent, meaning floats must be equivalent to the
ints.

"""
try:
return values.astype(dtype, casting="safe", copy=copy)
except TypeError as err:

casted = values.astype(dtype, copy=copy)
if (casted == values).all():
return casted
Expand All @@ -143,7 +142,7 @@ def coerce_to_array(
values, dtype, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
"""
Coerce the input values array to numpy arrays with a mask
Coerce the input values array to numpy arrays with a mask.

Parameters
----------
Expand Down Expand Up @@ -187,7 +186,8 @@ def coerce_to_array(
return values, mask

values = np.array(values, copy=copy)
if is_object_dtype(values):
inferred_type = None
if is_object_dtype(values) or is_string_dtype(values):
inferred_type = lib.infer_dtype(values, skipna=True)
if inferred_type == "empty":
values = np.empty(len(values))
Expand All @@ -198,6 +198,8 @@ def coerce_to_array(
"mixed-integer",
"integer-na",
"mixed-integer-float",
"string",
"unicode",
]:
raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")

Expand Down Expand Up @@ -230,7 +232,10 @@ def coerce_to_array(
if mask.any():
values = values.copy()
values[mask] = 1
values = safe_cast(values, dtype, copy=False)
if inferred_type in ("string", "unicode"):
# casts from str are always safe since they raise
# a ValueError if the str cannot be parsed into an int
values = values.astype(dtype, copy=copy)
else:
values = safe_cast(values, dtype, copy=False)

Expand Down
14 changes: 7 additions & 7 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,14 @@ def ensure_python_int(value: int | np.integer) -> int:


def classes(*klasses) -> Callable:
    """
    Evaluate if the tipo is a subclass of the klasses.

    Parameters
    ----------
    *klasses : type
        Candidate base classes; they are passed to ``issubclass`` as a tuple.

    Returns
    -------
    Callable
        A one-argument predicate that takes a class ``tipo`` and returns
        ``issubclass(tipo, klasses)``.
    """
    # The closure captures ``klasses`` so the predicate can be reused for
    # any number of ``tipo`` checks.
    return lambda tipo: issubclass(tipo, klasses)


def classes_and_not_datetimelike(*klasses) -> Callable:
"""
evaluate if the tipo is a subclass of the klasses
and not a datetimelike
Evaluate if the tipo is a subclass of the klasses
and not a datetimelike.
"""
return lambda tipo: (
issubclass(tipo, klasses)
Expand Down Expand Up @@ -674,7 +674,7 @@ def is_integer_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is of an integer dtype.

Unlike in `in_any_int_dtype`, timedelta64 instances will return False.
Unlike in `is_any_int_dtype`, timedelta64 instances will return False.

The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
as integer by this function.
Expand Down Expand Up @@ -726,7 +726,7 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is of a signed integer dtype.

Unlike in `in_any_int_dtype`, timedelta64 instances will return False.
Unlike in `is_any_int_dtype`, timedelta64 instances will return False.

The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
as integer by this function.
Expand Down Expand Up @@ -1521,7 +1521,7 @@ def is_complex_dtype(arr_or_dtype) -> bool:

def _is_dtype(arr_or_dtype, condition) -> bool:
"""
Return a boolean if the condition is satisfied for the arr_or_dtype.
Return true if the condition is satisfied for the arr_or_dtype.

Parameters
----------
Expand Down Expand Up @@ -1580,7 +1580,7 @@ def get_dtype(arr_or_dtype) -> DtypeObj:

def _is_dtype_type(arr_or_dtype, condition) -> bool:
"""
Return a boolean if the condition is satisfied for the arr_or_dtype.
Return true if the condition is satisfied for the arr_or_dtype.

Parameters
----------
Expand Down
21 changes: 18 additions & 3 deletions pandas/tests/arrays/integer/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def test_from_dtype_from_float(data):


def test_conversions(data_missing):

# astype to object series
df = pd.DataFrame({"A": data_missing})
result = df["A"].astype("object")
Expand Down Expand Up @@ -123,7 +122,6 @@ def test_to_integer_array_none_is_nan(a, b):
"values",
[
["foo", "bar"],
["1", "2"],
"foo",
1,
1.0,
Expand All @@ -137,13 +135,14 @@ def test_to_integer_array_error(values):
# error in converting existing arrays to IntegerArrays
msg = (
r"(:?.* cannot be converted to an IntegerDtype)"
r"|(invalid literal for int\(\) with base 10: .*)"
r"|(:?values must be a 1D list-like)"
r"|(Cannot pass scalar)"
)
with pytest.raises((ValueError, TypeError), match=msg):
pd.array(values, dtype="Int64")

with pytest.raises(TypeError, match=msg):
with pytest.raises((ValueError, TypeError), match=msg):
IntegerArray._from_sequence(values)


Expand Down Expand Up @@ -181,6 +180,22 @@ def test_to_integer_array_float():
assert result.dtype == Int64Dtype()


def test_to_integer_array_str():
    """Check str input: integer strings convert, unparsable ones raise."""
    # Parsable integer strings are converted; None ends up as a missing value.
    converted = IntegerArray._from_sequence(["1", "2", None])
    tm.assert_extension_array_equal(
        converted, pd.array([1, 2, np.nan], dtype="Int64")
    )

    # An empty string and float-formatted strings are both expected to be
    # rejected with the int() parsing error.
    for bad_values in (["1", "2", ""], ["1.5", "2.0"]):
        with pytest.raises(
            ValueError, match=r"invalid literal for int\(\) with base 10: .*"
        ):
            IntegerArray._from_sequence(bad_values)


@pytest.mark.parametrize(
"bool_values, int_values, target_dtype, expected_dtype",
[
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/io/parser/dtypes/test_dtypes_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,3 +314,23 @@ def test_dtype_multi_index(all_parsers):
)

tm.assert_frame_equal(result, expected)


def test_nullable_int_dtype(all_parsers, any_int_ea_dtype):
    """Check that read_csv with a nullable integer dtype maps blanks to pd.NA."""
    # GH 25472
    csv_text = """a,b,c
,3,5
1,,6
2,4,"""
    # Each column has exactly one empty field, in a different row.
    column_values = {
        "a": [pd.NA, 1, 2],
        "b": [3, pd.NA, 4],
        "c": [5, 6, pd.NA],
    }
    expected = DataFrame(
        {
            name: pd.array(values, dtype=any_int_ea_dtype)
            for name, values in column_values.items()
        }
    )
    actual = all_parsers.read_csv(StringIO(csv_text), dtype=any_int_ea_dtype)
    tm.assert_frame_equal(actual, expected)