Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ def __setitem__(self, key, value):

def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)

if is_dtype_equal(dtype, self.dtype):
if copy:
return self.copy()
Expand All @@ -327,6 +326,9 @@ def astype(self, dtype, copy=True):
arr[mask] = "0"
values = arr.astype(dtype.numpy_dtype)
return FloatingArray(values, mask, copy=False)
elif isinstance(dtype, ExtensionDtype):
cls = dtype.construct_array_type()
return cls._from_sequence(self, dtype=dtype, copy=copy)
elif np.issubdtype(dtype, np.floating):
arr = self._ndarray.copy()
mask = self.isna()
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,18 @@ def test_astype_categories_raises(self):
with pytest.raises(TypeError, match="got an unexpected"):
s.astype("category", categories=["a", "b"], ordered=True)

def test_astype_str_to_extension_dtype(self):
# GH-40351
s = Series(["A", np.NaN], dtype="string")
result = s.astype("category")
expected = Series(["A", np.NaN], dtype="category")
tm.assert_series_equal(result, expected)

s = Series(["1/1/2021", "2/1/2021"], dtype="string")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add examples for Timedelta, Datetime with time zone, and Interval (i.e. all the EA types)?

Copy link
Contributor Author

@siboehm siboehm Mar 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added tests for all ExtensionArray dtypes. I had to exclude BooleanArray and IntervalArray, since there's no way to parse them back from a list of strings. TimedeltaArray xfails due to #40478. For PeriodArray and DatetimeArray, the NaT values get converted to NA strings, but converting the NA strings back to NaT fails. I added xfails for those cases, unless expecting EA ⇒ StringArray ⇒ EA to round-trip successfully is still up for debate.

result = s.astype("period[M]")
expected = Series(["1/1/2021", "2/1/2021"], dtype="period[M]")
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]])
def test_astype_from_categorical(self, items):
ser = Series(items)
Expand Down