3232 is_integer_dtype ,
3333 is_list_like ,
3434 is_object_dtype ,
35+ is_string_dtype ,
3536 pandas_dtype ,
3637)
3738from pandas .core .dtypes .missing import isna
@@ -119,17 +120,20 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
119120 return None
120121
121122
122- def safe_cast (values , dtype , copy : bool ):
123+ def safe_cast (values , dtype , inferred_type : str | None , copy : bool ):
123124 """
124125 Safely cast the values to the dtype if they
125126 are equivalent, meaning floats must be equivalent to the
126127 ints.
127-
128128 """
129+ if inferred_type in ("string" , "unicode" ):
130+ # casts from str are always safe since they raise
131+ # a ValueError if the str cannot be parsed into an int
132+ return values .astype (dtype , copy = copy )
133+
129134 try :
130135 return values .astype (dtype , casting = "safe" , copy = copy )
131136 except TypeError as err :
132-
133137 casted = values .astype (dtype , copy = copy )
134138 if (casted == values ).all ():
135139 return casted
@@ -143,7 +147,7 @@ def coerce_to_array(
143147 values , dtype , mask = None , copy : bool = False
144148) -> tuple [np .ndarray , np .ndarray ]:
145149 """
146- Coerce the input values array to numpy arrays with a mask
150+ Coerce the input values array to numpy arrays with a mask.
147151
148152 Parameters
149153 ----------
@@ -187,7 +191,8 @@ def coerce_to_array(
187191 return values , mask
188192
189193 values = np .array (values , copy = copy )
190- if is_object_dtype (values ):
194+ inferred_type = None
195+ if is_object_dtype (values ) or is_string_dtype (values ):
191196 inferred_type = lib .infer_dtype (values , skipna = True )
192197 if inferred_type == "empty" :
193198 values = np .empty (len (values ))
@@ -198,6 +203,8 @@ def coerce_to_array(
198203 "mixed-integer" ,
199204 "integer-na" ,
200205 "mixed-integer-float" ,
206+ "string" ,
207+ "unicode" ,
201208 ]:
202209 raise TypeError (f"{ values .dtype } cannot be converted to an IntegerDtype" )
203210
@@ -230,9 +237,7 @@ def coerce_to_array(
230237 if mask .any ():
231238 values = values .copy ()
232239 values [mask ] = 1
233- values = safe_cast (values , dtype , copy = False )
234- else :
235- values = safe_cast (values , dtype , copy = False )
240+ values = safe_cast (values , dtype , inferred_type , copy = False )
236241
237242 return values , mask
238243
0 commit comments