@@ -94,10 +94,19 @@ def array(
94
94
:class:`pandas.Period` :class:`pandas.arrays.PeriodArray`
95
95
:class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray`
96
96
:class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray`
97
+ :class:`int` :class:`pandas.arrays.IntegerArray`
98
+ :class:`str` :class:`pandas.arrays.StringArray`
99
+ :class:`bool` :class:`pandas.arrays.BooleanArray`
97
100
============================== =====================================
98
101
99
102
For all other cases, NumPy's usual inference rules will be used.
100
103
104
+ .. versionchanged:: 1.0.0
105
+
106
+ Pandas infers nullable-integer dtype for integer data,
107
+ string dtype for string data, and nullable-boolean dtype
108
+ for boolean data.
109
+
101
110
copy : bool, default True
102
111
Whether to copy the data, even if not necessary. Depending
103
112
on the type of `data`, creating the new array may require
@@ -154,14 +163,6 @@ def array(
154
163
['a', 'b']
155
164
Length: 2, dtype: str32
156
165
157
- Or use the dedicated constructor for the array you're expecting, and
158
- wrap that in a PandasArray
159
-
160
- >>> pd.array(np.array(['a', 'b'], dtype='<U1'))
161
- <PandasArray>
162
- ['a', 'b']
163
- Length: 2, dtype: str32
164
-
165
166
Finally, Pandas has arrays that mostly overlap with NumPy
166
167
167
168
* :class:`arrays.DatetimeArray`
@@ -184,20 +185,28 @@ def array(
184
185
185
186
Examples
186
187
--------
187
- If a dtype is not specified, `data` is passed through to
188
- :meth:`numpy.array`, and a :class:`arrays.PandasArray` is returned .
188
+ If a dtype is not specified, pandas will infer the best dtype from the values.
189
+ See the description of `dtype` for the types pandas infers.
189
190
190
191
>>> pd.array([1, 2])
191
- <PandasArray >
192
+ <IntegerArray >
192
193
[1, 2]
193
- Length: 2, dtype: int64
194
+ Length: 2, dtype: Int64
194
195
195
- Or the NumPy dtype can be specified
196
+ >>> pd.array([1, 2, np.nan])
197
+ <IntegerArray>
198
+ [1, 2, NaN]
199
+ Length: 3, dtype: Int64
196
200
197
- >>> pd.array([1, 2], dtype=np.dtype("int32"))
198
- <PandasArray>
199
- [1, 2]
200
- Length: 2, dtype: int32
201
+ >>> pd.array(["a", None, "c"])
202
+ <StringArray>
203
+ ['a', nan, 'c']
204
+ Length: 3, dtype: string
205
+
206
+ >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
207
+ <PeriodArray>
208
+ ['2000-01-01', '2000-01-01']
209
+ Length: 2, dtype: period[D]
201
210
202
211
You can use the string alias for `dtype`
203
212
@@ -212,29 +221,24 @@ def array(
212
221
[a, b, a]
213
222
Categories (3, object): [a < b < c]
214
223
215
- Because omitting the `dtype` passes the data through to NumPy,
216
- a mixture of valid integers and NA will return a floating-point
217
- NumPy array.
224
+ If pandas does not infer a dedicated extension type, a
225
+ :class:`arrays.PandasArray` is returned.
218
226
219
- >>> pd.array([1, 2, np.nan ])
227
+ >>> pd.array([1.1 , 2.2 ])
220
228
<PandasArray>
221
- [1.0, 2.0, nan]
222
- Length: 3, dtype: float64
223
-
224
- To use pandas' nullable :class:`pandas.arrays.IntegerArray`, specify
225
- the dtype:
229
+ [1.1, 2.2]
230
+ Length: 2, dtype: float64
226
231
227
- >>> pd.array([1, 2, np.nan], dtype='Int64')
228
- <IntegerArray>
229
- [1, 2, NaN]
230
- Length: 3, dtype: Int64
232
+ As mentioned in the "Notes" section, new extension types may be added
233
+ in the future (by pandas or 3rd party libraries), causing the return
234
+ value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
235
+ as a NumPy dtype if you need to ensure there's no future change in
236
+ behavior.
231
237
232
- Pandas will infer an ExtensionArray for some types of data:
233
-
234
- >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
235
- <PeriodArray>
236
- ['2000-01-01', '2000-01-01']
237
- Length: 2, dtype: period[D]
238
+ >>> pd.array([1, 2], dtype=np.dtype("int32"))
239
+ <PandasArray>
240
+ [1, 2]
241
+ Length: 2, dtype: int32
238
242
239
243
`data` must be 1-dimensional. A ValueError is raised when the input
240
244
has the wrong dimensionality.
@@ -246,21 +250,26 @@ def array(
246
250
"""
247
251
from pandas .core .arrays import (
248
252
period_array ,
253
+ BooleanArray ,
254
+ IntegerArray ,
249
255
IntervalArray ,
250
256
PandasArray ,
251
257
DatetimeArray ,
252
258
TimedeltaArray ,
259
+ StringArray ,
253
260
)
254
261
255
262
if lib .is_scalar (data ):
256
263
msg = "Cannot pass scalar '{}' to 'pandas.array'."
257
264
raise ValueError (msg .format (data ))
258
265
259
- data = extract_array ( data , extract_numpy = True )
260
-
261
- if dtype is None and isinstance ( data , ABCExtensionArray ):
266
+ if dtype is None and isinstance (
267
+ data , ( ABCSeries , ABCIndexClass , ABCExtensionArray )
268
+ ):
262
269
dtype = data .dtype
263
270
271
+ data = extract_array (data , extract_numpy = True )
272
+
264
273
# this returns None for not-found dtypes.
265
274
if isinstance (dtype , str ):
266
275
dtype = registry .find (dtype ) or dtype
@@ -270,7 +279,7 @@ def array(
270
279
return cls ._from_sequence (data , dtype = dtype , copy = copy )
271
280
272
281
if dtype is None :
273
- inferred_dtype = lib .infer_dtype (data , skipna = False )
282
+ inferred_dtype = lib .infer_dtype (data , skipna = True )
274
283
if inferred_dtype == "period" :
275
284
try :
276
285
return period_array (data , copy = copy )
@@ -298,7 +307,14 @@ def array(
298
307
# timedelta, timedelta64
299
308
return TimedeltaArray ._from_sequence (data , copy = copy )
300
309
301
- # TODO(BooleanArray): handle this type
310
+ elif inferred_dtype == "string" :
311
+ return StringArray ._from_sequence (data , copy = copy )
312
+
313
+ elif inferred_dtype == "integer" :
314
+ return IntegerArray ._from_sequence (data , copy = copy )
315
+
316
+ elif inferred_dtype == "boolean" :
317
+ return BooleanArray ._from_sequence (data , copy = copy )
302
318
303
319
# Pandas overrides NumPy for
304
320
# 1. datetime64[ns]
0 commit comments