-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
Description
import numpy as np
import pandas as pd
from io import StringIO
def __custom_date_parser(time):
    """Convert second counts (text or float) to whole-second timedeltas.

    Parameters
    ----------
    time : object exposing ``astype`` (e.g. a NumPy array)
        Second values such as ``"41047.00"``. Presumably the raw column
        values that ``pd.read_csv`` hands to ``date_parser`` -- confirm
        against the pandas version in use.

    Returns
    -------
    The result of ``pd.to_timedelta(..., unit='s')`` applied to the
    integer seconds.
    """
    # The ``np.float`` / ``np.int`` aliases were deprecated in NumPy 1.20 and
    # removed in 1.24; use the builtin ``float`` and an explicit 64-bit
    # integer so the result does not depend on the platform's default int.
    # The float -> int cast drops any fractional part of a second.
    whole_seconds = time.astype(float).astype(np.int64)
    return pd.to_timedelta(whole_seconds, unit='s')
# In-memory stand-in for the input text file: a whitespace-separated table
# with a header row (time, e, n, h) followed by five data rows. The 'time'
# column holds second counts written as floats (e.g. 41047.00).
testdata = StringIO("""time e n h
41047.00 -98573.7297 871458.0640 389.0089
41048.00 -98573.7299 871458.0640 389.0089
41049.00 -98573.7300 871458.0642 389.0088
41050.00 -98573.7299 871458.0643 389.0088
41051.00 -98573.7302 871458.0640 389.0086
""")
df = pd.read_csv(testdata, delim_whitespace=True, parse_dates=True, date_parser=__custom_date_parser, index_col='time')

I noticed this problem when I executed a piece of old code that worked a few months ago. Normally this code parses a text file with GPS seconds of week as time and converts it to a TimedeltaIndex. Now, when I execute it, it raises ValueError: unit abbreviation w/o a number. (Full stack trace below.) I tracked it down to the default option na_filter=True in pd.read_csv. When I set it to False, everything works. With a bit of digging, I think I found the source of the error in algorithms.py -> _ensure_data -> line 142:
# datetimelike
vals_dtype = getattr(values, "dtype", None)
if needs_i8_conversion(vals_dtype) or needs_i8_conversion(dtype):
if is_period_dtype(vals_dtype) or is_period_dtype(dtype):
from pandas import PeriodIndex
values = PeriodIndex(values)
dtype = values.dtype
elif is_timedelta64_dtype(vals_dtype) or is_timedelta64_dtype(dtype):
from pandas import TimedeltaIndex
values = TimedeltaIndex(values) #This is line 142
dtype = values.dtype
else:
# Datetime
if values.ndim > 1 and is_datetime64_ns_dtype(vals_dtype):
# Avoid calling the DatetimeIndex constructor as it is 1D only
# Note: this is reached by DataFrame.rank calls GH#27027
# TODO(EA2D): special case not needed with 2D EAs
asi8 = values.view("i8")
dtype = values.dtype
return asi8, dtype
from pandas import DatetimeIndex
values = DatetimeIndex(values)
dtype = values.dtype

Here the function tries to parse values as a TimedeltaIndex, but in this case values is ['' 'n/a' '-nan' '#N/A' '1.#QNAN' 'nan' '#NA' 'NaN' '-1.#QNAN' '#N/A N/A', '-NaN' 'N/A' 'NULL' '' 'null' '1.#IND' 'NA' '-1.#IND']. This branch is executed because is_timedelta64_dtype(dtype) is true here. I can't believe that this is expected behaviour, as it worked before.
Traceback (most recent call last):
File "...\lib\site-packages\pandas\io\parsers.py", line 458, in _read
data = parser.read(nrows)
File "...\lib\site-packages\pandas\io\parsers.py", line 1186, in read
ret = self._engine.read(nrows)
File "...\lib\site-packages\pandas\io\parsers.py", line 2221, in read
index, names = self._make_index(data, alldata, names)
File "...\lib\site-packages\pandas\io\parsers.py", line 1667, in _make_index
index = self._agg_index(index)
File "...\lib\site-packages\pandas\io\parsers.py", line 1760, in _agg_index
arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
File "...\lib\site-packages\pandas\io\parsers.py", line 1861, in _infer_types
mask = algorithms.isin(values, list(na_values))
File "...\lib\site-packages\pandas\core\algorithms.py", line 433, in isin
values, _ = _ensure_data(values, dtype=dtype)
File "...\lib\site-packages\pandas\core\algorithms.py", line 142, in _ensure_data
values = TimedeltaIndex(values)
File "...\lib\site-packages\pandas\core\indexes\timedeltas.py", line 157, in __new__
data, freq=freq, unit=unit, dtype=dtype, copy=copy
File "...\lib\site-packages\pandas\core\arrays\timedeltas.py", line 216, in _from_sequence
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
File "...\lib\site-packages\pandas\core\arrays\timedeltas.py", line 930, in sequence_to_td64ns
data = objects_to_td64ns(data, unit=unit, errors=errors)
File "...\lib\site-packages\pandas\core\arrays\timedeltas.py", line 1040, in objects_to_td64ns
result = array_to_timedelta64(values, unit=unit, errors=errors)
File "pandas\_libs\tslibs\timedeltas.pyx", line 273, in pandas._libs.tslibs.timedeltas.array_to_timedelta64
File "pandas\_libs\tslibs\timedeltas.pyx", line 268, in pandas._libs.tslibs.timedeltas.array_to_timedelta64
File "pandas\_libs\tslibs\timedeltas.pyx", line 215, in pandas._libs.tslibs.timedeltas.convert_to_timedelta64
File "pandas\_libs\tslibs\timedeltas.pyx", line 428, in pandas._libs.tslibs.timedeltas.parse_timedelta_string
ValueError: unit abbreviation w/o a number
python-BaseException

INSTALLED VERSIONS
commit : f2ca0a2
python : 3.7.9.final.0
python-bits : 64
OS : Windows
OS-release : 10
Version : 10.0.18362
machine : AMD64
processor : Intel64 Family 6 Model 94 Stepping 3, GenuineIntel
byteorder : little
LC_ALL : None
LANG : None
LOCALE : None.None
pandas : 1.1.1
numpy : 1.18.1
pytz : 2020.1
dateutil : 2.8.1
pip : 20.2.2
setuptools : 49.6.0.post20200814
Cython : None
pytest : None
hypothesis : None
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : None
IPython : None
pandas_datareader: None
bs4 : None
bottleneck : None
fsspec : None
fastparquet : None
gcsfs : None
matplotlib : 3.1.2
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : None
pytables : None
pyxlsb : None
s3fs : None
scipy : 1.4.1
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
xlwt : None
numba : None