Skip to content

Commit 0436809

Browse files
committed
BUG: TimeSeries not properly deserializing for < 0.13 (deprecations); additional tests
ENH: add 32-bit 2.7 pickle from 0.12; BUG: fixed py3 load_reduce (weird encoding on nan)
1 parent b15a376 commit 0436809

12 files changed

+146
-81
lines changed

pandas/compat/pickle_compat.py

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,103 @@
11
""" support pre 0.12 series pickle compatibility """
22

33
import sys
4-
import pickle
54
import numpy as np
65
import pandas
6+
import pickle as pkl
77
from pandas import compat
8-
from pandas.core.series import Series
9-
from pandas.sparse.series import SparseSeries
8+
from pandas.compat import u, string_types
9+
from pandas.core.series import Series, TimeSeries
10+
from pandas.sparse.series import SparseSeries, SparseTimeSeries
1011

1112
def load_reduce(self):
    """Handle the pickle ``REDUCE`` opcode with Series compatibility.

    Pops the argument tuple from the unpickler stack, calls the callable
    sitting beneath it and stores the result in the callable's slot.

    If the first argument is one of the ``Deprecated*`` placeholder
    classes, a bare ``Series`` / ``SparseSeries`` instance is created
    instead of calling the callable.

    If the call fails and the unpickler has an encoding, the call is
    retried once with string arguments encoded using that encoding.
    The original exception is re-raised when the retry fails as well.
    """
    stack = self.stack
    args = stack.pop()
    func = stack[-1]

    # REDUCE may carry an empty argument tuple; guard before indexing
    if len(args) and type(args[0]) is type:
        n = args[0].__name__
        if n in (u('DeprecatedSeries'), u('DeprecatedTimeSeries')):
            stack[-1] = object.__new__(Series)
            return
        if n in (u('DeprecatedSparseSeries'),
                 u('DeprecatedSparseTimeSeries')):
            stack[-1] = object.__new__(SparseSeries)
            return

    try:
        value = func(*args)
    except Exception:

        # ``encoding`` / ``is_verbose`` may be missing on the unpickler
        # (is_verbose is only set by load()); never let a missing
        # attribute mask the real unpickling error
        encoding = getattr(self, 'encoding', None)

        # try to reencode the arguments
        if encoding is not None:
            try:
                encoded = tuple(arg.encode(encoding)
                                if isinstance(arg, string_types) else arg
                                for arg in args)
                stack[-1] = func(*encoded)
                return
            except Exception:
                # fall through and report the original failure
                pass

        if getattr(self, 'is_verbose', False):
            print(sys.exc_info())
            print(func, args)
        raise

    stack[-1] = value
3244

3345
# Use the pure-Python unpickler so that opcode handlers can be overridden
# through its ``dispatch`` table.
if compat.PY3:
    class Unpickler(pkl._Unpickler):
        pass
else:
    class Unpickler(pkl.Unpickler):
        pass

# ``dispatch`` is inherited from the base class; copy it before patching so
# that the standard library's own unpickler table is left untouched.
Unpickler.dispatch = dict(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce
53+
54+
def load(fh, encoding=None, compat=False, is_verbose=False):
    """
    Load a pickle from an open file, optionally with a provided encoding.

    If ``compat`` is True, the Series classes in ``pandas.core.series``
    and ``pandas.sparse.series`` are temporarily replaced by the
    ``Deprecated*`` placeholders while unpickling (faking the old class
    hierarchy) and are always restored afterwards.

    Parameters
    ----------
    fh : a filelike object; it is rewound with ``seek(0)`` before reading
    encoding : an optional encoding, only passed to the unpickler when
        it is not None
    compat : provide Series compatibility mode, boolean, default False
    is_verbose : show exception output, boolean, default False

    Returns
    -------
    the unpickled object

    Notes
    -----
    Unpickling executes code referenced by the pickle; only load files
    from trusted sources.
    """

    try:
        if compat:
            pandas.core.series.Series = DeprecatedSeries
            pandas.core.series.TimeSeries = DeprecatedTimeSeries
            pandas.sparse.series.SparseSeries = DeprecatedSparseSeries
            pandas.sparse.series.SparseTimeSeries = DeprecatedSparseTimeSeries
        fh.seek(0)
        if encoding is not None:
            up = Unpickler(fh, encoding=encoding)
        else:
            up = Unpickler(fh)
        up.is_verbose = is_verbose

        return up.load()
    finally:
        # restore every patched name to its real class
        if compat:
            pandas.core.series.Series = Series
            pandas.core.series.TimeSeries = TimeSeries
            pandas.sparse.series.SparseSeries = SparseSeries
            pandas.sparse.series.SparseTimeSeries = SparseTimeSeries
5792

58-
class DeprecatedSeries(np.ndarray, Series):
    """Placeholder swapped in for ``Series`` while loading in compat mode."""


class DeprecatedTimeSeries(DeprecatedSeries):
    """Placeholder swapped in for ``TimeSeries`` in compat mode."""


class DeprecatedSparseSeries(DeprecatedSeries):
    """Placeholder swapped in for ``SparseSeries`` in compat mode."""


class DeprecatedSparseTimeSeries(DeprecatedSparseSeries):
    """Placeholder swapped in for ``SparseTimeSeries`` in compat mode."""

pandas/core/internals.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1845,6 +1845,11 @@ def __setstate__(self, state):
18451845

18461846
blocks = []
18471847
for values, items in zip(bvalues, bitems):
1848+
1849+
# numpy < 1.7 pickle compat
1850+
if values.dtype == 'M8[us]':
1851+
values = values.astype('M8[ns]')
1852+
18481853
blk = make_block(values, items, self.axes[0])
18491854
blocks.append(blk)
18501855
self.blocks = blocks

pandas/io/pickle.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from pandas.compat import cPickle as pkl, PY3
1+
from pandas.compat import cPickle as pkl, pickle_compat as pc, PY3
22

33
def to_pickle(obj, path):
44
"""
@@ -31,11 +31,23 @@ def read_pickle(path):
3131
-------
3232
unpickled : type of object stored in file
3333
"""
34+
35+
def try_read(path, encoding=None):
    # Try a plain load first; if that fails (e.g. a TypeError caused by
    # subclass changes), retry with the compat loader.
    # The encoding is forwarded as-is; pc.load only hands it to the
    # unpickler when it is not None, as py2 doesn't handle the param.
    try:
        with open(path, 'rb') as fh:
            return pc.load(fh, encoding=encoding, compat=False)
    except Exception:
        with open(path, 'rb') as fh:
            return pc.load(fh, encoding=encoding, compat=True)
47+
3448
try:
35-
with open(path, 'rb') as fh:
36-
return pkl.load(fh)
49+
return try_read(path)
3750
except:
3851
if PY3:
39-
with open(path, 'rb') as fh:
40-
return pkl.load(fh, encoding='latin1')
52+
return try_read(path, encoding='latin1')
4153
raise
6.71 KB
Binary file not shown.
6.61 KB
Binary file not shown.
4.95 KB
Binary file not shown.
4.19 KB
Binary file not shown.
5.79 KB
Binary file not shown.

pandas/io/tests/generate_legacy_pickles.py

Lines changed: 38 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,47 @@
11
""" self-contained to write legacy pickle files """
22
from __future__ import print_function
33

4-
from pandas.compat import zip, cPickle as pickle
4+
# make sure we are < 0.13 compat (in py3)
5+
try:
6+
from pandas.compat import zip, cPickle as pickle
7+
except:
8+
import pickle
59

610
def _create_sp_series():
    """Build the block-kind SparseSeries named 'bseries' for the pickles.

    The values are float64 0..14 with positions 7-11 and the last
    position set to NaN; the series uses the default index.
    """

    import numpy as np
    from pandas import SparseSeries

    nan = np.nan

    # nan-based
    arr = np.arange(15, dtype=np.float64)
    arr[7:12] = nan
    arr[-1:] = nan

    bseries = SparseSeries(arr, kind='block')
    bseries.name = 'bseries'
    return bseries
2326

27+
def _create_sp_tsseries():
    """Build the block-kind SparseTimeSeries named 'btsseries'.

    Same values as the plain sparse series (float64 0..14 with NaN at
    positions 7-11 and at the end), indexed by business days starting
    at 1/1/2011.
    """

    import numpy as np
    from pandas import bdate_range, SparseTimeSeries

    # nan-based
    values = np.arange(15, dtype=np.float64)
    values[7:12] = np.nan
    values[-1:] = np.nan

    dates = bdate_range('1/1/2011', periods=len(values))
    sp_ts = SparseTimeSeries(values, index=dates, kind='block')
    sp_ts.name = 'btsseries'
    return sp_ts
44+
2445
def _create_sp_frame():
2546
import numpy as np
2647
from pandas import bdate_range, SparseDataFrame
@@ -29,7 +50,7 @@ def _create_sp_frame():
2950

3051
data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
3152
'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
32-
'C': np.arange(10),
53+
'C': np.arange(10).astype(np.int64),
3354
'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
3455

3556
dates = bdate_range('1/1/2011', periods=10)
@@ -40,8 +61,8 @@ def create_data():
4061

4162
import numpy as np
4263
import pandas
43-
from pandas import (Series,DataFrame,Panel,
44-
SparseSeries,SparseDataFrame,SparsePanel,
64+
from pandas import (Series,TimeSeries,DataFrame,Panel,
65+
SparseSeries,SparseTimeSeries,SparseDataFrame,SparsePanel,
4566
Index,MultiIndex,PeriodIndex,
4667
date_range,bdate_range,Timestamp)
4768
nan = np.nan
@@ -61,10 +82,11 @@ def create_data():
6182
names=['first', 'second']))
6283
series = dict(float = Series(data['A']),
6384
int = Series(data['B']),
64-
mixed = Series(data['E']))
85+
mixed = Series(data['E']),
86+
ts = TimeSeries(np.arange(10).astype(np.int64),index=date_range('20130101',periods=10)))
6587
frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)),
66-
int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
67-
mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
88+
int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
89+
mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
6890
panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)))
6991

7092

@@ -74,7 +96,8 @@ def create_data():
7496
panel = panel,
7597
index = index,
7698
mi = mi,
77-
sp_series = dict(float = _create_sp_series()),
99+
sp_series = dict(float = _create_sp_series(),
100+
ts = _create_sp_tsseries()),
78101
sp_frame = dict(float = _create_sp_frame())
79102
)
80103

@@ -92,24 +115,11 @@ def write_legacy_pickles():
92115

93116
print("This script generates a pickle file for the current arch, system, and python version")
94117

95-
base_dir, _ = os.path.split(os.path.abspath(__file__))
96-
base_dir = os.path.join(base_dir,'data/legacy_pickle')
97-
98-
# could make this a parameter?
99-
version = None
100-
101-
102-
if version is None:
103-
version = pandas.__version__
104-
pth = os.path.join(base_dir, str(version))
105-
try:
106-
os.mkdir(pth)
107-
except:
108-
pass
118+
version = pandas.__version__
109119

110120
# construct a reasonable platform name
111-
f = '_'.join([ str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
112-
pth = os.path.abspath(os.path.join(pth,'%s.pickle' % f))
121+
f = '_'.join([ str(version), str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
122+
pth = '{0}.pickle'.format(f)
113123

114124
fh = open(pth,'wb')
115125
pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL)

pandas/io/tests/test_pickle.py

Lines changed: 16 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from datetime import datetime, timedelta
66
import operator
7-
import pickle
7+
import pickle as pkl
88
import unittest
99
import nose
1010
import os
@@ -29,25 +29,11 @@ def compare(self, vf):
2929

3030
# py3 compat when reading py2 pickle
3131
try:
32-
with open(vf,'rb') as fh:
33-
data = pickle.load(fh)
34-
except ValueError as detail:
35-
36-
# we are trying to read a py3 pickle in py2.....
32+
data = pandas.read_pickle(vf)
33+
except (ValueError) as detail:
34+
# trying to read a py3 pickle in py2
3735
return
3836

39-
# we have a deprecated klass
40-
except TypeError as detail:
41-
42-
from pandas.compat.pickle_compat import load
43-
data = load(vf)
44-
45-
except:
46-
if not compat.PY3:
47-
raise
48-
with open(vf,'rb') as fh:
49-
data = pickle.load(fh, encoding='latin1')
50-
5137
for typ, dv in data.items():
5238
for dt, result in dv.items():
5339

@@ -64,23 +50,26 @@ def compare(self, vf):
6450
comparator = getattr(tm,"assert_%s_equal" % typ)
6551
comparator(result,expected)
6652

67-
def test_read_pickles_0_10_1(self):
53+
def read_pickles(self, version):
    """Run ``compare`` on every file under ``legacy_pickle/<version>``.

    Skips the test on platforms that are not little endian.
    """
    if not is_little_endian():
        raise nose.SkipTest("known failure on non-little endian")

    data_dir = tm.get_data_path('legacy_pickle/{0}'.format(str(version)))
    for fname in os.listdir(data_dir):
        self.compare(os.path.join(data_dir, fname))
7561

62+
def test_read_pickles_0_10_1(self):
    # legacy pickles written by pandas 0.10.1
    self.read_pickles('0.10.1')
64+
7665
def test_read_pickles_0_11_0(self):
77-
if not is_little_endian():
78-
raise nose.SkipTest("known failure of test_read_pickles_0_11_0 on non-little endian")
66+
self.read_pickles('0.11.0')
7967

80-
pth = tm.get_data_path('legacy_pickle/0.11.0')
81-
for f in os.listdir(pth):
82-
vf = os.path.join(pth,f)
83-
self.compare(vf)
68+
def test_read_pickles_0_12_0(self):
    # legacy pickles written by pandas 0.12.0
    self.read_pickles('0.12.0')
70+
71+
def test_read_pickles_0_13_0(self):
    # legacy pickles written by pandas 0.13.0
    self.read_pickles('0.13.0')
8473

8574
if __name__ == '__main__':
8675
import nose

0 commit comments

Comments
 (0)