Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,19 @@ Breaking changes
Enhancements
~~~~~~~~~~~~

- Upsampling an array via interpolation with resample is now dask-compatible,
as long as the array is not chunked along the resampling dimension.
By `Spencer Clark <https://github.com/spencerkclark>`_.

Bug fixes
~~~~~~~~~

- Interpolating via resample now internally specifies ``bounds_error=False``
as an argument to ``scipy.interpolate.interp1d``, allowing for interpolation
from higher frequencies to lower frequencies. Datapoints outside the bounds
of the original time coordinate are now filled with NaN (:issue:`2197`). By
`Spencer Clark <https://github.com/spencerkclark>`_.

.. _whats-new.0.11.1:

v0.11.1 (29 December 2018)
Expand Down
96 changes: 10 additions & 86 deletions xarray/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from . import ops
from .groupby import DEFAULT_DIMS, DataArrayGroupBy, DatasetGroupBy
from .pycompat import OrderedDict, dask_array_type

RESAMPLE_DIM = '__resample_dim__'

Expand Down Expand Up @@ -110,7 +109,16 @@ def interpolate(self, kind='linear'):
return self._interpolate(kind=kind)

def _interpolate(self, kind='linear'):
    """Apply scipy.interpolate.interp1d along the resampling dimension.

    Parameters
    ----------
    kind : str
        Interpolation method forwarded to ``.interp`` (and ultimately to
        ``scipy.interpolate.interp1d``), e.g. 'linear', 'nearest', 'cubic'.

    Returns
    -------
    Same type as ``self._obj``, up-sampled onto ``self._full_index`` along
    ``self._dim``.  Points outside the bounds of the original coordinate
    are filled with NaN via ``bounds_error=False`` (:issue:`2197`).
    """
    # drop any existing non-dimension coordinates along the resampling
    # dimension; they cannot be carried through the interpolation
    dummy = self._obj.copy()
    for k, v in self._obj.coords.items():
        if k != self._dim and self._dim in v.dims:
            dummy = dummy.drop(k)
    # assume_sorted=True: monotonicity along the resampling dimension was
    # already validated by the GroupBy constructor, so skip re-sorting
    return dummy.interp(assume_sorted=True, method=kind,
                        kwargs={'bounds_error': False},
                        **{self._dim: self._full_index})


class DataArrayResample(DataArrayGroupBy, Resample):
Expand Down Expand Up @@ -182,46 +190,6 @@ def apply(self, func, shortcut=False, args=(), **kwargs):

return combined

def _interpolate(self, kind='linear'):
    """Apply scipy.interpolate.interp1d along the resampling dimension.

    Parameters
    ----------
    kind : str
        Interpolation scheme passed to ``scipy.interpolate.interp1d``.

    Returns
    -------
    DataArray
        Up-sampled array indexed by ``self._full_index`` along
        ``self._dim``.  Points outside the bounds of the original
        coordinate are filled with NaN (:issue:`2197`).

    Raises
    ------
    TypeError
        If the underlying data is a dask array (not yet supported here).
    """
    from .dataarray import DataArray
    from scipy.interpolate import interp1d

    if isinstance(self._obj.data, dask_array_type):
        raise TypeError(
            "Up-sampling via interpolation was attempted on the "
            "variable '{}', but it is a dask array; dask arrays are not "
            "yet supported in resample.interpolate(). Load into "
            "memory with Dataset.load() before resampling."
            .format(self._obj.data.name)
        )

    x = self._obj[self._dim].astype('float')
    y = self._obj.data

    axis = self._obj.get_axis_num(self._dim)

    # bounds_error=False fills points outside the original time range
    # with NaN instead of raising, so interpolating from a higher to a
    # lower frequency works (:issue:`2197`).  assume_sorted=True is safe
    # because monotonicity along the resampling dimension was checked in
    # __init__ via the GroupBy constructor.
    f = interp1d(x, y, kind=kind, axis=axis, bounds_error=False,
                 assume_sorted=True)
    new_x = self._full_index.values.astype('float')

    # construct new up-sampled DataArray
    dummy = self._obj.copy()
    dims = dummy.dims

    # drop any existing non-dimension coordinates along the resampling
    # dimension
    coords = OrderedDict()
    for k, v in dummy.coords.items():
        # is the resampling dimension
        if k == self._dim:
            coords[self._dim] = self._full_index
        # else, check if resampling dim is in coordinate dimensions
        elif self._dim not in v.dims:
            coords[k] = v
    return DataArray(f(new_x), coords, dims, name=dummy.name,
                     attrs=dummy.attrs)


ops.inject_reduce_methods(DataArrayResample)
ops.inject_binary_ops(DataArrayResample)
Expand Down Expand Up @@ -308,50 +276,6 @@ def reduce(self, func, dim=None, keep_attrs=None, **kwargs):
return super(DatasetResample, self).reduce(
func, dim, keep_attrs, **kwargs)

def _interpolate(self, kind='linear'):
    """Apply scipy.interpolate.interp1d along the resampling dimension.

    Parameters
    ----------
    kind : str
        Interpolation scheme passed to ``scipy.interpolate.interp1d``.

    Returns
    -------
    Dataset
        Up-sampled dataset indexed by ``self._full_index`` along
        ``self._dim``.  Points outside the bounds of the original
        coordinate are filled with NaN (:issue:`2197`).

    Raises
    ------
    TypeError
        If any data variable is backed by a dask array (not yet
        supported here).
    """
    from .dataset import Dataset
    from .variable import Variable
    from scipy.interpolate import interp1d

    old_times = self._obj[self._dim].astype(float)
    new_times = self._full_index.values.astype(float)

    data_vars = OrderedDict()
    coords = OrderedDict()

    # Apply the interpolation to each DataArray in our original Dataset
    for name, variable in self._obj.variables.items():
        if name in self._obj.coords:
            if name == self._dim:
                coords[self._dim] = self._full_index
            elif self._dim not in variable.dims:
                coords[name] = variable
        else:
            if isinstance(variable.data, dask_array_type):
                raise TypeError(
                    "Up-sampling via interpolation was attempted on the "
                    "variable '{}', but it is a dask array; dask arrays "
                    "are not yet supported in resample.interpolate(). Load "
                    "into memory with Dataset.load() before resampling."
                    .format(name)
                )

            axis = variable.get_axis_num(self._dim)

            # We've previously checked for monotonicity along the
            # re-sampling dimension (in __init__ via the GroupBy
            # constructor), so we can avoid sorting the data again by
            # passing 'assume_sorted=True'.  bounds_error=False fills
            # out-of-range points with NaN instead of raising
            # (:issue:`2197`).
            f = interp1d(old_times, variable.data, kind=kind,
                         axis=axis, bounds_error=False,
                         assume_sorted=True)
            interpolated = Variable(variable.dims, f(new_times))

            data_vars[name] = interpolated

    return Dataset(data_vars, coords)


ops.inject_reduce_methods(DatasetResample)
ops.inject_binary_ops(DatasetResample)
45 changes: 38 additions & 7 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2524,6 +2524,16 @@ def test_upsample_interpolate(self):
# done here due to floating point arithmetic
assert_allclose(expected, actual, rtol=1e-16)

@requires_scipy
def test_upsample_interpolate_bug_2197(self):
    # Regression test for GH2197: interpolating from a higher frequency
    # (daily) to a lower one (monthly) must not raise; points outside the
    # original time bounds are filled with NaN instead.
    dates = pd.date_range('2007-02-01', '2007-03-01', freq='D')
    daily = xr.DataArray(np.arange(len(dates)), [('time', dates)])

    month_ends = np.array([np.datetime64('2007-02-28'),
                           np.datetime64('2007-03-31')])
    expected = xr.DataArray([27., np.nan], [('time', month_ends)])

    actual = daily.resample(time='M').interpolate('linear')
    assert_equal(actual, expected)

@requires_scipy
def test_upsample_interpolate_regression_1605(self):
dates = pd.date_range('2016-01-01', '2016-03-31', freq='1D')
Expand All @@ -2536,21 +2546,42 @@ def test_upsample_interpolate_regression_1605(self):
@requires_dask
@requires_scipy
def test_upsample_interpolate_dask(self):
    """Up-sampling via interpolation works on dask-backed arrays as long
    as the array is not chunked along the resampling dimension; chunking
    along it raises NotImplementedError."""
    from scipy.interpolate import interp1d

    xs = np.arange(6)
    ys = np.arange(3)
    times = pd.date_range('2000-01-01', freq='6H', periods=5)

    z = np.arange(5) ** 2
    data = np.tile(z, (6, 3, 1))
    array = DataArray(data,
                      {'time': times, 'x': xs, 'y': ys},
                      ('x', 'y', 'time'))
    # chunk along non-resampled dimensions only; chunking along 'time'
    # is exercised separately below
    chunks = {'x': 2, 'y': 1}

    expected_times = times.to_series().resample('1H').asfreq().index
    # Split the times into equal sub-intervals to simulate the 6 hour
    # to 1 hour up-sampling
    new_times_idx = np.linspace(0, len(times) - 1, len(times) * 5)
    for kind in ['linear', 'nearest', 'zero', 'slinear', 'quadratic',
                 'cubic']:
        actual = array.chunk(chunks).resample(time='1H').interpolate(kind)
        actual = actual.compute()
        f = interp1d(np.arange(len(times)), data, kind=kind, axis=-1,
                     bounds_error=True, assume_sorted=True)
        expected_data = f(new_times_idx)
        expected = DataArray(expected_data,
                             {'time': expected_times, 'x': xs, 'y': ys},
                             ('x', 'y', 'time'))
        # Use AllClose because there are some small differences in how
        # we upsample timeseries versus the integer indexing as I've
        # done here due to floating point arithmetic
        assert_allclose(expected, actual, rtol=1e-16)

    # Check that an error is raised if an attempt is made to interpolate
    # over a chunked dimension
    with raises_regex(NotImplementedError,
                      'Chunking along the dimension to be interpolated'):
        array.chunk({'time': 1}).resample(time='1H').interpolate('linear')

def test_align(self):
array = DataArray(np.random.random((6, 8)),
Expand Down