From 1582c1f01986f7510c6f37402b5937e0ad993f11 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 16 Oct 2017 19:45:54 -0700 Subject: [PATCH 01/20] initial interpolate commit first working version of interpolate method/module pep8 after merge master update interpolate, more pandas compat tests add interpolate_na to api encoding fix for py2 in dataarray.py working... checkin, roughed in interpolator classes move tests and some mods to apply_ufunc usage in interp_na add method to kwargs fixes for scipy and some docs cleanup scipy vs numpy interpolator selection cleanups add limit to interpolate_na bfill/ffill parallelized new interface with use_coordinate kwarg use partial function to wrap interpolate class a few fixes for ffill/bfill, placeholder for interpolate_at method add some tests fix test --- doc/api.rst | 3 + xarray/core/dataarray.py | 48 ++++++ xarray/core/missing.py | 291 +++++++++++++++++++++++++++++++++++ xarray/tests/test_missing.py | 230 +++++++++++++++++++++++++++ 4 files changed, 572 insertions(+) create mode 100644 xarray/core/missing.py create mode 100644 xarray/tests/test_missing.py diff --git a/doc/api.rst b/doc/api.rst index 0eb9e4c131c..f75c6fdeedf 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -298,6 +298,9 @@ Computation :py:attr:`~DataArray.count` :py:attr:`~DataArray.dropna` :py:attr:`~DataArray.fillna` +:py:attr:`~DataArray.ffill` +:py:attr:`~DataArray.bfill` +:py:attr:`~DataArray.interpolate_na` :py:attr:`~DataArray.where` **ndarray methods**: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1dac72335d2..8c15f9595d0 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1214,6 +1214,54 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def interpolate_na(self, dim=None, method='linear', inplace=False, + limit=None, **kwargs): + """Interpolate values according to different methods. + + Parameters + ---------- + dim : str + Specifies the dimension along which to interpolate. + method : {'linear', 'time', 'index', 'values', 'nearest'} + 'linear': ignore the index and treat the values as equally + spaced. default + 'time': interpolation works on daily and higher resolution data to + interpolate given length of interval + 'index', 'values': use the actual numerical values of the index + 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', + 'polynomial' is passed to scipy.interpolate.interp1d with the + order given both 'polynomial' and 'spline' require that you also + specify and order (int) e.g. da.interpolate_na(method='polynomial', + order=4) + limit : limit : int, default None + Maximum number of consecutive NaNs to fill. Must be greater than 0. + + Returns + ------- + DataArray + """ + from .missing import interp_na + return interp_na(self, dim=dim, method=method, inplace=inplace, + **kwargs) + + def interpolate_at(self, dim, locs, method='linear', inplace=False, + limit=None, **kwargs): + # this is just here so I remember the signature we discussed + # dim: the dimension along which to interpolate + # locs: a broadcastable boolean mask describing where interpolation + # should happen + raise NotImplementedError() + + def ffill(self, dim, limit=None): + '''TODO''' + from .missing import ffill + return ffill(self, dim, limit=limit) + + def bfill(self, dim, limit=None): + '''TODO''' + from .missing import bfill + return bfill(self, dim, limit=limit) + def combine_first(self, other): """Combine two DataArray objects, with union of coordinates. 
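A usage sketch of the DataArray methods added above. The data here is illustrative only, and the calls assume the API as it stands at the end of this patch series (scipy is needed for the non-default interpolation methods, bottleneck for ffill/bfill):

    import numpy as np
    import xarray as xr

    # a 1-D array with an interior run of NaNs
    da = xr.DataArray([0.0, 1.0, np.nan, np.nan, 4.0, 5.0], dims='x')

    # linear interpolation along 'x' (the default method)
    filled = da.interpolate_na(dim='x', method='linear')

    # only fill runs of at most one consecutive NaN
    partly = da.interpolate_na(dim='x', method='linear', limit=1)

    # propagate the previous/next valid value instead of interpolating
    fwd = da.ffill('x')
    bwd = da.bfill('x')
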
diff --git a/xarray/core/missing.py b/xarray/core/missing.py new file mode 100644 index 00000000000..a20f38754dd --- /dev/null +++ b/xarray/core/missing.py @@ -0,0 +1,291 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import Iterable +from functools import partial + +import numpy as np +import pandas as pd + + +from .computation import apply_ufunc +from .utils import is_scalar + + +class BaseInterpolator(object): + '''gerneric interpolator class for normalizing interpolation methods''' + cons_kwargs = {} + call_kwargs = {} + f = None + kind = None + + def __init__(self, xi, yi, kind=None, **kwargs): + self.kind = kind + self.call_kwargs = kwargs + + def __call__(self, x): + return self.f(x, **self.call_kwargs) + + def __repr__(self): + return "{type}: kind={kind}".format(type=self.__class__.__name__, + kind=self.kind) + + +class NumpyInterpolator(BaseInterpolator): + def __init__(self, xi, yi, kind='linear', fill_value=None, **kwargs): + self.kind = kind + self.f = np.interp + self.cons_kwargs = kwargs + self.call_kwargs = {'period': self.cons_kwargs.pop('period', None)} + + self._xi = xi + self._yi = yi + + if self.cons_kwargs: + raise ValueError( + 'recieved invalid kwargs: %r' % self.cons_kwargs.keys()) + + if fill_value is None: + self._left = np.nan + self._right = yi[-1] + elif isinstance(fill_value, Iterable) and len(fill_value) == 2: + self._left = fill_value[0] + self._right = fill_value[1] + elif is_scalar(fill_value): + self._left = fill_value + self._right = fill_value + else: + raise ValueError('%s is not a valid fill_value' % fill_value) + + def __call__(self, x): + return self.f(x, self._xi, self._yi, left=self._left, + right=self._right, **self.call_kwargs) + + +class ScipyInterpolator(BaseInterpolator): + def __init__(self, xi, yi, kind=None, fill_value=None, assume_sorted=True, + copy=False, bounds_error=False, **kwargs): + from scipy.interpolate import interp1d + + if kind is None: + raise ValueError('kind is a required argument') + + if kind == 'polynomial': + kind = kwargs.pop('order', None) + if kind is None: + raise ValueError('order is required when kind=polynomial') + + self.kind = kind + + self.cons_kwargs = kwargs + self.call_kwargs = {} + + if fill_value is None and kind == 'linear': + fill_value = kwargs.pop('fill_value', (np.nan, yi[-1])) + else: + fill_value = np.nan + + self.f = interp1d(xi, yi, kind=self.kind, fill_value=fill_value, + bounds_error=False, **self.cons_kwargs) + + +class FromDerivativesInterpolator(BaseInterpolator): + def __init__(self, xi, yi, kind=None, fill_value=None, **kwargs): + from scipy.interpolate import BPoly + + if kind is None: + raise ValueError('kind is a required argument') + + self.kind = kind + self.cons_kwargs = kwargs + + if fill_value is not None: + raise ValueError('from_derivatives does not support fill_value') + + self.f = BPoly.from_derivatives(xi, yi, **self.cons_kwargs) + + +class SplineInterpolator(BaseInterpolator): + def __init__(self, xi, yi, kind=None, fill_value=None, order=3, **kwargs): + from scipy.interpolate import UnivariateSpline + + if kind is None: + raise ValueError('kind is a required argument') + + self.kind = kind + self.cons_kwargs = kwargs + self.call_kwargs['nu'] = kwargs.pop('nu', 0) + self.call_kwargs['ext'] = kwargs.pop('ext', None) + + if fill_value is not None: + raise ValueError('SplineInterpolator does not support fill_value') + + self.f = UnivariateSpline(xi, yi, k=order, **self.cons_kwargs) + + +def 
get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): + '''get index to use for x values in interpolation + ''' + + if use_coordinate: + index = arr.get_index(dim) + if isinstance(index, pd.DatetimeIndex): + index = index.values.astype(np.float64) + + # check index sorting now so we can skip it later + if not (np.diff(index) > 0).all(): + raise ValueError("Index must be monotonicly increasing") + + else: + axis = arr.get_axis_num(dim) + index = np.arange(arr.shape[axis]) + + return index + + +def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, + inplace=False, **kwargs): + '''Interpolate values according to different methods.''' + + arr = self if inplace else self.copy() + + if dim is None: + raise NotImplementedError('dim is a required argument') + + if limit is not None: + valids = _get_valid_fill_mask(arr, dim, limit) + + # method + index = get_clean_interp_index(arr, dim, use_coordinate=use_coordinate, + **kwargs) + kwargs.update(kind=method) + + interpolator = _get_interpolator(method, **kwargs) + + arr = apply_ufunc(interpolator, index, arr, + input_core_dims=[[dim], [dim]], + output_core_dims=[[dim]], + dask='parallelized', + vectorize=True, + keep_attrs=True).transpose(*arr.dims) + + if limit is not None: + arr = arr.where(valids) + + return arr + + +def wrap_interpolator(interpolator, x, y, **kwargs): + '''helper function to apply interpolation along 1 dimension''' + if x.shape != y.shape: + # this can probably be removed once I get apply_ufuncs to work + raise AssertionError("x and y shapes do not match " + "%s != %s" % (x.shape, y.shape)) + + nans = pd.isnull(y) + nonans = ~nans + + # fast track for no-nans and all-nans cases + n_nans = nans.sum() + if n_nans == 0 or n_nans == len(y): + return y + + f = interpolator(x[nonans], y[nonans], **kwargs) + return f(x[nans]) + + +def _bfill(arr, n=None, axis=-1): + '''inverse of ffill''' + import bottleneck as bn + + arr = np.flip(arr, axis=axis) + + # fill + arr = bn.push(arr, axis=axis, n=n) + + # reverse back to original + return np.flip(arr, axis=axis) + + +def ffill(arr, dim=None, limit=None): + ''' ''' + import bottleneck as bn + + axis = arr.get_axis_num(dim) + + if limit is not None: + valids = _get_valid_fill_mask(arr, dim, limit) + + # work around for bottleneck 178 + _limit = limit if limit is not None else arr.shape[axis] + + new = apply_ufunc(bn.push, arr, + dask='parallelized', + keep_attrs=True, + kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) + + if limit is not None: + new = new.where(valids) + + return new + + +def bfill(arr, dim=None, limit=None): + ''' ''' + axis = arr.get_axis_num(dim) + + if limit is not None: + valids = _get_valid_fill_mask(arr, dim, limit) + + # work around for bottleneck 178 + _limit = limit if limit is not None else arr.shape[axis] + + new = apply_ufunc(_bfill, arr, + dask='parallelized', + keep_attrs=True, + kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) + if limit is not None: + new = new.where(valids) + + return new + + +def _get_interpolator(method, **kwargs): + interp1d_methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', + 'cubic', 'polynomial'] + valid_methods = interp1d_methods + ['piecewise_polynomial', 'barycentric', + 'krog', 'pchip', 'spline'] + + if (method == 'linear' and not + kwargs.get('fill_value', None) == 'extrapolate'): + interp_class = NumpyInterpolator + elif method in valid_methods: + try: + from scipy import interpolate + except ImportError: + raise ImportError( + 'Interpolation with method `%s` requires 
scipy' % method) + if method in interp1d_methods: + interp_class = ScipyInterpolator + elif method == 'piecewise_polynomial': + interp_class = FromDerivativesInterpolator + elif method == 'barycentric': + interp_class = interpolate.BarycentricInterpolator + elif method == 'krog': + interp_class = interpolate.KroghInterpolator + elif method == 'pchip': + interp_class = interpolate.PchipInterpolator + elif method == 'spline': + interp_class = SplineInterpolator + else: + raise ValueError('%s is not a valid scipy interpolator' % method) + else: + raise ValueError('%s is not a valid interpolator' % method) + + return partial(wrap_interpolator, interp_class, **kwargs) + + +def _get_valid_fill_mask(arr, dim, limit): + kw = {dim: limit + 1} + return arr.isnull().rolling(min_periods=1, **kw).sum() <= limit diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py new file mode 100644 index 00000000000..8d405ce7fd0 --- /dev/null +++ b/xarray/tests/test_missing.py @@ -0,0 +1,230 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import numpy as np +import pandas as pd +import pytest + +from xarray import DataArray + +from xarray.tests import ( + assert_identical, assert_equal, raises_regex, requires_scipy, + requires_bottleneck, assert_array_equal) + + +@pytest.fixture(params=[1]) +def da(request): + if request.param == 1: + times = pd.date_range('2000-01-01', freq='1D', periods=21) + values = np.random.random((3, 21, 4)) + da = DataArray(values, dims=('a', 'time', 'x')) + da['time'] = times + return da + + if request.param == 2: + return DataArray( + [0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], + dims='time') + + +def make_interpolate_example_data(shape, frac_nan, seed=12345, + non_uniform=False): + rs = np.random.RandomState(seed) + vals = rs.normal(size=shape) + if frac_nan == 1: + vals[:] = np.nan + elif frac_nan == 0: + pass + else: + n_missing = int(vals.size * frac_nan) + + ys = np.arange(shape[0]) + xs = np.arange(shape[1]) + if n_missing: + np.random.shuffle(ys) + ys = ys[:n_missing] + + np.random.shuffle(xs) + xs = xs[:n_missing] + + vals[ys, xs] = np.nan + + if non_uniform: + # construct a datetime index that has irregular spacing + deltas = pd.TimedeltaIndex(unit='d', data=rs.normal(size=shape[0], + scale=10)) + coords = {'time': (pd.Timestamp('2000-01-01') + deltas).sort_values()} + else: + coords = {'time': pd.date_range('2000-01-01', freq='D', + periods=shape[0])} + da = DataArray(vals, dims=('time', 'x'), coords=coords) + df = da.to_pandas() + return da, df + + +@requires_scipy +@pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1), (100, 100)]) +@pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) +@pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', + 'quadratic', 'cubic']) +def test_interpolate_pd_compat(shape, frac_nan, method): + da, df = make_interpolate_example_data(shape, frac_nan) + + for dim in ['time', 'x']: + actual = da.interpolate_na(method=method, dim=dim) + expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) + np.testing.assert_allclose(actual.values, expected.values) + + +@requires_scipy +@pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1)]) +@pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) +@pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', + 'quadratic', 'cubic']) +def test_interpolate_pd_compat_non_uniform_index(shape, frac_nan, method): + da, df = make_interpolate_example_data(shape, frac_nan, 
non_uniform=True) + for dim in ['time', 'x']: + actual = da.interpolate_na(method=method, dim=dim) + expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) + np.testing.assert_allclose(actual.values, expected.values) + + +@requires_scipy +@pytest.mark.parametrize('shape', [(8, 8), (100, 100)]) +@pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) +@pytest.mark.parametrize('order', [1, 2, 3]) +def test_interpolate_pd_compat_polynomial(shape, frac_nan, order): + da, df = make_interpolate_example_data(shape, frac_nan) + + for dim in ['time', 'x']: + actual = da.interpolate_na(method='polynomial', order=order, dim=dim) + expected = df.interpolate(method='polynomial', order=order, + axis=da.get_axis_num(dim)) + np.testing.assert_allclose(actual.values, expected.values) + + +@requires_scipy +def test_interpolate_unsorted_index_raises(): + vals = np.array([1, 2, 3], dtype=np.float64) + expected = DataArray(vals, dims=('x'), coords={'x': [2, 1, 3]}) + with raises_regex(ValueError, 'must be monotonicly increasing'): + expected.interpolate_na(dim='x', method='index') + + +@requires_scipy +def test_interpolate_inplace(): + original = DataArray(np.array([1, 2, 3, np.nan, 5], dtype=np.float64), + dims=('x')) + expected = DataArray(np.array([1, 2, 3, 4, 5], dtype=np.float64), + dims=('x')) + new = original.interpolate_na(inplace=True, dim='x') + assert_identical(original, expected) + assert_identical(original, new) + + +@requires_scipy +def test_interpolate_no_dim_raises(): + da = DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims=('x')) + with raises_regex(NotImplementedError, 'dim is a required argument'): + da.interpolate_na(method='linear') + + +@requires_scipy +def test_interpolate_kwargs(): + da = DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims=('x')) + expected = DataArray(np.array([4, 5, 6], dtype=np.float64), dims=('x')) + actual = da.interpolate_na(dim='x', fill_value='extrapolate') + assert_equal(actual, expected) + + expected = DataArray(np.array([4, 5, -999], dtype=np.float64), dims=('x')) + actual = da.interpolate_na(dim='x', fill_value=-999) + assert_equal(actual, expected) + + +@requires_scipy +def test_interpolate(): + + vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) + expected = DataArray(vals, dims=('x')) + mvals = vals.copy() + mvals[2] = np.nan + missing = DataArray(mvals, dims=('x')) + + actual = missing.interpolate_na(dim='x') + + assert_equal(actual, expected) + + +@requires_scipy +def test_interpolate_nonans(): + + vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) + expected = DataArray(vals, dims=('x')) + actual = expected.interpolate_na(dim='x') + assert_equal(actual, expected) + + +@requires_scipy +def test_interpolate_allnans(): + vals = np.full(6, np.nan, dtype=np.float64) + expected = DataArray(vals, dims=('x')) + actual = expected.interpolate_na(dim='x') + + assert_equal(actual, expected) + + +def test_interpolate_limits(): + raise NotImplementedError() + + +@requires_bottleneck +def test_ffill(): + da = DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims=('x')) + expected = DataArray(np.array([4, 5, 5], dtype=np.float64), dims=('x')) + actual = da.ffill('x') + assert_equal(actual, expected) + + +@requires_bottleneck +def test_ffill_bfill_nonans(): + + vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) + expected = DataArray(vals, dims='x') + + actual = expected.ffill(dim='x') + assert_equal(actual, expected) + + actual = expected.bfill(dim='x') + assert_equal(actual, expected) + + +@requires_bottleneck +def 
test_ffill_bfill_allnans(): + + vals = np.full(6, np.nan, dtype=np.float64) + expected = DataArray(vals, dims='x') + + actual = expected.ffill(dim='x') + assert_equal(actual, expected) + + actual = expected.bfill(dim='x') + assert_equal(actual, expected) + + +@pytest.mark.parametrize('da', (1, 2), indirect=True) +def test_ffill_functions(da): + result = da.ffill('time') + assert result.isnull().sum() == 0 + + +def test_ffill_limit(): + da = DataArray( + [0, np.nan, np.nan, np.nan, np.nan, 3, 4, 5, np.nan, 6, 7], + dims='time') + result = da.ffill('time') + expected = DataArray([0, 0, 0, 0, 0, 3, 4, 5, 5, 6, 7], dims='time') + assert_array_equal(result, expected) + + result = da.ffill('time', limit=1) + expected = DataArray( + [0, 0, np.nan, np.nan, np.nan, 3, 4, 5, 5, 6, 7], dims='time') From ab727e7a2f6cce51d55e9a05d191b3725b0ad3e6 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sat, 11 Nov 2017 00:19:10 -0800 Subject: [PATCH 02/20] fix to interpolate wrapper function --- xarray/core/missing.py | 57 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index a20f38754dd..6c1a76a062e 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -33,6 +33,12 @@ def __repr__(self): class NumpyInterpolator(BaseInterpolator): + '''One-dimensional linear interpolation. + + See Also + -------- + numpy.interp + ''' def __init__(self, xi, yi, kind='linear', fill_value=None, **kwargs): self.kind = kind self.f = np.interp @@ -64,6 +70,12 @@ def __call__(self, x): class ScipyInterpolator(BaseInterpolator): + '''Interpolate a 1-D function using Scipy interp1d + + See Also + -------- + scipy.interpolate.interp1d + ''' def __init__(self, xi, yi, kind=None, fill_value=None, assume_sorted=True, copy=False, bounds_error=False, **kwargs): from scipy.interpolate import interp1d @@ -83,7 +95,7 @@ def __init__(self, xi, yi, kind=None, fill_value=None, assume_sorted=True, if fill_value is None and kind == 'linear': fill_value = kwargs.pop('fill_value', (np.nan, yi[-1])) - else: + elif fill_value is None: fill_value = np.nan self.f = interp1d(xi, yi, kind=self.kind, fill_value=fill_value, @@ -91,6 +103,12 @@ def __init__(self, xi, yi, kind=None, fill_value=None, assume_sorted=True, class FromDerivativesInterpolator(BaseInterpolator): + '''Piecewise polynomial in terms of coefficients and breakpoints. + + See Also + -------- + scipy.interpolate.BPoly + ''' def __init__(self, xi, yi, kind=None, fill_value=None, **kwargs): from scipy.interpolate import BPoly @@ -107,6 +125,12 @@ def __init__(self, xi, yi, kind=None, fill_value=None, **kwargs): class SplineInterpolator(BaseInterpolator): + '''One-dimensional smoothing spline fit to a given set of data points. + + See Also + -------- + scipy.interpolate.UnivariateSpline + ''' def __init__(self, xi, yi, kind=None, fill_value=None, order=3, **kwargs): from scipy.interpolate import UnivariateSpline @@ -125,7 +149,14 @@ def __init__(self, xi, yi, kind=None, fill_value=None, order=3, **kwargs): def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): - '''get index to use for x values in interpolation + '''get index to use for x values in interpolation. + + If use_coordinate is True, the coordinate that shares the name of the + dimension along which interpolation is being performed will be used as the + x values. + + If use_coordinate is False, the x values are set as an equally spaced + sequence. 
''' if use_coordinate: @@ -148,6 +179,7 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, inplace=False, **kwargs): '''Interpolate values according to different methods.''' + # this may not be possible with apply_ufunc arr = self if inplace else self.copy() if dim is None: @@ -166,6 +198,8 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, arr = apply_ufunc(interpolator, index, arr, input_core_dims=[[dim], [dim]], output_core_dims=[[dim]], + output_dtypes=[arr.dtype], + output_sizes=dict(zip(arr.dims, arr.shape)), dask='parallelized', vectorize=True, keep_attrs=True).transpose(*arr.dims) @@ -183,6 +217,10 @@ def wrap_interpolator(interpolator, x, y, **kwargs): raise AssertionError("x and y shapes do not match " "%s != %s" % (x.shape, y.shape)) + # it would be nice if this wasn't necessary, works around: + # "ValueError: assignment destination is read-only" in assignment below + out = y.copy() + nans = pd.isnull(y) nonans = ~nans @@ -192,7 +230,8 @@ def wrap_interpolator(interpolator, x, y, **kwargs): return y f = interpolator(x[nonans], y[nonans], **kwargs) - return f(x[nans]) + out[nans] = f(x[nans]) + return out def _bfill(arr, n=None, axis=-1): @@ -209,7 +248,7 @@ def _bfill(arr, n=None, axis=-1): def ffill(arr, dim=None, limit=None): - ''' ''' + '''forward fill missing values''' import bottleneck as bn axis = arr.get_axis_num(dim) @@ -223,6 +262,7 @@ def ffill(arr, dim=None, limit=None): new = apply_ufunc(bn.push, arr, dask='parallelized', keep_attrs=True, + output_dtypes=[arr.dtype], kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) if limit is not None: @@ -232,7 +272,7 @@ def ffill(arr, dim=None, limit=None): def bfill(arr, dim=None, limit=None): - ''' ''' + '''backfill missing values''' axis = arr.get_axis_num(dim) if limit is not None: @@ -244,6 +284,7 @@ def bfill(arr, dim=None, limit=None): new = apply_ufunc(_bfill, arr, dask='parallelized', keep_attrs=True, + output_dtypes=[arr.dtype], kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) if limit is not None: new = new.where(valids) @@ -252,6 +293,10 @@ def bfill(arr, dim=None, limit=None): def _get_interpolator(method, **kwargs): + '''helper function to select the appropriate interpolator class + + returns a partial of wrap_interpolator + ''' interp1d_methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial'] valid_methods = interp1d_methods + ['piecewise_polynomial', 'barycentric', @@ -287,5 +332,7 @@ def _get_interpolator(method, **kwargs): def _get_valid_fill_mask(arr, dim, limit): + '''helper function to determine values that can be filled when limit is not + None''' kw = {dim: limit + 1} return arr.isnull().rolling(min_periods=1, **kw).sum() <= limit From 95006c462fde40405fc6bfec9fb1e89e30eb3308 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sat, 11 Nov 2017 00:27:18 -0800 Subject: [PATCH 03/20] remove duplicate limit handling in ffill/bfill --- xarray/core/missing.py | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 6c1a76a062e..fcfa0dc70f2 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -253,43 +253,28 @@ def ffill(arr, dim=None, limit=None): axis = arr.get_axis_num(dim) - if limit is not None: - valids = _get_valid_fill_mask(arr, dim, limit) - # work around for bottleneck 178 _limit = limit if limit is not None else arr.shape[axis] - new = apply_ufunc(bn.push, arr, - dask='parallelized', - 
keep_attrs=True, - output_dtypes=[arr.dtype], - kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) - - if limit is not None: - new = new.where(valids) - - return new + return apply_ufunc(bn.push, arr, + dask='parallelized', + keep_attrs=True, + output_dtypes=[arr.dtype], + kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) def bfill(arr, dim=None, limit=None): '''backfill missing values''' axis = arr.get_axis_num(dim) - if limit is not None: - valids = _get_valid_fill_mask(arr, dim, limit) - # work around for bottleneck 178 _limit = limit if limit is not None else arr.shape[axis] - new = apply_ufunc(_bfill, arr, - dask='parallelized', - keep_attrs=True, - output_dtypes=[arr.dtype], - kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) - if limit is not None: - new = new.where(valids) - - return new + return apply_ufunc(_bfill, arr, + dask='parallelized', + keep_attrs=True, + output_dtypes=[arr.dtype], + kwargs=dict(n=_limit, axis=axis)).transpose(*arr.dims) def _get_interpolator(method, **kwargs): From 4a4f6ebec4068da58970357a86dedb06eb497a8c Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sat, 11 Nov 2017 20:18:44 -0800 Subject: [PATCH 04/20] tests are passing --- xarray/core/dataarray.py | 10 +++--- xarray/core/missing.py | 22 ++++++------- xarray/tests/test_missing.py | 61 +++++++++++++++++++++++++++--------- 3 files changed, 63 insertions(+), 30 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8c15f9595d0..e76777ef410 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1214,8 +1214,9 @@ def fillna(self, value): out = ops.fillna(self, value) return out - def interpolate_na(self, dim=None, method='linear', inplace=False, - limit=None, **kwargs): + def interpolate_na(self, dim=None, method='linear', limit=None, + use_coordinate=True, + **kwargs): """Interpolate values according to different methods. Parameters @@ -1233,6 +1234,7 @@ def interpolate_na(self, dim=None, method='linear', inplace=False, order given both 'polynomial' and 'spline' require that you also specify and order (int) e.g. da.interpolate_na(method='polynomial', order=4) + use_coordinate : boolean, default True limit : limit : int, default None Maximum number of consecutive NaNs to fill. Must be greater than 0. 
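The `limit` option documented above is implemented through the rolling NaN count in `_get_valid_fill_mask`; PATCH 03/20 drops the duplicate mask from ffill/bfill because bottleneck's `n` argument already limits how far values are pushed. A small standalone sketch of the masking idea, with made-up values, assuming a NumPy-backed DataArray with working rolling aggregations:

    import numpy as np
    import xarray as xr

    limit = 2
    da = xr.DataArray([0.0, np.nan, np.nan, np.nan, 4.0, np.nan, 6.0], dims='x')

    # same expression as _get_valid_fill_mask: a value is fillable only if the
    # trailing window of limit + 1 entries ending at it contains a non-NaN
    valids = da.isnull().rolling(min_periods=1, x=limit + 1).sum() <= limit

    # interp_na interpolates everything, then masks out the over-long gaps
    filled = da.interpolate_na(dim='x', limit=limit)
    same = da.interpolate_na(dim='x').where(valids)   # equivalent result
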
@@ -1241,8 +1243,8 @@ def interpolate_na(self, dim=None, method='linear', inplace=False, DataArray """ from .missing import interp_na - return interp_na(self, dim=dim, method=method, inplace=inplace, - **kwargs) + return interp_na(self, dim=dim, method=method, limit=limit, + use_coordinate=use_coordinate, **kwargs) def interpolate_at(self, dim, locs, method='linear', inplace=False, limit=None, **kwargs): diff --git a/xarray/core/missing.py b/xarray/core/missing.py index fcfa0dc70f2..682434cd3a4 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -176,35 +176,33 @@ def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, - inplace=False, **kwargs): + **kwargs): '''Interpolate values according to different methods.''' - # this may not be possible with apply_ufunc - arr = self if inplace else self.copy() - if dim is None: raise NotImplementedError('dim is a required argument') if limit is not None: - valids = _get_valid_fill_mask(arr, dim, limit) + valids = _get_valid_fill_mask(self, dim, limit) # method - index = get_clean_interp_index(arr, dim, use_coordinate=use_coordinate, + index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate, **kwargs) kwargs.update(kind=method) interpolator = _get_interpolator(method, **kwargs) - arr = apply_ufunc(interpolator, index, arr, + arr = apply_ufunc(interpolator, index, self, input_core_dims=[[dim], [dim]], output_core_dims=[[dim]], - output_dtypes=[arr.dtype], - output_sizes=dict(zip(arr.dims, arr.shape)), + output_dtypes=[self.dtype], + output_sizes=dict(zip(self.dims, self.shape)), dask='parallelized', vectorize=True, - keep_attrs=True).transpose(*arr.dims) + keep_attrs=True).transpose(*self.dims) if limit is not None: + print(valids) arr = arr.where(valids) return arr @@ -299,7 +297,9 @@ def _get_interpolator(method, **kwargs): if method in interp1d_methods: interp_class = ScipyInterpolator elif method == 'piecewise_polynomial': - interp_class = FromDerivativesInterpolator + raise NotImplementedError( + '%s has not been fully implemented' % method) + # interp_class = FromDerivativesInterpolator elif method == 'barycentric': interp_class = interpolate.BarycentricInterpolator elif method == 'krog': diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 8d405ce7fd0..8152a1282c3 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -7,6 +7,10 @@ from xarray import DataArray +from xarray.core.missing import (NumpyInterpolator, ScipyInterpolator, + FromDerivativesInterpolator, + SplineInterpolator) + from xarray.tests import ( assert_identical, assert_equal, raises_regex, requires_scipy, requires_bottleneck, assert_array_equal) @@ -79,12 +83,25 @@ def test_interpolate_pd_compat(shape, frac_nan, method): @requires_scipy @pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1)]) @pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) -@pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', +@pytest.mark.parametrize('method', ['time', 'index', 'values', 'linear', + 'nearest', 'zero', 'slinear', 'quadratic', 'cubic']) def test_interpolate_pd_compat_non_uniform_index(shape, frac_nan, method): + # translate pandas syntax to xarray equivalent + xmethod = method + use_coordinate = False + if method in ['time', 'index', 'values']: + use_coordinate = True + xmethod = 'linear' + elif method in ['nearest', 'slinear', 'quadratic', 'cubic']: + use_coordinate = True + da, df = 
make_interpolate_example_data(shape, frac_nan, non_uniform=True) for dim in ['time', 'x']: - actual = da.interpolate_na(method=method, dim=dim) + if method == 'time' and dim != 'time': + continue + actual = da.interpolate_na(method=xmethod, dim=dim, + use_coordinate=use_coordinate) expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) np.testing.assert_allclose(actual.values, expected.values) @@ -97,7 +114,8 @@ def test_interpolate_pd_compat_polynomial(shape, frac_nan, order): da, df = make_interpolate_example_data(shape, frac_nan) for dim in ['time', 'x']: - actual = da.interpolate_na(method='polynomial', order=order, dim=dim) + actual = da.interpolate_na(method='polynomial', order=order, dim=dim, + use_coordinate=False) expected = df.interpolate(method='polynomial', order=order, axis=da.get_axis_num(dim)) np.testing.assert_allclose(actual.values, expected.values) @@ -111,17 +129,6 @@ def test_interpolate_unsorted_index_raises(): expected.interpolate_na(dim='x', method='index') -@requires_scipy -def test_interpolate_inplace(): - original = DataArray(np.array([1, 2, 3, np.nan, 5], dtype=np.float64), - dims=('x')) - expected = DataArray(np.array([1, 2, 3, 4, 5], dtype=np.float64), - dims=('x')) - new = original.interpolate_na(inplace=True, dim='x') - assert_identical(original, expected) - assert_identical(original, new) - - @requires_scipy def test_interpolate_no_dim_raises(): da = DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims=('x')) @@ -174,7 +181,31 @@ def test_interpolate_allnans(): def test_interpolate_limits(): - raise NotImplementedError() + da = DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], + dtype=np.float64), dims=('x')) + + actual = da.interpolate_na(dim='x', limit=None) + assert actual.isnull().sum() == 0 + + actual = da.interpolate_na(dim='x', limit=2) + expected = DataArray(np.array([1, 2, 3, 4, np.nan, 6], + dtype=np.float64), dims=('x')) + + assert_equal(actual, expected) + + +@pytest.mark.parametrize( + 'kind, interpolator', + [('linear', NumpyInterpolator), ('linear', ScipyInterpolator), + ('spline', SplineInterpolator)]) +def test_interpolators(kind, interpolator): + xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) + yi = np.array([-10, 0, 10, 20, 50], dtype=np.float64) + x = np.array([3, 4], dtype=np.float64) + + f = interpolator(xi, yi, kind=kind) + out = f(x) + assert pd.isnull(out).sum() == 0 @requires_bottleneck From 42d63eff15a082d37b1ab7c6fc8e8efeb642e5f6 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 12 Nov 2017 19:24:49 -0800 Subject: [PATCH 05/20] more docs, more tests --- xarray/core/dataarray.py | 83 ++++++++++++----- xarray/core/missing.py | 50 ++-------- xarray/tests/test_missing.py | 171 ++++++++++++++++++++++++++++------- 3 files changed, 208 insertions(+), 96 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e76777ef410..558b197e3a1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1223,44 +1223,79 @@ def interpolate_na(self, dim=None, method='linear', limit=None, ---------- dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'time', 'index', 'values', 'nearest'} - 'linear': ignore the index and treat the values as equally - spaced. 
default
-            'time': interpolation works on daily and higher resolution data to
-            interpolate given length of interval
-            'index', 'values': use the actual numerical values of the index
-            'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
-            'polynomial' is passed to scipy.interpolate.interp1d with the
-            order given both 'polynomial' and 'spline' require that you also
-            specify and order (int) e.g. da.interpolate_na(method='polynomial',
-            order=4)
-        use_coordinate : boolean, default True
-        limit : limit : int, default None
-            Maximum number of consecutive NaNs to fill. Must be greater than 0.
+        method : {'linear', 'time', 'index', 'values', 'nearest'}, optional
+            String indicating which method to use for interpolation:
+
+            - 'linear': linear interpolation (Default). Additional keyword
+              arguments are passed to ``numpy.interp``
+            - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
+              'polynomial': are passed to ``scipy.interpolate.interp1d``. If
+              method=='polynomial', the ``order`` keyword argument must also be
+              provided.
+            - 'barycentric', 'krog', 'pchip', 'spline': use their respective
+              ``scipy.interpolate`` classes.
+        use_coordinate : boolean or str, default True
+            Specifies which index to use as the x values in the interpolation
+            formulated as `y = f(x)`. If False, values are treated as if
+            equally-spaced along `dim`. If True, the IndexVariable `dim` is
+            used. If use_coordinate is a string, it specifies the name of a
+            coordinate variable to use as the index.
+        limit : int, default None
+            Maximum number of consecutive NaNs to fill. Must be greater than 0
+            or None for no limit.

         Returns
         -------
         DataArray
+
+        See also
+        --------
+        numpy.interp
+        scipy.interpolate
         """
         from .missing import interp_na
         return interp_na(self, dim=dim, method=method, limit=limit,
                          use_coordinate=use_coordinate, **kwargs)

-    def interpolate_at(self, dim, locs, method='linear', inplace=False,
-                       limit=None, **kwargs):
-        # this is just here so I remember the signature we discussed
-        # dim: the dimension along which to interpolate
-        # locs: a broadcastable boolean mask describing where interpolation
-        #       should happen
-        raise NotImplementedError()
-
     def ffill(self, dim, limit=None):
-        '''TODO'''
+        '''Fill NaN values by propagating values forward
+
+        Parameters
+        ----------
+        dim : str
+            Specifies the dimension along which to propagate values when
+            filling.
+        limit : int, default None
+            The maximum number of consecutive NaN values to forward fill. In
+            other words, if there is a gap with more than this number of
+            consecutive NaNs, it will only be partially filled. Must be greater
+            than 0 or None for no limit.
+
+        Returns
+        -------
+        DataArray
+        '''
         from .missing import ffill
         return ffill(self, dim, limit=limit)

     def bfill(self, dim, limit=None):
-        '''TODO'''
+        '''Fill NaN values by propagating values backward
+
+        Parameters
+        ----------
+        dim : str
+            Specifies the dimension along which to propagate values when
+            filling.
+        limit : int, default None
+            The maximum number of consecutive NaN values to backward fill. In
+            other words, if there is a gap with more than this number of
+            consecutive NaNs, it will only be partially filled. Must be greater
+            than 0 or None for no limit.
+ + Returns + ------- + DataArray + ''' from .missing import bfill return bfill(self, dim, limit=limit) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 682434cd3a4..6254b778c61 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -86,7 +86,7 @@ def __init__(self, xi, yi, kind=None, fill_value=None, assume_sorted=True, if kind == 'polynomial': kind = kwargs.pop('order', None) if kind is None: - raise ValueError('order is required when kind=polynomial') + raise ValueError('order is required when method=polynomial') self.kind = kind @@ -102,28 +102,6 @@ def __init__(self, xi, yi, kind=None, fill_value=None, assume_sorted=True, bounds_error=False, **self.cons_kwargs) -class FromDerivativesInterpolator(BaseInterpolator): - '''Piecewise polynomial in terms of coefficients and breakpoints. - - See Also - -------- - scipy.interpolate.BPoly - ''' - def __init__(self, xi, yi, kind=None, fill_value=None, **kwargs): - from scipy.interpolate import BPoly - - if kind is None: - raise ValueError('kind is a required argument') - - self.kind = kind - self.cons_kwargs = kwargs - - if fill_value is not None: - raise ValueError('from_derivatives does not support fill_value') - - self.f = BPoly.from_derivatives(xi, yi, **self.cons_kwargs) - - class SplineInterpolator(BaseInterpolator): '''One-dimensional smoothing spline fit to a given set of data points. @@ -160,10 +138,12 @@ def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): ''' if use_coordinate: - index = arr.get_index(dim) + if use_coordinate is True: + index = arr.get_index(dim) + else: + index = arr.coords[use_coordinate] if isinstance(index, pd.DatetimeIndex): index = index.values.astype(np.float64) - # check index sorting now so we can skip it later if not (np.diff(index) > 0).all(): raise ValueError("Index must be monotonicly increasing") @@ -188,21 +168,17 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, # method index = get_clean_interp_index(self, dim, use_coordinate=use_coordinate, **kwargs) - kwargs.update(kind=method) - interpolator = _get_interpolator(method, **kwargs) arr = apply_ufunc(interpolator, index, self, input_core_dims=[[dim], [dim]], output_core_dims=[[dim]], output_dtypes=[self.dtype], - output_sizes=dict(zip(self.dims, self.shape)), dask='parallelized', vectorize=True, keep_attrs=True).transpose(*self.dims) if limit is not None: - print(valids) arr = arr.where(valids) return arr @@ -210,11 +186,6 @@ def interp_na(self, dim=None, use_coordinate=True, method='linear', limit=None, def wrap_interpolator(interpolator, x, y, **kwargs): '''helper function to apply interpolation along 1 dimension''' - if x.shape != y.shape: - # this can probably be removed once I get apply_ufuncs to work - raise AssertionError("x and y shapes do not match " - "%s != %s" % (x.shape, y.shape)) - # it would be nice if this wasn't necessary, works around: # "ValueError: assignment destination is read-only" in assignment below out = y.copy() @@ -282,11 +253,12 @@ def _get_interpolator(method, **kwargs): ''' interp1d_methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial'] - valid_methods = interp1d_methods + ['piecewise_polynomial', 'barycentric', - 'krog', 'pchip', 'spline'] + valid_methods = interp1d_methods + ['barycentric', 'krog', 'pchip', + 'spline'] if (method == 'linear' and not kwargs.get('fill_value', None) == 'extrapolate'): + kwargs.update(kind=method) interp_class = NumpyInterpolator elif method in valid_methods: try: @@ -295,11 
+267,8 @@ def _get_interpolator(method, **kwargs): raise ImportError( 'Interpolation with method `%s` requires scipy' % method) if method in interp1d_methods: + kwargs.update(kind=method) interp_class = ScipyInterpolator - elif method == 'piecewise_polynomial': - raise NotImplementedError( - '%s has not been fully implemented' % method) - # interp_class = FromDerivativesInterpolator elif method == 'barycentric': interp_class = interpolate.BarycentricInterpolator elif method == 'krog': @@ -307,6 +276,7 @@ def _get_interpolator(method, **kwargs): elif method == 'pchip': interp_class = interpolate.PchipInterpolator elif method == 'spline': + kwargs.update(kind=method) interp_class = SplineInterpolator else: raise ValueError('%s is not a valid scipy interpolator' % method) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 8152a1282c3..b709e595563 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -5,15 +5,14 @@ import pandas as pd import pytest -from xarray import DataArray +import xarray as xr from xarray.core.missing import (NumpyInterpolator, ScipyInterpolator, - FromDerivativesInterpolator, SplineInterpolator) +from xarray.core.pycompat import dask_array_type -from xarray.tests import ( - assert_identical, assert_equal, raises_regex, requires_scipy, - requires_bottleneck, assert_array_equal) +from xarray.tests import (assert_equal, assert_array_equal, raises_regex, + requires_scipy, requires_bottleneck, requires_dask) @pytest.fixture(params=[1]) @@ -21,12 +20,12 @@ def da(request): if request.param == 1: times = pd.date_range('2000-01-01', freq='1D', periods=21) values = np.random.random((3, 21, 4)) - da = DataArray(values, dims=('a', 'time', 'x')) + da = xr.DataArray(values, dims=('a', 'time', 'x')) da['time'] = times return da if request.param == 2: - return DataArray( + return xr.DataArray( [0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims='time') @@ -61,8 +60,9 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, else: coords = {'time': pd.date_range('2000-01-01', freq='D', periods=shape[0])} - da = DataArray(vals, dims=('time', 'x'), coords=coords) + da = xr.DataArray(vals, dims=('time', 'x'), coords=coords) df = da.to_pandas() + return da, df @@ -124,49 +124,52 @@ def test_interpolate_pd_compat_polynomial(shape, frac_nan, order): @requires_scipy def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) - expected = DataArray(vals, dims=('x'), coords={'x': [2, 1, 3]}) - with raises_regex(ValueError, 'must be monotonicly increasing'): + expected = xr.DataArray(vals, dims='x', coords={'x': [2, 1, 3]}) + with raises_regex(ValueError, 'Index must be monotonicly increasing'): expected.interpolate_na(dim='x', method='index') -@requires_scipy def test_interpolate_no_dim_raises(): - da = DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims=('x')) + da = xr.DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims='x') with raises_regex(NotImplementedError, 'dim is a required argument'): da.interpolate_na(method='linear') +def test_interpolate_invalid_interpolator_raises(): + da = xr.DataArray(np.array([1, 2, np.nan, 5], dtype=np.float64), dims='x') + with raises_regex(ValueError, 'not a valid'): + da.interpolate_na(dim='x', method='foo') + + @requires_scipy def test_interpolate_kwargs(): - da = DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims=('x')) - expected = DataArray(np.array([4, 5, 6], dtype=np.float64), dims=('x')) + da = xr.DataArray(np.array([4, 5, 
np.nan], dtype=np.float64), dims='x') + expected = xr.DataArray(np.array([4, 5, 6], dtype=np.float64), dims='x') actual = da.interpolate_na(dim='x', fill_value='extrapolate') assert_equal(actual, expected) - expected = DataArray(np.array([4, 5, -999], dtype=np.float64), dims=('x')) + expected = xr.DataArray(np.array([4, 5, -999], dtype=np.float64), dims='x') actual = da.interpolate_na(dim='x', fill_value=-999) assert_equal(actual, expected) -@requires_scipy def test_interpolate(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) - expected = DataArray(vals, dims=('x')) + expected = xr.DataArray(vals, dims='x') mvals = vals.copy() mvals[2] = np.nan - missing = DataArray(mvals, dims=('x')) + missing = xr.DataArray(mvals, dims='x') actual = missing.interpolate_na(dim='x') assert_equal(actual, expected) -@requires_scipy def test_interpolate_nonans(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) - expected = DataArray(vals, dims=('x')) + expected = xr.DataArray(vals, dims='x') actual = expected.interpolate_na(dim='x') assert_equal(actual, expected) @@ -174,30 +177,49 @@ def test_interpolate_nonans(): @requires_scipy def test_interpolate_allnans(): vals = np.full(6, np.nan, dtype=np.float64) - expected = DataArray(vals, dims=('x')) + expected = xr.DataArray(vals, dims='x') actual = expected.interpolate_na(dim='x') assert_equal(actual, expected) +@requires_bottleneck def test_interpolate_limits(): - da = DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], - dtype=np.float64), dims=('x')) + da = xr.DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], + dtype=np.float64), dims='x') actual = da.interpolate_na(dim='x', limit=None) assert actual.isnull().sum() == 0 actual = da.interpolate_na(dim='x', limit=2) - expected = DataArray(np.array([1, 2, 3, 4, np.nan, 6], - dtype=np.float64), dims=('x')) + expected = xr.DataArray(np.array([1, 2, 3, 4, np.nan, 6], + dtype=np.float64), dims='x') assert_equal(actual, expected) +@pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', + 'quadratic', 'cubic', 'polynomial', + 'barycentric', 'krog', 'pchip', 'spline']) +@requires_scipy +def test_interpolate_methods(method): + kwargs = {} + if method == 'polynomial': + kwargs['order'] = 1 + da = xr.DataArray(np.array([0, 1, 2, np.nan, np.nan, np.nan, 6, 7, 8], + dtype=np.float64), dims='x') + actual = da.interpolate_na('x', method=method, **kwargs) + assert actual.isnull().sum() == 0 + + actual = da.interpolate_na('x', method=method, limit=2, **kwargs) + assert actual.isnull().sum() == 1 + + @pytest.mark.parametrize( 'kind, interpolator', [('linear', NumpyInterpolator), ('linear', ScipyInterpolator), ('spline', SplineInterpolator)]) +@requires_scipy def test_interpolators(kind, interpolator): xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) yi = np.array([-10, 0, 10, 20, 50], dtype=np.float64) @@ -208,19 +230,102 @@ def test_interpolators(kind, interpolator): assert pd.isnull(out).sum() == 0 +def test_interpolate_use_coordinate(): + xc = xr.Variable('x', [100, 200, 300, 400, 500, 600]) + da = xr.DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], + dtype=np.float64), + dims='x', coords={'xc': xc}) + + # use_coordinate == False is same as using the default index + actual = da.interpolate_na(dim='x', use_coordinate=False) + expected = da.interpolate_na(dim='x') + assert_equal(actual, expected) + + # possible to specify non index coordinate + actual = da.interpolate_na(dim='x', use_coordinate='xc') + expected = da.interpolate_na(dim='x') + assert_equal(actual, expected) + + # 
possible to specify index coordinate by name + actual = da.interpolate_na(dim='x', use_coordinate='x') + expected = da.interpolate_na(dim='x') + assert_equal(actual, expected) + + +@requires_dask +def test_interpolate_dask(): + da, _ = make_interpolate_example_data((40, 40), 0.5) + da = da.chunk({'x': 5}) + actual = da.interpolate_na('time') + expected = da.load().interpolate_na('time') + assert isinstance(actual.data, dask_array_type) + assert_equal(actual.compute(), expected) + + # with limit + da = da.chunk({'x': 5}) + actual = da.interpolate_na('time', limit=3) + expected = da.load().interpolate_na('time', limit=3) + assert isinstance(actual.data, dask_array_type) + assert_equal(actual, expected) + + +@requires_dask +def test_interpolate_dask_raises_for_invalid_chunk_dim(): + da, _ = make_interpolate_example_data((40, 40), 0.5) + da = da.chunk({'time': 5}) + with raises_regex(ValueError, "dask='parallelized' consists of multiple"): + da.interpolate_na('time') + + @requires_bottleneck def test_ffill(): - da = DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims=('x')) - expected = DataArray(np.array([4, 5, 5], dtype=np.float64), dims=('x')) + da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims='x') + expected = xr.DataArray(np.array([4, 5, 5], dtype=np.float64), dims='x') actual = da.ffill('x') assert_equal(actual, expected) +@requires_bottleneck +@requires_dask +def test_ffill_dask(): + da, _ = make_interpolate_example_data((40, 40), 0.5) + da = da.chunk({'x': 5}) + actual = da.ffill('time') + expected = da.load().ffill('time') + assert isinstance(actual.data, dask_array_type) + assert_equal(actual, expected) + + # with limit + da = da.chunk({'x': 5}) + actual = da.ffill('time', limit=3) + expected = da.load().ffill('time', limit=3) + assert isinstance(actual.data, dask_array_type) + assert_equal(actual, expected) + + +@requires_bottleneck +@requires_dask +def test_bfill_dask(): + da, _ = make_interpolate_example_data((40, 40), 0.5) + da = da.chunk({'x': 5}) + actual = da.bfill('time') + expected = da.load().bfill('time') + assert isinstance(actual.data, dask_array_type) + assert_equal(actual, expected) + + # with limit + da = da.chunk({'x': 5}) + actual = da.bfill('time', limit=3) + expected = da.load().bfill('time', limit=3) + assert isinstance(actual.data, dask_array_type) + assert_equal(actual, expected) + + @requires_bottleneck def test_ffill_bfill_nonans(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) - expected = DataArray(vals, dims='x') + expected = xr.DataArray(vals, dims='x') actual = expected.ffill(dim='x') assert_equal(actual, expected) @@ -233,7 +338,7 @@ def test_ffill_bfill_nonans(): def test_ffill_bfill_allnans(): vals = np.full(6, np.nan, dtype=np.float64) - expected = DataArray(vals, dims='x') + expected = xr.DataArray(vals, dims='x') actual = expected.ffill(dim='x') assert_equal(actual, expected) @@ -242,20 +347,22 @@ def test_ffill_bfill_allnans(): assert_equal(actual, expected) +@requires_bottleneck @pytest.mark.parametrize('da', (1, 2), indirect=True) def test_ffill_functions(da): result = da.ffill('time') assert result.isnull().sum() == 0 +@requires_bottleneck def test_ffill_limit(): - da = DataArray( + da = xr.DataArray( [0, np.nan, np.nan, np.nan, np.nan, 3, 4, 5, np.nan, 6, 7], dims='time') result = da.ffill('time') - expected = DataArray([0, 0, 0, 0, 0, 3, 4, 5, 5, 6, 7], dims='time') + expected = xr.DataArray([0, 0, 0, 0, 0, 3, 4, 5, 5, 6, 7], dims='time') assert_array_equal(result, expected) result = da.ffill('time', limit=1) 
- expected = DataArray( + expected = xr.DataArray( [0, 0, np.nan, np.nan, np.nan, 3, 4, 5, 5, 6, 7], dims='time') From 19d21b8aee1c828487f3e2eec3ce460ff6d9761d Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 13 Nov 2017 14:38:24 -0800 Subject: [PATCH 06/20] backward compat and add benchmarks --- .travis.yml | 2 +- asv_bench/benchmarks/dataarray_missing.py | 74 +++++++++++++++++++++++ xarray/core/npcompat.py | 73 +++++++++++++++++++++- 3 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 asv_bench/benchmarks/dataarray_missing.py diff --git a/.travis.yml b/.travis.yml index d797e9844bc..5bf07e8b443 100644 --- a/.travis.yml +++ b/.travis.yml @@ -89,9 +89,9 @@ install: - python xarray/util/print_versions.py script: + - git diff upstream/master **/*py | flake8 --diff --exit-zero || true - python -OO -c "import xarray" - py.test xarray --cov=xarray --cov-config ci/.coveragerc --cov-report term-missing --verbose $EXTRA_FLAGS - - git diff upstream/master **/*py | flake8 --diff --exit-zero || true after_success: - coveralls diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py new file mode 100644 index 00000000000..f121971365e --- /dev/null +++ b/asv_bench/benchmarks/dataarray_missing.py @@ -0,0 +1,74 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pandas as pd + +try: + import dask + import dask.multiprocessing +except ImportError: + pass + +import xarray as xr + +from . import randn, requires_dask + + +def make_bench_data(shape, frac_nan, chunks): + vals = randn(shape, frac_nan) + coords = {'time': pd.date_range('2000-01-01', freq='D', + periods=shape[0])} + da = xr.DataArray(vals, dims=('time', 'x', 'y'), coords=coords) + + if chunks is not None: + da = da.chunk(chunks) + + return da + + +def time_interpolate_na(shape, chunks, method, limit): + if chunks is not None: + requires_dask() + da = make_bench_data(shape, 0.1, chunks=chunks) + actual = da.interpolate_na(dim='time', method='linear', limit=limit) + + if chunks is not None: + actual = actual.compute() + + +time_interpolate_na.param_names = ['shape', 'chunks', 'method', 'limit'] +time_interpolate_na.params = ([(3650, 200, 400), (100, 25, 25)], + [None, {'x': 25, 'y': 25}], + ['linear', 'spline', 'quadratic', 'cubic'], + [None, 3]) + + +def time_ffill(shape, chunks, limit): + + da = make_bench_data(shape, 0.1, chunks=chunks) + actual = da.ffill(dim='time', limit=limit) + + if chunks is not None: + actual = actual.compute() + + +time_ffill.param_names = ['shape', 'chunks', 'limit'] +time_ffill.params = ([(3650, 200, 400), (100, 25, 25)], + [None, {'x': 25, 'y': 25}], + [None, 3]) + + +def time_bfill(shape, chunks, limit): + + da = make_bench_data(shape, 0.1, chunks=chunks) + actual = da.bfill(dim='time', limit=limit) + + if chunks is not None: + actual = actual.compute() + + +time_bfill.param_names = ['shape', 'chunks', 'limit'] +time_bfill.params = ([(3650, 200, 400), (100, 25, 25)], + [None, {'x': 25, 'y': 25}], + [None, 3]) diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index fc080c63e7e..fa01e37e94f 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -4,7 +4,7 @@ import numpy as np try: - from numpy import nancumsum, nancumprod + from numpy import nancumsum, nancumprod, flip except ImportError: # pragma: no cover # Code copied from newer versions of NumPy (v1.12). # Used under the terms of NumPy's license, see licenses/NUMPY_LICENSE. 
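For context on why missing.py needs `np.flip` (and hence the backport added below for NumPy < 1.12): bottleneck's `push` only fills forward, so `_bfill` reverses the array along the axis, forward-fills, and reverses back. A standalone illustration with made-up values, assuming bottleneck is installed:

    import bottleneck as bn
    import numpy as np

    arr = np.array([np.nan, 1.0, np.nan, np.nan, 4.0, np.nan])

    # forward fill: propagate the last valid value to the right
    ffilled = bn.push(arr, axis=-1)        # [nan, 1., 1., 1., 4., 4.]

    # backward fill: flip, forward fill, flip back (the _bfill trick)
    bfilled = np.flip(bn.push(np.flip(arr, axis=-1), axis=-1), axis=-1)
    # -> [1., 1., 4., 4., 4., nan]
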
@@ -174,3 +174,74 @@ def nancumprod(a, axis=None, dtype=None, out=None): """ a, mask = _replace_nan(a, 1) return np.cumprod(a, axis=axis, dtype=dtype, out=out) + + def flip(m, axis): + """ + Reverse the order of elements in an array along the given axis. + + The shape of the array is preserved, but the elements are reordered. + + .. versionadded:: 1.12.0 + + Parameters + ---------- + m : array_like + Input array. + axis : integer + Axis in array, which entries are reversed. + + + Returns + ------- + out : array_like + A view of `m` with the entries of axis reversed. Since a view is + returned, this operation is done in constant time. + + See Also + -------- + flipud : Flip an array vertically (axis=0). + fliplr : Flip an array horizontally (axis=1). + + Notes + ----- + flip(m, 0) is equivalent to flipud(m). + flip(m, 1) is equivalent to fliplr(m). + flip(m, n) corresponds to ``m[...,::-1,...]`` with ``::-1`` at position n. + + Examples + -------- + >>> A = np.arange(8).reshape((2,2,2)) + >>> A + array([[[0, 1], + [2, 3]], + + [[4, 5], + [6, 7]]]) + + >>> flip(A, 0) + array([[[4, 5], + [6, 7]], + + [[0, 1], + [2, 3]]]) + + >>> flip(A, 1) + array([[[2, 3], + [0, 1]], + + [[6, 7], + [4, 5]]]) + + >>> A = np.random.randn(3,4,5) + >>> np.all(flip(A,2) == A[:,:,::-1,...]) + True + """ + if not hasattr(m, 'ndim'): + m = np.asarray(m) + indexer = [slice(None)] * m.ndim + try: + indexer[axis] = slice(None, None, -1) + except IndexError: + raise ValueError("axis=%i is invalid for the %i-dimensional " + "input array" % (axis, m.ndim)) + return m[tuple(indexer)] From f937c07f1b33e823544664b076a37c9f2d5b53ea Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 13 Nov 2017 14:56:31 -0800 Subject: [PATCH 07/20] skip tests for numpy versions before 1.12 --- xarray/tests/__init__.py | 1 + xarray/tests/test_missing.py | 27 ++++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index d4d1b092631..150fdb435a3 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -71,6 +71,7 @@ def _importorskip(modname, minversion=None): has_bottleneck, requires_bottleneck = _importorskip('bottleneck') has_rasterio, requires_rasterio = _importorskip('rasterio') has_pathlib, requires_pathlib = _importorskip('pathlib') +has_np112, requires_np112 = _importorskip('numpy', minversion='1.12.0') # some special cases has_scipy_or_netCDF4 = has_scipy or has_netCDF4 diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index b709e595563..0115239cf77 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -12,7 +12,8 @@ from xarray.core.pycompat import dask_array_type from xarray.tests import (assert_equal, assert_array_equal, raises_regex, - requires_scipy, requires_bottleneck, requires_dask) + requires_scipy, requires_bottleneck, requires_dask, + requires_np112) @pytest.fixture(params=[1]) @@ -66,6 +67,7 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, return da, df +@requires_np112 @requires_scipy @pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1), (100, 100)]) @pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) @@ -80,6 +82,7 @@ def test_interpolate_pd_compat(shape, frac_nan, method): np.testing.assert_allclose(actual.values, expected.values) +@requires_np112 @requires_scipy @pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1)]) @pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) @@ -106,6 +109,7 @@ def test_interpolate_pd_compat_non_uniform_index(shape, 
frac_nan, method): np.testing.assert_allclose(actual.values, expected.values) +@requires_np112 @requires_scipy @pytest.mark.parametrize('shape', [(8, 8), (100, 100)]) @pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) @@ -141,6 +145,7 @@ def test_interpolate_invalid_interpolator_raises(): da.interpolate_na(dim='x', method='foo') +@requires_np112 @requires_scipy def test_interpolate_kwargs(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims='x') @@ -153,6 +158,7 @@ def test_interpolate_kwargs(): assert_equal(actual, expected) +@requires_np112 def test_interpolate(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) @@ -166,6 +172,7 @@ def test_interpolate(): assert_equal(actual, expected) +@requires_np112 def test_interpolate_nonans(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) @@ -174,6 +181,7 @@ def test_interpolate_nonans(): assert_equal(actual, expected) +@requires_np112 @requires_scipy def test_interpolate_allnans(): vals = np.full(6, np.nan, dtype=np.float64) @@ -183,6 +191,7 @@ def test_interpolate_allnans(): assert_equal(actual, expected) +@requires_np112 @requires_bottleneck def test_interpolate_limits(): da = xr.DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], @@ -198,10 +207,11 @@ def test_interpolate_limits(): assert_equal(actual, expected) +@requires_np112 +@requires_scipy @pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial', 'barycentric', 'krog', 'pchip', 'spline']) -@requires_scipy def test_interpolate_methods(method): kwargs = {} if method == 'polynomial': @@ -215,11 +225,12 @@ def test_interpolate_methods(method): assert actual.isnull().sum() == 1 +@requires_np112 +@requires_scipy @pytest.mark.parametrize( 'kind, interpolator', [('linear', NumpyInterpolator), ('linear', ScipyInterpolator), ('spline', SplineInterpolator)]) -@requires_scipy def test_interpolators(kind, interpolator): xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) yi = np.array([-10, 0, 10, 20, 50], dtype=np.float64) @@ -230,6 +241,7 @@ def test_interpolators(kind, interpolator): assert pd.isnull(out).sum() == 0 +@requires_np112 def test_interpolate_use_coordinate(): xc = xr.Variable('x', [100, 200, 300, 400, 500, 600]) da = xr.DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], @@ -252,6 +264,7 @@ def test_interpolate_use_coordinate(): assert_equal(actual, expected) +@requires_np112 @requires_dask def test_interpolate_dask(): da, _ = make_interpolate_example_data((40, 40), 0.5) @@ -269,6 +282,7 @@ def test_interpolate_dask(): assert_equal(actual, expected) +@requires_np112 @requires_dask def test_interpolate_dask_raises_for_invalid_chunk_dim(): da, _ = make_interpolate_example_data((40, 40), 0.5) @@ -277,6 +291,7 @@ def test_interpolate_dask_raises_for_invalid_chunk_dim(): da.interpolate_na('time') +@requires_np112 @requires_bottleneck def test_ffill(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims='x') @@ -285,6 +300,7 @@ def test_ffill(): assert_equal(actual, expected) +@requires_np112 @requires_bottleneck @requires_dask def test_ffill_dask(): @@ -303,6 +319,7 @@ def test_ffill_dask(): assert_equal(actual, expected) +@requires_np112 @requires_bottleneck @requires_dask def test_bfill_dask(): @@ -321,6 +338,7 @@ def test_bfill_dask(): assert_equal(actual, expected) +@requires_np112 @requires_bottleneck def test_ffill_bfill_nonans(): @@ -334,6 +352,7 @@ def test_ffill_bfill_nonans(): assert_equal(actual, expected) +@requires_np112 @requires_bottleneck def test_ffill_bfill_allnans(): @@ 
-347,6 +366,7 @@ def test_ffill_bfill_allnans(): assert_equal(actual, expected) +@requires_np112 @requires_bottleneck @pytest.mark.parametrize('da', (1, 2), indirect=True) def test_ffill_functions(da): @@ -354,6 +374,7 @@ def test_ffill_functions(da): assert result.isnull().sum() == 0 +@requires_np112 @requires_bottleneck def test_ffill_limit(): da = xr.DataArray( From 8717e3848834da94bdd809979854b0cd50288ad5 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 13 Nov 2017 15:36:16 -0800 Subject: [PATCH 08/20] test fixes for py27 fixture --- xarray/tests/test_missing.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 0115239cf77..ea0d8aa9d02 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -16,19 +16,10 @@ requires_np112) -@pytest.fixture(params=[1]) -def da(request): - if request.param == 1: - times = pd.date_range('2000-01-01', freq='1D', periods=21) - values = np.random.random((3, 21, 4)) - da = xr.DataArray(values, dims=('a', 'time', 'x')) - da['time'] = times - return da - - if request.param == 2: - return xr.DataArray( - [0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], - dims='time') +@pytest.fixture +def da(): + return xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], + dims='time') def make_interpolate_example_data(shape, frac_nan, seed=12345, @@ -368,7 +359,6 @@ def test_ffill_bfill_allnans(): @requires_np112 @requires_bottleneck -@pytest.mark.parametrize('da', (1, 2), indirect=True) def test_ffill_functions(da): result = da.ffill('time') assert result.isnull().sum() == 0 From 3d5c1b15ad42c1549af4657020146db6d0a8b89b Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 13 Nov 2017 15:51:46 -0800 Subject: [PATCH 09/20] try reording decorators --- xarray/tests/test_missing.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index ea0d8aa9d02..aecb332e969 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -58,12 +58,12 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, return da, df -@requires_np112 -@requires_scipy @pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1), (100, 100)]) @pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) @pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic']) +@requires_np112 +@requires_scipy def test_interpolate_pd_compat(shape, frac_nan, method): da, df = make_interpolate_example_data(shape, frac_nan) @@ -73,13 +73,13 @@ def test_interpolate_pd_compat(shape, frac_nan, method): np.testing.assert_allclose(actual.values, expected.values) -@requires_np112 -@requires_scipy @pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1)]) @pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) @pytest.mark.parametrize('method', ['time', 'index', 'values', 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic']) +@requires_np112 +@requires_scipy def test_interpolate_pd_compat_non_uniform_index(shape, frac_nan, method): # translate pandas syntax to xarray equivalent xmethod = method @@ -100,11 +100,11 @@ def test_interpolate_pd_compat_non_uniform_index(shape, frac_nan, method): np.testing.assert_allclose(actual.values, expected.values) -@requires_np112 -@requires_scipy @pytest.mark.parametrize('shape', [(8, 8), (100, 100)]) @pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) @pytest.mark.parametrize('order', [1, 2, 3]) 
+@requires_np112 +@requires_scipy def test_interpolate_pd_compat_polynomial(shape, frac_nan, order): da, df = make_interpolate_example_data(shape, frac_nan) @@ -198,11 +198,11 @@ def test_interpolate_limits(): assert_equal(actual, expected) -@requires_np112 -@requires_scipy @pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial', 'barycentric', 'krog', 'pchip', 'spline']) +@requires_np112 +@requires_scipy def test_interpolate_methods(method): kwargs = {} if method == 'polynomial': @@ -216,12 +216,11 @@ def test_interpolate_methods(method): assert actual.isnull().sum() == 1 -@requires_np112 -@requires_scipy @pytest.mark.parametrize( 'kind, interpolator', [('linear', NumpyInterpolator), ('linear', ScipyInterpolator), ('spline', SplineInterpolator)]) +@requires_scipy def test_interpolators(kind, interpolator): xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) yi = np.array([-10, 0, 10, 20, 50], dtype=np.float64) From 1864e8f318b3b4c6bf74ce208c21435113c22e24 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Thu, 16 Nov 2017 08:49:33 -0800 Subject: [PATCH 10/20] minor reorg of travis to make the flake8 check useful --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5bf07e8b443..1d902fb4471 100644 --- a/.travis.yml +++ b/.travis.yml @@ -89,7 +89,7 @@ install: - python xarray/util/print_versions.py script: - - git diff upstream/master **/*py | flake8 --diff --exit-zero || true + - git diff upstream/master xarray/**/*py | flake8 --diff --exit-zero || true - python -OO -c "import xarray" - py.test xarray --cov=xarray --cov-config ci/.coveragerc --cov-report term-missing --verbose $EXTRA_FLAGS From f58d464fc468c55611fde930b852e80b1d08a69b Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sat, 18 Nov 2017 09:33:16 -0800 Subject: [PATCH 11/20] cleanup following @fujiisoup's comments --- xarray/core/dataarray.py | 4 ++- xarray/core/missing.py | 63 ++++++++++++++++++++++-------------- xarray/tests/test_missing.py | 6 ++-- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5601c053218..e3e4d238d95 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1229,7 +1229,9 @@ def interpolate_na(self, dim=None, method='linear', limit=None, ---------- dim : str Specifies the dimension along which to interpolate. - method : {'linear', 'time', 'index', 'values', 'nearest'}, optional + method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', + 'polynomial', 'barycentric', 'krog', 'pchip', + 'spline'}, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). 
Additional keyword diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 6254b778c61..a3dc467947d 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -18,18 +18,18 @@ class BaseInterpolator(object): cons_kwargs = {} call_kwargs = {} f = None - kind = None + method = None - def __init__(self, xi, yi, kind=None, **kwargs): - self.kind = kind + def __init__(self, xi, yi, method=None, **kwargs): + self.method = method self.call_kwargs = kwargs def __call__(self, x): return self.f(x, **self.call_kwargs) def __repr__(self): - return "{type}: kind={kind}".format(type=self.__class__.__name__, - kind=self.kind) + return "{type}: method={method}".format(type=self.__class__.__name__, + method=self.method) class NumpyInterpolator(BaseInterpolator): @@ -39,8 +39,13 @@ class NumpyInterpolator(BaseInterpolator): -------- numpy.interp ''' - def __init__(self, xi, yi, kind='linear', fill_value=None, **kwargs): - self.kind = kind + def __init__(self, xi, yi, method='linear', fill_value=None, **kwargs): + + if method is not 'linear': + raise ValueError( + 'only method `linear` is valid for the NumpyInterpolator') + + self.method = method self.f = np.interp self.cons_kwargs = kwargs self.call_kwargs = {'period': self.cons_kwargs.pop('period', None)} @@ -76,30 +81,32 @@ class ScipyInterpolator(BaseInterpolator): -------- scipy.interpolate.interp1d ''' - def __init__(self, xi, yi, kind=None, fill_value=None, assume_sorted=True, - copy=False, bounds_error=False, **kwargs): + def __init__(self, xi, yi, method=None, fill_value=None, + assume_sorted=True, copy=False, bounds_error=False, **kwargs): from scipy.interpolate import interp1d - if kind is None: - raise ValueError('kind is a required argument') + if method is None: + raise ValueError('method is a required argument, please supply a ' + 'valid scipy.inter1d method (kind)') - if kind == 'polynomial': - kind = kwargs.pop('order', None) - if kind is None: + if method == 'polynomial': + method = kwargs.pop('order', None) + if method is None: raise ValueError('order is required when method=polynomial') - self.kind = kind + self.method = method self.cons_kwargs = kwargs self.call_kwargs = {} - if fill_value is None and kind == 'linear': + if fill_value is None and method == 'linear': fill_value = kwargs.pop('fill_value', (np.nan, yi[-1])) elif fill_value is None: fill_value = np.nan - self.f = interp1d(xi, yi, kind=self.kind, fill_value=fill_value, - bounds_error=False, **self.cons_kwargs) + self.f = interp1d(xi, yi, kind=self.method, fill_value=fill_value, + bounds_error=False, assume_sorted=assume_sorted, + copy=copy, **self.cons_kwargs) class SplineInterpolator(BaseInterpolator): @@ -109,13 +116,15 @@ class SplineInterpolator(BaseInterpolator): -------- scipy.interpolate.UnivariateSpline ''' - def __init__(self, xi, yi, kind=None, fill_value=None, order=3, **kwargs): + def __init__(self, xi, yi, method='spline', fill_value=None, order=3, + **kwargs): from scipy.interpolate import UnivariateSpline - if kind is None: - raise ValueError('kind is a required argument') + if method is not 'spline': + raise ValueError( + 'only method `spline` is valid for the SplineInterpolator') - self.kind = kind + self.method = method self.cons_kwargs = kwargs self.call_kwargs['nu'] = kwargs.pop('nu', 0) self.call_kwargs['ext'] = kwargs.pop('ext', None) @@ -142,6 +151,10 @@ def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): index = arr.get_index(dim) else: index = arr.coords[use_coordinate] + if index.ndim != 1: + raise ValueError( + 
'Coordinates used for interpolation must be 1D, ' + '%s is %dD.' % (use_coordinate, index.ndim)) if isinstance(index, pd.DatetimeIndex): index = index.values.astype(np.float64) # check index sorting now so we can skip it later @@ -258,7 +271,7 @@ def _get_interpolator(method, **kwargs): if (method == 'linear' and not kwargs.get('fill_value', None) == 'extrapolate'): - kwargs.update(kind=method) + kwargs.update(method=method) interp_class = NumpyInterpolator elif method in valid_methods: try: @@ -267,7 +280,7 @@ def _get_interpolator(method, **kwargs): raise ImportError( 'Interpolation with method `%s` requires scipy' % method) if method in interp1d_methods: - kwargs.update(kind=method) + kwargs.update(method=method) interp_class = ScipyInterpolator elif method == 'barycentric': interp_class = interpolate.BarycentricInterpolator @@ -276,7 +289,7 @@ def _get_interpolator(method, **kwargs): elif method == 'pchip': interp_class = interpolate.PchipInterpolator elif method == 'spline': - kwargs.update(kind=method) + kwargs.update(method=method) interp_class = SplineInterpolator else: raise ValueError('%s is not a valid scipy interpolator' % method) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index aecb332e969..ae86726bbb4 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -217,16 +217,16 @@ def test_interpolate_methods(method): @pytest.mark.parametrize( - 'kind, interpolator', + 'method, interpolator', [('linear', NumpyInterpolator), ('linear', ScipyInterpolator), ('spline', SplineInterpolator)]) @requires_scipy -def test_interpolators(kind, interpolator): +def test_interpolators(method, interpolator): xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) yi = np.array([-10, 0, 10, 20, 50], dtype=np.float64) x = np.array([3, 4], dtype=np.float64) - f = interpolator(xi, yi, kind=kind) + f = interpolator(xi, yi, method=method) out = f(x) assert pd.isnull(out).sum() == 0 From 1b938088949d12515766862b549c3f29f2ba3d90 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 26 Nov 2017 20:40:48 -0800 Subject: [PATCH 12/20] dataset missing methods, some more docs, and more scipy interpolators --- doc/api.rst | 3 ++ doc/computation.rst | 20 +++++++- xarray/core/dataarray.py | 14 +++-- xarray/core/dataset.py | 99 ++++++++++++++++++++++++++++++++++++ xarray/core/missing.py | 40 ++++++++++++--- xarray/tests/test_missing.py | 80 ++++++++++++++++++++++------- 6 files changed, 223 insertions(+), 33 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 4d13351e44c..f862a6b2d08 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -148,6 +148,9 @@ Computation :py:attr:`~Dataset.count` :py:attr:`~Dataset.dropna` :py:attr:`~Dataset.fillna` +:py:attr:`~Dataset.ffill` +:py:attr:`~Dataset.bfill` +:py:attr:`~Dataset.interpolate_na` :py:attr:`~Dataset.where` **ndarray methods**: diff --git a/doc/computation.rst b/doc/computation.rst index 087cca64e15..c3dddfe034e 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -59,8 +59,9 @@ Missing values xarray objects borrow the :py:meth:`~xarray.DataArray.isnull`, :py:meth:`~xarray.DataArray.notnull`, :py:meth:`~xarray.DataArray.count`, -:py:meth:`~xarray.DataArray.dropna` and :py:meth:`~xarray.DataArray.fillna` methods -for working with missing data from pandas: +:py:meth:`~xarray.DataArray.dropna`, :py:meth:`~xarray.DataArray.fillna`, +:py:meth:`~xarray.DataArray.ffill`, and :py:meth:`~xarray.DataArray.bfill` +methods for working with missing data from pandas: .. 
ipython:: python @@ -70,10 +71,25 @@ for working with missing data from pandas: x.count() x.dropna(dim='x') x.fillna(-1) + x.ffill() + x.bfill() Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. +xarray objects also have an :py:meth:`~xarray.DataArray.interpolate_na` method +for filling missing values via 1D interpolation. + +.. ipython:: python + + x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], + coords={'xx': [0, 1, 1.1, 1.9, 3]}) + x.interpolate_na(dim='x', method='linear', use_coordinate='xx') + +Note that xarray slightly diverges from the pandas ``interpolate`` syntax by +providing the ``use_coordinate`` keyword which facilitates a clear specification +of which values to use as the index in the interpolation. + Aggregation =========== diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index e3e4d238d95..9a71befc4da 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1231,7 +1231,7 @@ def interpolate_na(self, dim=None, method='linear', limit=None, Specifies the dimension along which to interpolate. method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial', 'barycentric', 'krog', 'pchip', - 'spline'}, optional + 'spline', 'akima'}, optional String indicating which method to use for interpolation: - 'linear': linear interpolation (Default). Additional keyword @@ -1240,8 +1240,8 @@ def interpolate_na(self, dim=None, method='linear', limit=None, 'polynomial': are passed to ``scipy.interpolate.interp1d``. If method=='polynomial', the ``order`` keyword argument must also be provided. - - 'barycentric', 'krog', 'pchip', 'spline': use their respective - ``scipy.interpolate`` classes. + - 'barycentric', 'krog', 'pchip', 'spline', and `akima`: use their + respective``scipy.interpolate`` classes. use_coordinate : boolean or str, default True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if @@ -1268,10 +1268,12 @@ def interpolate_na(self, dim=None, method='linear', limit=None, def ffill(self, dim, limit=None): '''Fill NaN values by propogating values forward + *Requires bottleneck.* + Parameters ---------- dim : str - Specifies the dimension along which to propogate values when + Specifies the dimension along which to propagate values when filling. limit : int, default None The maximum number of consecutive NaN values to forward fill. In @@ -1289,10 +1291,12 @@ def ffill(self, dim, limit=None): def bfill(self, dim, limit=None): '''Fill NaN values by propogating values backward + *Requires bottleneck and Numpy v1.12.0 or later.* + Parameters ---------- dim : str - Specifies the dimension along which to propogate values when + Specifies the dimension along which to propagate values when filling. limit : int, default None The maximum number of consecutive NaN values to backward fill. In diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 56c9df0af93..b363eca7b38 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2393,6 +2393,105 @@ def fillna(self, value): out = ops.fillna(self, value) return out + def interpolate_na(self, dim=None, method='linear', limit=None, + use_coordinate=True, + **kwargs): + """Interpolate values according to different methods. + + Parameters + ---------- + dim : str + Specifies the dimension along which to interpolate. 
+ method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', + 'polynomial', 'barycentric', 'krog', 'pchip', + 'spline'}, optional + String indicating which method to use for interpolation: + + - 'linear': linear interpolation (Default). Additional keyword + arguments are passed to ``numpy.interp`` + - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', + 'polynomial': are passed to ``scipy.interpolate.interp1d``. If + method=='polynomial', the ``order`` keyword argument must also be + provided. + - 'barycentric', 'krog', 'pchip', 'spline': use their respective + ``scipy.interpolate`` classes. + use_coordinate : boolean or str, default True + Specifies which index to use as the x values in the interpolation + formulated as `y = f(x)`. If False, values are treated as if + equally-spaced along `dim`. If True, the IndexVariable `dim` is + used. If use_coordinate is a string, it specifies the name of a + coordinate variable to use as the index. + limit : int, default None + Maximum number of consecutive NaNs to fill. Must be greater than 0 + or None for no limit. + + Returns + ------- + Dataset + + See also + -------- + numpy.interp + scipy.interpolate + """ + from .missing import interp_na, _apply_over_vars_with_dim + + new = _apply_over_vars_with_dim(interp_na, self, dim=dim, method=method, + limit=limit, + use_coordinate=use_coordinate, + **kwargs) + return new + + def ffill(self, dim, limit=None): + '''Fill NaN values by propagating values forward + + *Requires bottleneck.* + + Parameters + ---------- + dim : str + Specifies the dimension along which to propagate values when + filling. + limit : int, default None + The maximum number of consecutive NaN values to forward fill. In + other words, if there is a gap with more than this number of + consecutive NaNs, it will only be partially filled. Must be greater + than 0 or None for no limit. + + Returns + ------- + Dataset + ''' + from .missing import ffill, _apply_over_vars_with_dim + + new = _apply_over_vars_with_dim(ffill, self, dim=dim, limit=limit) + return new + + def bfill(self, dim, limit=None): + '''Fill NaN values by propagating values backward + + *Requires bottleneck and Numpy v1.12.0 or later.* + + Parameters + ---------- + dim : str + Specifies the dimension along which to propagate values when + filling. + limit : int, default None + The maximum number of consecutive NaN values to backward fill. In + other words, if there is a gap with more than this number of + consecutive NaNs, it will only be partially filled. Must be greater + than 0 or None for no limit. + + Returns + ------- + Dataset + ''' + from .missing import bfill, _apply_over_vars_with_dim + + new = _apply_over_vars_with_dim(bfill, self, dim=dim, limit=limit) + return new + + def combine_first(self, other): """Combine two Datasets, default to data_vars of self.
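A short usage sketch for the three Dataset methods added above (made-up data; method='linear' needs only numpy, while ffill and bfill require bottleneck):

import numpy as np
import xarray as xr

ds = xr.Dataset({'var1': ('time', [0.0, np.nan, np.nan, 3.0, 4.0]),
                 'var2': ('x', [10.0, np.nan, 12.0])})

# linear interpolation along 'time'; variables without a 'time' dimension
# (here 'var2') are passed through unchanged by _apply_over_vars_with_dim
filled = ds.interpolate_na(dim='time', method='linear')

# propagate the last valid value forward, at most one step per gap
fwd = ds.ffill(dim='time', limit=1)

# propagate the next valid value backward, with no limit
bwd = ds.bfill(dim='time')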
diff --git a/xarray/core/missing.py b/xarray/core/missing.py index a3dc467947d..4205f108cfd 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -9,6 +9,7 @@ import pandas as pd +from .pycompat import iteritems from .computation import apply_ufunc from .utils import is_scalar @@ -29,7 +30,7 @@ def __call__(self, x): def __repr__(self): return "{type}: method={method}".format(type=self.__class__.__name__, - method=self.method) + method=self.method) class NumpyInterpolator(BaseInterpolator): @@ -41,7 +42,7 @@ class NumpyInterpolator(BaseInterpolator): ''' def __init__(self, xi, yi, method='linear', fill_value=None, **kwargs): - if method is not 'linear': + if method != 'linear': raise ValueError( 'only method `linear` is valid for the NumpyInterpolator') @@ -120,7 +121,7 @@ def __init__(self, xi, yi, method='spline', fill_value=None, order=3, **kwargs): from scipy.interpolate import UnivariateSpline - if method is not 'spline': + if method != 'spline': raise ValueError( 'only method `spline` is valid for the SplineInterpolator') @@ -135,6 +136,22 @@ def __init__(self, xi, yi, method='spline', fill_value=None, order=3, self.f = UnivariateSpline(xi, yi, k=order, **self.cons_kwargs) +def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): + '''wrapper for datasets''' + # can this be done with apply_ufunc somehow? + from .dataset import Dataset + + ds = Dataset(coords=self.coords, attrs=self.attrs) + + for name, var in iteritems(self.data_vars): + if dim in var.dims: + ds[name] = func(var, dim=dim, **kwargs) + else: + ds[name] = var + + return ds + + def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): '''get index to use for x values in interpolation. @@ -145,7 +162,6 @@ def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): If use_coordinate is False, the x values are set as an equally spaced sequence. ''' - if use_coordinate: if use_coordinate is True: index = arr.get_index(dim) @@ -155,15 +171,21 @@ def get_clean_interp_index(arr, dim, use_coordinate=True, **kwargs): raise ValueError( 'Coordinates used for interpolation must be 1D, ' '%s is %dD.' % (use_coordinate, index.ndim)) - if isinstance(index, pd.DatetimeIndex): + + # raise if index cannot be cast to a float (e.g. 
MultiIndex) + try: index = index.values.astype(np.float64) + except (TypeError, ValueError): + # pandas raises a TypeError + # xarray/nuppy raise a ValueError + raise TypeError('Index must be castable to float64 to support' + 'interpolation, got: %s' % type(index)) # check index sorting now so we can skip it later if not (np.diff(index) > 0).all(): raise ValueError("Index must be monotonicly increasing") - else: axis = arr.get_axis_num(dim) - index = np.arange(arr.shape[axis]) + index = np.arange(arr.shape[axis], dtype=np.float64) return index @@ -267,7 +289,7 @@ def _get_interpolator(method, **kwargs): interp1d_methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial'] valid_methods = interp1d_methods + ['barycentric', 'krog', 'pchip', - 'spline'] + 'spline', 'akima'] if (method == 'linear' and not kwargs.get('fill_value', None) == 'extrapolate'): @@ -291,6 +313,8 @@ def _get_interpolator(method, **kwargs): elif method == 'spline': kwargs.update(method=method) interp_class = SplineInterpolator + elif method == 'akima': + interp_class = interpolate.Akima1DInterpolator else: raise ValueError('%s is not a valid scipy interpolator' % method) else: diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index ae86726bbb4..4343c8bb4b9 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -22,6 +22,16 @@ def da(): dims='time') +@pytest.fixture +def ds(): + ds = xr.Dataset() + ds['var1'] = xr.DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], + dims='time') + ds['var2'] = xr.DataArray([10, np.nan, 11, 12, np.nan, 13, 14, 15, np.nan, + 16, 17], dims='x') + return ds + + def make_interpolate_example_data(shape, frac_nan, seed=12345, non_uniform=False): rs = np.random.RandomState(seed) @@ -73,29 +83,29 @@ def test_interpolate_pd_compat(shape, frac_nan, method): np.testing.assert_allclose(actual.values, expected.values) +@pytest.mark.parametrize('method', ['barycentric', 'krog', 'pchip', 'spline', + 'akima']) +def test_scipy_methods_function(method): + kwargs = {} + # This test seems silly but the problem is, Pandas does some wacky things + # with these methods and I can't get a integration test to work. 
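The index handling above means the interpolation coordinate must be one-dimensional, castable to float64 (datetime indexes are converted), and strictly increasing. A small sketch with made-up values:

import numpy as np
import pandas as pd
import xarray as xr

# a datetime coordinate is accepted; its values are cast to float64 internally
times = pd.date_range('2000-01-01', periods=4)
da = xr.DataArray([1.0, np.nan, np.nan, 4.0], dims='y',
                  coords={'t': ('y', times)})
da.interpolate_na(dim='y', use_coordinate='t')

# a non-monotonic index is rejected before any interpolation happens
bad = xr.DataArray([0.0, np.nan, 2.0], dims='y', coords={'y': [0, 2, 1]})
# bad.interpolate_na(dim='y')  # raises ValueError (index must be increasing)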
+ da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True) + actual = da.interpolate_na(method=method, dim='time', **kwargs) + assert (da.count('time') <= actual.count('time')).all() + + @pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1)]) @pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) -@pytest.mark.parametrize('method', ['time', 'index', 'values', 'linear', - 'nearest', 'zero', 'slinear', - 'quadratic', 'cubic']) +@pytest.mark.parametrize('method', ['time', 'index', 'values']) @requires_np112 @requires_scipy def test_interpolate_pd_compat_non_uniform_index(shape, frac_nan, method): - # translate pandas syntax to xarray equivalent - xmethod = method - use_coordinate = False - if method in ['time', 'index', 'values']: - use_coordinate = True - xmethod = 'linear' - elif method in ['nearest', 'slinear', 'quadratic', 'cubic']: - use_coordinate = True - da, df = make_interpolate_example_data(shape, frac_nan, non_uniform=True) for dim in ['time', 'x']: if method == 'time' and dim != 'time': continue - actual = da.interpolate_na(method=xmethod, dim=dim, - use_coordinate=use_coordinate) + actual = da.interpolate_na(method='linear', dim=dim, + use_coordinate=True) expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) np.testing.assert_allclose(actual.values, expected.values) @@ -136,6 +146,26 @@ def test_interpolate_invalid_interpolator_raises(): da.interpolate_na(dim='x', method='foo') +def test_interpolate_multiindex_raises(): + data = np.random.randn(2, 3) + data[1, 1] = np.nan + da = xr.DataArray(data, coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) + das = da.stack(z=('x', 'y')) + with raises_regex(TypeError, 'Index must be castable to float64'): + das.interpolate_na(dim='z') + + +def test_interpolate_2d_coord_raises(): + coords = {'x': xr.Variable(('a', 'b'), np.arange(6).reshape(2, 3)), + 'y': xr.Variable(('a', 'b'), np.arange(6).reshape(2, 3)) * 2} + + data = np.random.randn(2, 3) + data[1, 1] = np.nan + da = xr.DataArray(data, dims=('a', 'b'), coords=coords) + with raises_regex(ValueError, 'interpolation must be 1D'): + da.interpolate_na(dim='a', use_coordinate='x') + + @requires_np112 @requires_scipy def test_interpolate_kwargs(): @@ -199,14 +229,11 @@ def test_interpolate_limits(): @pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', - 'quadratic', 'cubic', 'polynomial', - 'barycentric', 'krog', 'pchip', 'spline']) + 'quadratic', 'cubic']) @requires_np112 @requires_scipy def test_interpolate_methods(method): kwargs = {} - if method == 'polynomial': - kwargs['order'] = 1 da = xr.DataArray(np.array([0, 1, 2, np.nan, np.nan, np.nan, 6, 7, 8], dtype=np.float64), dims='x') actual = da.interpolate_na('x', method=method, **kwargs) @@ -376,3 +403,20 @@ def test_ffill_limit(): result = da.ffill('time', limit=1) expected = xr.DataArray( [0, 0, np.nan, np.nan, np.nan, 3, 4, 5, 5, 6, 7], dims='time') + + +def test_interpolate_dataset(ds): + actual = ds.interpolate_na(dim='time') + # no missing values in var1 + assert actual['var1'].count('time') == actual.dims['time'] + + # var2 should be the same as it was + assert_array_equal(actual['var2'], ds['var2']) + + +def test_ffill_dataset(ds): + ds.ffill(dim='time') + + +def test_bfill_dataset(ds): + ds.ffill(dim='time') From dd9fa8ce7927cface5b40b25921cc993f9b3db19 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 15 Dec 2017 21:11:06 -0600 Subject: [PATCH 13/20] workaround for parameterized tests that are skipped in missing.py module --- xarray/core/dataset.py | 4 +- 
xarray/core/missing.py | 1 - xarray/tests/__init__.py | 1 + xarray/tests/test_missing.py | 144 +++++++++++++++++++---------------- 4 files changed, 82 insertions(+), 68 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fd6b8aea212..1d392ccdbd9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2453,8 +2453,8 @@ def interpolate_na(self, dim=None, method='linear', limit=None, """ from .missing import interp_na, _apply_over_vars_with_dim - new = _apply_over_vars_with_dim(interp_na, self, dim=dim, method=method, - limit=limit, + new = _apply_over_vars_with_dim(interp_na, self, dim=dim, + method=method, limit=limit, use_coordinate=use_coordinate, **kwargs) return new diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 4205f108cfd..5d319b28e18 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -138,7 +138,6 @@ def __init__(self, xi, yi, method='spline', fill_value=None, order=3, def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): '''wrapper for datasets''' - # can this be done with apply_ufunc somehow? from .dataset import Dataset ds = Dataset(coords=self.coords, attrs=self.attrs) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 235c6e9e410..9afe6f43850 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -72,6 +72,7 @@ def _importorskip(modname, minversion=None): has_rasterio, requires_rasterio = _importorskip('rasterio') has_pathlib, requires_pathlib = _importorskip('pathlib') has_zarr, requires_zarr = _importorskip('zarr', minversion='2.2.0') +has_np112, requires_np112 = _importorskip('numpy', minversion='1.12.0') # some special cases has_scipy_or_netCDF4 = has_scipy or has_netCDF4 diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 4343c8bb4b9..04e98c49222 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd import pytest +import itertools import xarray as xr @@ -68,62 +69,74 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, return da, df -@pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1), (100, 100)]) -@pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) -@pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', - 'quadratic', 'cubic']) @requires_np112 @requires_scipy -def test_interpolate_pd_compat(shape, frac_nan, method): - da, df = make_interpolate_example_data(shape, frac_nan) +def test_interpolate_pd_compat(): + shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] + frac_nans = [0, 0.5, 1] + methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'] - for dim in ['time', 'x']: - actual = da.interpolate_na(method=method, dim=dim) - expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) - np.testing.assert_allclose(actual.values, expected.values) + for (shape, frac_nan, method) in itertools.product(shapes, frac_nans, + methods): + da, df = make_interpolate_example_data(shape, frac_nan) -@pytest.mark.parametrize('method', ['barycentric', 'krog', 'pchip', 'spline', - 'akima']) -def test_scipy_methods_function(method): - kwargs = {} - # This test seems silly but the problem is, Pandas does some wacky things - # with these methods and I can't get a integration test to work. 
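The refactor in this commit folds the pytest parameter grid into the test body so the requires_* skip decorators apply cleanly to a single test instead of interacting with parametrize. Schematically (generic names, not an actual xarray test):

import itertools
import numpy as np

def test_compat_grid():
    # the whole grid runs inside one skippable test
    shapes = [(8, 8), (1, 20), (20, 1)]
    frac_nans = [0, 0.5, 1]
    for shape, frac_nan in itertools.product(shapes, frac_nans):
        vals = np.random.randn(*shape)
        mask = np.random.rand(*shape) < frac_nan
        vals[mask] = np.nan
        assert np.isnan(vals).sum() == mask.sum()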
- da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True) - actual = da.interpolate_na(method=method, dim='time', **kwargs) - assert (da.count('time') <= actual.count('time')).all() + for dim in ['time', 'x']: + actual = da.interpolate_na(method=method, dim=dim) + expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) + np.testing.assert_allclose(actual.values, expected.values) + + +@requires_scipy +def test_scipy_methods_function(): + for method in ['barycentric', 'krog', 'pchip', 'spline', 'akima']: + kwargs = {} + # Note: Pandas does some wacky things with these methods and the full + # integration tests wont work. + da, _ = make_interpolate_example_data((25, 25), 0.4, non_uniform=True) + actual = da.interpolate_na(method=method, dim='time', **kwargs) + assert (da.count('time') <= actual.count('time')).all() -@pytest.mark.parametrize('shape', [(8, 8), (1, 20), (20, 1)]) -@pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) -@pytest.mark.parametrize('method', ['time', 'index', 'values']) @requires_np112 @requires_scipy -def test_interpolate_pd_compat_non_uniform_index(shape, frac_nan, method): - da, df = make_interpolate_example_data(shape, frac_nan, non_uniform=True) - for dim in ['time', 'x']: - if method == 'time' and dim != 'time': - continue - actual = da.interpolate_na(method='linear', dim=dim, - use_coordinate=True) - expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) - np.testing.assert_allclose(actual.values, expected.values) - - -@pytest.mark.parametrize('shape', [(8, 8), (100, 100)]) -@pytest.mark.parametrize('frac_nan', [0, 0.5, 1]) -@pytest.mark.parametrize('order', [1, 2, 3]) +def test_interpolate_pd_compat_non_uniform_index(): + shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] + frac_nans = [0, 0.5, 1] + methods = ['time', 'index', 'values'] + + for (shape, frac_nan, method) in itertools.product(shapes, frac_nans, + methods): + + da, df = make_interpolate_example_data(shape, frac_nan, + non_uniform=True) + for dim in ['time', 'x']: + if method == 'time' and dim != 'time': + continue + actual = da.interpolate_na(method='linear', dim=dim, + use_coordinate=True) + expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) + np.testing.assert_allclose(actual.values, expected.values) + + @requires_np112 @requires_scipy -def test_interpolate_pd_compat_polynomial(shape, frac_nan, order): - da, df = make_interpolate_example_data(shape, frac_nan) +def test_interpolate_pd_compat_polynomial(): + shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] + frac_nans = [0, 0.5, 1] + orders = [1, 2, 3] - for dim in ['time', 'x']: - actual = da.interpolate_na(method='polynomial', order=order, dim=dim, - use_coordinate=False) - expected = df.interpolate(method='polynomial', order=order, - axis=da.get_axis_num(dim)) - np.testing.assert_allclose(actual.values, expected.values) + for (shape, frac_nan, order) in itertools.product(shapes, frac_nans, + orders): + + da, df = make_interpolate_example_data(shape, frac_nan) + + for dim in ['time', 'x']: + actual = da.interpolate_na(method='polynomial', order=order, dim=dim, + use_coordinate=False) + expected = df.interpolate(method='polynomial', order=order, + axis=da.get_axis_num(dim)) + np.testing.assert_allclose(actual.values, expected.values) @requires_scipy @@ -228,34 +241,33 @@ def test_interpolate_limits(): assert_equal(actual, expected) -@pytest.mark.parametrize('method', ['linear', 'nearest', 'zero', 'slinear', - 'quadratic', 'cubic']) @requires_np112 @requires_scipy -def 
test_interpolate_methods(method): - kwargs = {} - da = xr.DataArray(np.array([0, 1, 2, np.nan, np.nan, np.nan, 6, 7, 8], - dtype=np.float64), dims='x') - actual = da.interpolate_na('x', method=method, **kwargs) - assert actual.isnull().sum() == 0 +def test_interpolate_methods(): + for method in ['linear', 'nearest', 'zero', 'slinear', 'quadratic', + 'cubic']: + kwargs = {} + da = xr.DataArray(np.array([0, 1, 2, np.nan, np.nan, np.nan, 6, 7, 8], + dtype=np.float64), dims='x') + actual = da.interpolate_na('x', method=method, **kwargs) + assert actual.isnull().sum() == 0 - actual = da.interpolate_na('x', method=method, limit=2, **kwargs) - assert actual.isnull().sum() == 1 + actual = da.interpolate_na('x', method=method, limit=2, **kwargs) + assert actual.isnull().sum() == 1 -@pytest.mark.parametrize( - 'method, interpolator', - [('linear', NumpyInterpolator), ('linear', ScipyInterpolator), - ('spline', SplineInterpolator)]) @requires_scipy -def test_interpolators(method, interpolator): - xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) - yi = np.array([-10, 0, 10, 20, 50], dtype=np.float64) - x = np.array([3, 4], dtype=np.float64) +def test_interpolators(): + for method, interpolator in [('linear', NumpyInterpolator), + ('linear', ScipyInterpolator), + ('spline', SplineInterpolator)]: + xi = np.array([-1, 0, 1, 2, 5], dtype=np.float64) + yi = np.array([-10, 0, 10, 20, 50], dtype=np.float64) + x = np.array([3, 4], dtype=np.float64) - f = interpolator(xi, yi, method=method) - out = f(x) - assert pd.isnull(out).sum() == 0 + f = interpolator(xi, yi, method=method) + out = f(x) + assert pd.isnull(out).sum() == 0 @requires_np112 @@ -414,9 +426,11 @@ def test_interpolate_dataset(ds): assert_array_equal(actual['var2'], ds['var2']) +@requires_bottleneck def test_ffill_dataset(ds): ds.ffill(dim='time') +@requires_bottleneck def test_bfill_dataset(ds): ds.ffill(dim='time') From 88d15695b559d9f2557c2652d7c40a41bcc7ceb3 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 18 Dec 2017 07:58:57 -0800 Subject: [PATCH 14/20] requires_np112 for dataset interpolate test --- xarray/tests/test_missing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 04e98c49222..82991fdee35 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -417,6 +417,7 @@ def test_ffill_limit(): [0, 0, np.nan, np.nan, np.nan, 3, 4, 5, 5, 6, 7], dims='time') +@requires_np112 def test_interpolate_dataset(ds): actual = ds.interpolate_na(dim='time') # no missing values in var1 From 37882b778e56126cf2106bf264918e855cef00d3 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 20 Dec 2017 16:12:11 -0800 Subject: [PATCH 15/20] remove req for np 112 --- xarray/core/missing.py | 5 +++-- xarray/tests/test_missing.py | 20 -------------------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 5d319b28e18..460ed7cf3fd 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -12,6 +12,7 @@ from .pycompat import iteritems from .computation import apply_ufunc from .utils import is_scalar +from .npcompat import flip class BaseInterpolator(object): @@ -241,13 +242,13 @@ def _bfill(arr, n=None, axis=-1): '''inverse of ffill''' import bottleneck as bn - arr = np.flip(arr, axis=axis) + arr = flip(arr, axis=axis) # fill arr = bn.push(arr, axis=axis, n=n) # reverse back to original - return np.flip(arr, axis=axis) + return flip(arr, axis=axis) def ffill(arr, dim=None, limit=None): diff 
--git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 82991fdee35..57a11a8eef9 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -69,7 +69,6 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, return da, df -@requires_np112 @requires_scipy def test_interpolate_pd_compat(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] @@ -98,7 +97,6 @@ def test_scipy_methods_function(): assert (da.count('time') <= actual.count('time')).all() -@requires_np112 @requires_scipy def test_interpolate_pd_compat_non_uniform_index(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] @@ -119,7 +117,6 @@ def test_interpolate_pd_compat_non_uniform_index(): np.testing.assert_allclose(actual.values, expected.values) -@requires_np112 @requires_scipy def test_interpolate_pd_compat_polynomial(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] @@ -179,7 +176,6 @@ def test_interpolate_2d_coord_raises(): da.interpolate_na(dim='a', use_coordinate='x') -@requires_np112 @requires_scipy def test_interpolate_kwargs(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims='x') @@ -192,7 +188,6 @@ def test_interpolate_kwargs(): assert_equal(actual, expected) -@requires_np112 def test_interpolate(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) @@ -206,7 +201,6 @@ def test_interpolate(): assert_equal(actual, expected) -@requires_np112 def test_interpolate_nonans(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) @@ -215,7 +209,6 @@ def test_interpolate_nonans(): assert_equal(actual, expected) -@requires_np112 @requires_scipy def test_interpolate_allnans(): vals = np.full(6, np.nan, dtype=np.float64) @@ -225,7 +218,6 @@ def test_interpolate_allnans(): assert_equal(actual, expected) -@requires_np112 @requires_bottleneck def test_interpolate_limits(): da = xr.DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], @@ -241,7 +233,6 @@ def test_interpolate_limits(): assert_equal(actual, expected) -@requires_np112 @requires_scipy def test_interpolate_methods(): for method in ['linear', 'nearest', 'zero', 'slinear', 'quadratic', @@ -270,7 +261,6 @@ def test_interpolators(): assert pd.isnull(out).sum() == 0 -@requires_np112 def test_interpolate_use_coordinate(): xc = xr.Variable('x', [100, 200, 300, 400, 500, 600]) da = xr.DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], @@ -293,7 +283,6 @@ def test_interpolate_use_coordinate(): assert_equal(actual, expected) -@requires_np112 @requires_dask def test_interpolate_dask(): da, _ = make_interpolate_example_data((40, 40), 0.5) @@ -311,7 +300,6 @@ def test_interpolate_dask(): assert_equal(actual, expected) -@requires_np112 @requires_dask def test_interpolate_dask_raises_for_invalid_chunk_dim(): da, _ = make_interpolate_example_data((40, 40), 0.5) @@ -320,7 +308,6 @@ def test_interpolate_dask_raises_for_invalid_chunk_dim(): da.interpolate_na('time') -@requires_np112 @requires_bottleneck def test_ffill(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims='x') @@ -329,7 +316,6 @@ def test_ffill(): assert_equal(actual, expected) -@requires_np112 @requires_bottleneck @requires_dask def test_ffill_dask(): @@ -348,7 +334,6 @@ def test_ffill_dask(): assert_equal(actual, expected) -@requires_np112 @requires_bottleneck @requires_dask def test_bfill_dask(): @@ -367,7 +352,6 @@ def test_bfill_dask(): assert_equal(actual, expected) -@requires_np112 @requires_bottleneck def test_ffill_bfill_nonans(): @@ -381,7 +365,6 @@ def test_ffill_bfill_nonans(): assert_equal(actual, expected) -@requires_np112 
@requires_bottleneck def test_ffill_bfill_allnans(): @@ -395,14 +378,12 @@ def test_ffill_bfill_allnans(): assert_equal(actual, expected) -@requires_np112 @requires_bottleneck def test_ffill_functions(da): result = da.ffill('time') assert result.isnull().sum() == 0 -@requires_np112 @requires_bottleneck def test_ffill_limit(): da = xr.DataArray( @@ -417,7 +398,6 @@ def test_ffill_limit(): [0, 0, np.nan, np.nan, np.nan, 3, 4, 5, 5, 6, 7], dims='time') -@requires_np112 def test_interpolate_dataset(ds): actual = ds.interpolate_na(dim='time') # no missing values in var1 From a04e83e894f6cc7cfa3a95872dd0f7ced2a557b5 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 20 Dec 2017 19:44:59 -0800 Subject: [PATCH 16/20] fix typo in docs --- doc/computation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/computation.rst b/doc/computation.rst index c3dddfe034e..420b97923d7 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -83,7 +83,7 @@ for filling missing values via 1D interpolation. .. ipython:: python x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], - coords={'xx': [0, 1, 1.1, 1.9, 3]}) + coords={'xx': xr.Variable('x', [0, 1, 1.1, 1.9, 3])}) x.interpolate_na(dim='x', method='linear', use_coordinate='xx') Note that xarray slightly diverges from the pandas ``interpolate`` syntax by From 48505a51e69d6b3663783db414962f3011e94aa1 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Wed, 20 Dec 2017 19:58:27 -0800 Subject: [PATCH 17/20] @requires_np112 for methods that use apply_ufunc in missing.py --- xarray/tests/test_missing.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 57a11a8eef9..69767f3abba 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -69,6 +69,7 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, return da, df +@requires_np112 @requires_scipy def test_interpolate_pd_compat(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] @@ -86,6 +87,7 @@ def test_interpolate_pd_compat(): np.testing.assert_allclose(actual.values, expected.values) +@requires_np112 @requires_scipy def test_scipy_methods_function(): for method in ['barycentric', 'krog', 'pchip', 'spline', 'akima']: @@ -97,6 +99,7 @@ def test_scipy_methods_function(): assert (da.count('time') <= actual.count('time')).all() +@requires_np112 @requires_scipy def test_interpolate_pd_compat_non_uniform_index(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] @@ -117,6 +120,7 @@ def test_interpolate_pd_compat_non_uniform_index(): np.testing.assert_allclose(actual.values, expected.values) +@requires_np112 @requires_scipy def test_interpolate_pd_compat_polynomial(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] @@ -136,6 +140,7 @@ def test_interpolate_pd_compat_polynomial(): np.testing.assert_allclose(actual.values, expected.values) +@requires_np112 @requires_scipy def test_interpolate_unsorted_index_raises(): vals = np.array([1, 2, 3], dtype=np.float64) @@ -176,6 +181,7 @@ def test_interpolate_2d_coord_raises(): da.interpolate_na(dim='a', use_coordinate='x') +@requires_np112 @requires_scipy def test_interpolate_kwargs(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims='x') @@ -188,6 +194,7 @@ def test_interpolate_kwargs(): assert_equal(actual, expected) +@requires_np112 def test_interpolate(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) @@ -201,6 +208,7 @@ def test_interpolate(): assert_equal(actual, expected) +@requires_np112 def 
test_interpolate_nonans(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) @@ -209,6 +217,7 @@ def test_interpolate_nonans(): assert_equal(actual, expected) +@requires_np112 @requires_scipy def test_interpolate_allnans(): vals = np.full(6, np.nan, dtype=np.float64) @@ -218,6 +227,7 @@ def test_interpolate_allnans(): assert_equal(actual, expected) +@requires_np112 @requires_bottleneck def test_interpolate_limits(): da = xr.DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], @@ -233,6 +243,7 @@ def test_interpolate_limits(): assert_equal(actual, expected) +@requires_np112 @requires_scipy def test_interpolate_methods(): for method in ['linear', 'nearest', 'zero', 'slinear', 'quadratic', @@ -248,6 +259,7 @@ def test_interpolate_methods(): @requires_scipy +@requires_np112 def test_interpolators(): for method, interpolator in [('linear', NumpyInterpolator), ('linear', ScipyInterpolator), @@ -261,6 +273,7 @@ def test_interpolators(): assert pd.isnull(out).sum() == 0 +@requires_np112 def test_interpolate_use_coordinate(): xc = xr.Variable('x', [100, 200, 300, 400, 500, 600]) da = xr.DataArray(np.array([1, 2, np.nan, np.nan, np.nan, 6], @@ -283,6 +296,7 @@ def test_interpolate_use_coordinate(): assert_equal(actual, expected) +@requires_np112 @requires_dask def test_interpolate_dask(): da, _ = make_interpolate_example_data((40, 40), 0.5) @@ -300,6 +314,7 @@ def test_interpolate_dask(): assert_equal(actual, expected) +@requires_np112 @requires_dask def test_interpolate_dask_raises_for_invalid_chunk_dim(): da, _ = make_interpolate_example_data((40, 40), 0.5) @@ -308,6 +323,7 @@ def test_interpolate_dask_raises_for_invalid_chunk_dim(): da.interpolate_na('time') +@requires_np112 @requires_bottleneck def test_ffill(): da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims='x') @@ -316,6 +332,7 @@ def test_ffill(): assert_equal(actual, expected) +@requires_np112 @requires_bottleneck @requires_dask def test_ffill_dask(): @@ -353,6 +370,7 @@ def test_bfill_dask(): @requires_bottleneck +@requires_np112 def test_ffill_bfill_nonans(): vals = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64) @@ -366,6 +384,7 @@ def test_ffill_bfill_nonans(): @requires_bottleneck +@requires_np112 def test_ffill_bfill_allnans(): vals = np.full(6, np.nan, dtype=np.float64) @@ -379,12 +398,14 @@ def test_ffill_bfill_allnans(): @requires_bottleneck +@requires_np112 def test_ffill_functions(da): result = da.ffill('time') assert result.isnull().sum() == 0 @requires_bottleneck +@requires_np112 def test_ffill_limit(): da = xr.DataArray( [0, np.nan, np.nan, np.nan, np.nan, 3, 4, 5, np.nan, 6, 7], @@ -398,6 +419,7 @@ def test_ffill_limit(): [0, 0, np.nan, np.nan, np.nan, 3, 4, 5, 5, 6, 7], dims='time') +@requires_np112 def test_interpolate_dataset(ds): actual = ds.interpolate_na(dim='time') # no missing values in var1 @@ -408,10 +430,12 @@ def test_interpolate_dataset(ds): @requires_bottleneck +@requires_np112 def test_ffill_dataset(ds): ds.ffill(dim='time') @requires_bottleneck +@requires_np112 def test_bfill_dataset(ds): ds.ffill(dim='time') From 282bb6522ddb24fbe93fc010460bafe97e64c428 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Thu, 21 Dec 2017 12:16:48 -0800 Subject: [PATCH 18/20] reuse type in apply over vars with dim --- xarray/core/missing.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 460ed7cf3fd..c63e02fec9e 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -139,9 +139,8 @@ def __init__(self, xi, yi, 
method='spline', fill_value=None, order=3, def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): '''wrapper for datasets''' - from .dataset import Dataset - ds = Dataset(coords=self.coords, attrs=self.attrs) + ds = type(self)(coords=self.coords, attrs=self.attrs) for name, var in iteritems(self.data_vars): if dim in var.dims: From a6fcb7f310ed7e861e0c0bec2d377d0edd74fabc Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 22 Dec 2017 11:12:33 -0700 Subject: [PATCH 19/20] rework the fill value convention for linear interpolation, no longer match pandas -- adjusted tests and docs to reflect this change --- xarray/core/dataarray.py | 8 ++++---- xarray/core/dataset.py | 2 +- xarray/core/missing.py | 4 ++-- xarray/tests/test_missing.py | 30 +++++++++++++++++++++++------- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 86104a5ab58..a522fe60b2d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1299,7 +1299,7 @@ def ffill(self, dim, limit=None): def bfill(self, dim, limit=None): '''Fill NaN values by propogating values backward - *Requires bottleneck and Numpy v1.12.0 or later.* + *Requires bottleneck.* Parameters ---------- @@ -2026,10 +2026,10 @@ def sortby(self, variables, ascending=True): sorted: DataArray A new dataarray where all the specified dims are sorted by dim labels. - + Examples -------- - + >>> da = xr.DataArray(np.random.rand(5), ... coords=[pd.date_range('1/1/2000', periods=5)], ... dims='time') @@ -2043,7 +2043,7 @@ def sortby(self, variables, ascending=True): array([ 0.26532 , 0.270962, 0.552878, 0.615637, 0.965471]) Coordinates: - * time (time) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 ... + * time (time) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 ... 
""" ds = self._to_temp_dataset().sortby(variables, ascending=ascending) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 177e0753515..58847bb0086 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2487,7 +2487,7 @@ def ffill(self, dim, limit=None): def bfill(self, dim, limit=None): '''Fill NaN values by propogating values backward - *Requires bottleneck and Numpy v1.12.0 or later.* + *Requires bottleneck.* Parameters ---------- diff --git a/xarray/core/missing.py b/xarray/core/missing.py index c63e02fec9e..cbf0df958d5 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -61,7 +61,7 @@ def __init__(self, xi, yi, method='linear', fill_value=None, **kwargs): if fill_value is None: self._left = np.nan - self._right = yi[-1] + self._right = np.nan elif isinstance(fill_value, Iterable) and len(fill_value) == 2: self._left = fill_value[0] self._right = fill_value[1] @@ -102,7 +102,7 @@ def __init__(self, xi, yi, method=None, fill_value=None, self.call_kwargs = {} if fill_value is None and method == 'linear': - fill_value = kwargs.pop('fill_value', (np.nan, yi[-1])) + fill_value = kwargs.pop('fill_value', (np.nan, np.nan)) elif fill_value is None: fill_value = np.nan diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 69767f3abba..209e8de8663 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -74,7 +74,7 @@ def make_interpolate_example_data(shape, frac_nan, seed=12345, def test_interpolate_pd_compat(): shapes = [(8, 8), (1, 20), (20, 1), (100, 100)] frac_nans = [0, 0.5, 1] - methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'] + methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'] for (shape, frac_nan, method) in itertools.product(shapes, frac_nans, methods): @@ -82,8 +82,16 @@ def test_interpolate_pd_compat(): da, df = make_interpolate_example_data(shape, frac_nan) for dim in ['time', 'x']: - actual = da.interpolate_na(method=method, dim=dim) - expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) + actual = da.interpolate_na(method=method, dim=dim, + fill_value=np.nan) + expected = df.interpolate(method=method, axis=da.get_axis_num(dim), + fill_value=(np.nan, np.nan)) + # Note, Pandas does some odd things with the left/right fill_value + # for the linear methods. This next line inforces the xarray + # fill_value convention on the pandas output. Therefore, this test + # only checks that interpolated values are the same (not nans) + expected.values[pd.isnull(actual.values)] = np.nan + np.testing.assert_allclose(actual.values, expected.values) @@ -115,8 +123,16 @@ def test_interpolate_pd_compat_non_uniform_index(): if method == 'time' and dim != 'time': continue actual = da.interpolate_na(method='linear', dim=dim, - use_coordinate=True) - expected = df.interpolate(method=method, axis=da.get_axis_num(dim)) + use_coordinate=True, fill_value=np.nan) + expected = df.interpolate(method=method, axis=da.get_axis_num(dim), + fill_value=np.nan) + + # Note, Pandas does some odd things with the left/right fill_value + # for the linear methods. This next line inforces the xarray + # fill_value convention on the pandas output. 
Therefore, this test + # only checks that interpolated values are the same (not nans) + expected.values[pd.isnull(actual.values)] = np.nan + np.testing.assert_allclose(actual.values, expected.values) @@ -133,8 +149,8 @@ def test_interpolate_pd_compat_polynomial(): da, df = make_interpolate_example_data(shape, frac_nan) for dim in ['time', 'x']: - actual = da.interpolate_na(method='polynomial', order=order, dim=dim, - use_coordinate=False) + actual = da.interpolate_na(method='polynomial', order=order, + dim=dim, use_coordinate=False) expected = df.interpolate(method='polynomial', order=order, axis=da.get_axis_num(dim)) np.testing.assert_allclose(actual.values, expected.values) From 2b0d9e1ac4815e2483ee1a6d0d303e560608549e Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Fri, 29 Dec 2017 22:26:31 -0700 Subject: [PATCH 20/20] flake8 --- asv_bench/benchmarks/dataarray_missing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/asv_bench/benchmarks/dataarray_missing.py b/asv_bench/benchmarks/dataarray_missing.py index f121971365e..c6aa8f428bd 100644 --- a/asv_bench/benchmarks/dataarray_missing.py +++ b/asv_bench/benchmarks/dataarray_missing.py @@ -6,7 +6,6 @@ try: import dask - import dask.multiprocessing except ImportError: pass
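The fill_value rework in patch 19 means linear interpolation no longer carries the last valid value past the end of the data; both out-of-range ends now default to NaN. A quick sketch of the resulting behaviour (illustrative values; the extrapolating variant assumes scipy, since fill_value='extrapolate' is routed through scipy.interpolate.interp1d):

import numpy as np
import xarray as xr

da = xr.DataArray([4.0, 5.0, np.nan, np.nan], dims='x')

# the trailing NaNs lie outside the range of valid points, so under the new
# convention they stay NaN instead of repeating the last value (5.0)
da.interpolate_na(dim='x', method='linear')

# passing fill_value through to the interpolator restores extrapolation,
# giving 6.0 and 7.0 for the last two points
da.interpolate_na(dim='x', method='linear', fill_value='extrapolate')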