- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 1.2k
WIP: Feature/interpolate #1640
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: Feature/interpolate #1640
Changes from all commits
1582c1f
              ab727e7
              95006c4
              4a4f6eb
              42d63ef
              263ec98
              19d21b8
              f937c07
              8717e38
              3d5c1b1
              1864e8f
              f58d464
              1b93808
              6f83b7b
              33df6af
              eafe67a
              dd9fa8c
              88d1569
              3fb9261
              37882b7
              a04e83e
              48505a5
              20f957d
              282bb65
              a6fcb7f
              2b0d9e1
              d3220f3
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| from __future__ import absolute_import | ||
| from __future__ import division | ||
| from __future__ import print_function | ||
|  | ||
| import pandas as pd | ||
|  | ||
| try: | ||
| import dask | ||
| except ImportError: | ||
| pass | ||
|  | ||
| import xarray as xr | ||
|  | ||
| from . import randn, requires_dask | ||
|  | ||
|  | ||
def make_bench_data(shape, frac_nan, chunks):
    """Build the DataArray used as input by the benchmarks in this module.

    Parameters
    ----------
    shape : tuple of int
        Shape of the array. The axes are labelled ('time', 'x', 'y') and
        ``shape[0]`` sets the length of the daily 'time' coordinate.
    frac_nan : float
        Forwarded to ``randn`` (presumably the fraction of values replaced
        by NaN -- confirm against that helper).
    chunks : dict or None
        When not None, the array is passed through ``DataArray.chunk``
        with this argument before being returned.

    Returns
    -------
    xarray.DataArray
    """
    values = randn(shape, frac_nan)
    time_index = pd.date_range('2000-01-01', freq='D', periods=shape[0])
    arr = xr.DataArray(values, dims=('time', 'x', 'y'),
                       coords={'time': time_index})
    return arr if chunks is None else arr.chunk(chunks)
|  | ||
|  | ||
def time_interpolate_na(shape, chunks, method, limit):
    """Benchmark ``DataArray.interpolate_na`` along the 'time' dimension.

    Parameters
    ----------
    shape : tuple of int
        Shape of the benchmark array, forwarded to ``make_bench_data``.
    chunks : dict or None
        Chunking specification; when not None, ``requires_dask()`` is called
        first and the result is explicitly computed.
    method : str
        Interpolation method handed to ``interpolate_na``.
    limit : int or None
        Forwarded to ``interpolate_na`` as ``limit``.
    """
    if chunks is not None:
        requires_dask()
    da = make_bench_data(shape, 0.1, chunks=chunks)
    # Use the parametrized ``method`` so that every entry in ``params``
    # exercises its own interpolator instead of always timing 'linear'.
    actual = da.interpolate_na(dim='time', method=method, limit=limit)

    if chunks is not None:
        # Call compute() so the work on the chunked array is part of the
        # timed section.
        actual = actual.compute()


time_interpolate_na.param_names = ['shape', 'chunks', 'method', 'limit']
time_interpolate_na.params = ([(3650, 200, 400), (100, 25, 25)],
                              [None, {'x': 25, 'y': 25}],
                              ['linear', 'spline', 'quadratic', 'cubic'],
                              [None, 3])
|  | ||
|  | ||
def time_ffill(shape, chunks, limit):
    """Benchmark ``DataArray.ffill`` along the 'time' dimension.

    Parameters
    ----------
    shape : tuple of int
        Shape of the benchmark array, forwarded to ``make_bench_data``.
    chunks : dict or None
        Chunking specification; when not None, ``requires_dask()`` is called
        first and the result is explicitly computed.
    limit : int or None
        Forwarded to ``ffill`` as ``limit``.
    """
    if chunks is not None:
        # Same guard as time_interpolate_na: check for dask before building
        # a chunked array.
        requires_dask()

    da = make_bench_data(shape, 0.1, chunks=chunks)
    actual = da.ffill(dim='time', limit=limit)

    if chunks is not None:
        # Call compute() so the work on the chunked array is part of the
        # timed section.
        actual = actual.compute()


time_ffill.param_names = ['shape', 'chunks', 'limit']
time_ffill.params = ([(3650, 200, 400), (100, 25, 25)],
                     [None, {'x': 25, 'y': 25}],
                     [None, 3])
|  | ||
|  | ||
def time_bfill(shape, chunks, limit):
    """Benchmark ``DataArray.bfill`` along the 'time' dimension.

    Parameters
    ----------
    shape : tuple of int
        Shape of the benchmark array, forwarded to ``make_bench_data``.
    chunks : dict or None
        Chunking specification; when not None, ``requires_dask()`` is called
        first and the result is explicitly computed.
    limit : int or None
        Forwarded to ``bfill`` as ``limit``.
    """
    if chunks is not None:
        # Same guard as time_interpolate_na: check for dask before building
        # a chunked array.
        requires_dask()

    da = make_bench_data(shape, 0.1, chunks=chunks)
    actual = da.bfill(dim='time', limit=limit)

    if chunks is not None:
        # Call compute() so the work on the chunked array is part of the
        # timed section.
        actual = actual.compute()


time_bfill.param_names = ['shape', 'chunks', 'limit']
time_bfill.params = ([(3650, 200, 400), (100, 25, 25)],
                     [None, {'x': 25, 'y': 25}],
                     [None, 3])
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -1228,6 +1228,97 @@ def fillna(self, value): | |
| out = ops.fillna(self, value) | ||
| return out | ||
|  | ||
def interpolate_na(self, dim=None, method='linear', limit=None,
                   use_coordinate=True,
                   **kwargs):
    """Interpolate values according to different methods.

    Parameters
    ----------
    dim : str
        Specifies the dimension along which to interpolate.
    method : {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
              'polynomial', 'barycentric', 'krog', 'pchip',
              'spline', 'akima'}, optional
        String indicating which method to use for interpolation:

        - 'linear': linear interpolation (Default). Additional keyword
          arguments are passed to ``numpy.interp``
        - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic',
          'polynomial': are passed to ``scipy.interpolate.interp1d``. If
          method=='polynomial', the ``order`` keyword argument must also be
          provided.
        - 'barycentric', 'krog', 'pchip', 'spline', and 'akima': use their
          respective ``scipy.interpolate`` classes.
    use_coordinate : boolean or str, default True
        Specifies which index to use as the x values in the interpolation
        formulated as `y = f(x)`. If False, values are treated as if
        equally-spaced along `dim`. If True, the IndexVariable `dim` is
        used. If use_coordinate is a string, it specifies the name of a
        coordinate variable to use as the index.
    limit : int, default None
        Maximum number of consecutive NaNs to fill. Must be greater than 0
        or None for no limit.
    **kwargs
        Additional keyword arguments, forwarded unchanged to
        ``missing.interp_na``.

    Returns
    -------
    DataArray

    See also
    --------
    numpy.interp
    scipy.interpolate
    """
    # Imported at call time rather than at module import.
    from .missing import interp_na
    return interp_na(self, dim=dim, method=method, limit=limit,
                     use_coordinate=use_coordinate, **kwargs)
|  | ||
| def ffill(self, dim, limit=None): | ||
| '''Fill NaN values by propagating values forward | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No need to change now, but FYI PEP8 is `"""` (triple double quotes) for docstrings. | ||
|  | ||
| *Requires bottleneck.* | ||
|  | ||
| Parameters | ||
| ---------- | ||
| dim : str | ||
| Specifies the dimension along which to propagate values when | ||
| filling. | ||
| limit : int, default None | ||
| The maximum number of consecutive NaN values to forward fill. In | ||
| other words, if there is a gap with more than this number of | ||
| consecutive NaNs, it will only be partially filled. Must be greater | ||
| than 0 or None for no limit. | ||
|  | ||
| Returns | ||
| ------- | ||
| DataArray | ||
| ''' | ||
| from .missing import ffill | ||
| return ffill(self, dim, limit=limit) | ||
|  | ||
def bfill(self, dim, limit=None):
    '''Fill NaN values by propagating values backward

    *Requires bottleneck.*

    Parameters
    ----------
    dim : str
        Specifies the dimension along which to propagate values when
        filling.
    limit : int, default None
        The maximum number of consecutive NaN values to backward fill. In
        other words, if there is a gap with more than this number of
        consecutive NaNs, it will only be partially filled. Must be greater
        than 0 or None for no limit.

    Returns
    -------
    DataArray
    '''
    # Imported at call time rather than at module import.
    from .missing import bfill
    return bfill(self, dim, limit=limit)
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need bottleneck installed to use  | ||
|  | ||
| def combine_first(self, other): | ||
| """Combine two DataArray objects, with union of coordinates. | ||
|  | ||
|  | @@ -1935,10 +2026,10 @@ def sortby(self, variables, ascending=True): | |
| sorted: DataArray | ||
| A new dataarray where all the specified dims are sorted by dim | ||
| labels. | ||
|  | ||
| Examples | ||
| -------- | ||
|  | ||
| >>> da = xr.DataArray(np.random.rand(5), | ||
| ... coords=[pd.date_range('1/1/2000', periods=5)], | ||
| ... dims='time') | ||
|  | @@ -1952,7 +2043,7 @@ def sortby(self, variables, ascending=True): | |
| <xarray.DataArray (time: 5)> | ||
| array([ 0.26532 , 0.270962, 0.552878, 0.615637, 0.965471]) | ||
| Coordinates: | ||
| * time (time) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 ... | ||
| * time (time) datetime64[ns] 2000-01-03 2000-01-04 2000-01-05 ... | ||
| """ | ||
| ds = self._to_temp_dataset().sortby(variables, ascending=ascending) | ||
| return self._from_temp_dataset(ds) | ||
|  | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably too late to be helpful - but are we sure about the name here? We don't generally add `_na` onto methods (`bfill_na`?), and pandas is `interpolate` only.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See comment from @shoyer above: #1640 (comment)