
Commit e2d4ad0

Merge branch 'master' into flake
2 parents: 1cb412c + d8d87d2

File tree: 11 files changed, +236 −15 lines

doc/whats-new.rst

Lines changed: 13 additions & 1 deletion

@@ -66,6 +66,10 @@ Enhancements
 - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` now support the
   ``loffset`` kwarg just like Pandas.
   By `Deepak Cherian <https://github.com/dcherian>`_
+- The `apply` methods for `DatasetGroupBy`, `DataArrayGroupBy`,
+  `DatasetResample` and `DataArrayResample` can now pass positional arguments
+  to the applied function.
+  By `Matti Eskelinen <https://github.com/maaleske>`_.
 - 0d slices of ndarrays are now obtained directly through indexing, rather than
   extracting and wrapping a scalar, avoiding unnecessary copying. By `Daniel
   Wennberg <https://github.com/danielwe>`_.
@@ -83,7 +87,11 @@ Bug fixes
   By `Martin Raspaud <https://github.com/mraspaud>`_.
 - Fix parsing of ``_Unsigned`` attribute set by OPENDAP servers (:issue:`2583`).
   By `Deepak Cherian <https://github.com/dcherian>`_
-
+- Fix failure in time encoding when exporting to netCDF with versions of pandas
+  less than 0.21.1 (:issue:`2623`). By `Spencer Clark
+  <https://github.com/spencerkclark>`_.
+- Fix MultiIndex selection to update label and level (:issue:`2619`).
+  By `Keisuke Fujii <https://github.com/fujiisoup>`_.

 .. _whats-new.0.11.0:

@@ -256,13 +264,17 @@ Announcements of note:
   for more details.
 - We have a new :doc:`roadmap` that outlines our future development plans.

+- `Dataset.apply` now properly documents the way `func` is called.
+  By `Matti Eskelinen <https://github.com/maaleske>`_.
+
 Enhancements
 ~~~~~~~~~~~~

 - :py:meth:`~xarray.DataArray.differentiate` and
   :py:meth:`~xarray.Dataset.differentiate` are newly added.
   (:issue:`1332`)
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
+
 - Default colormap for sequential and divergent data can now be set via
   :py:func:`~xarray.set_options()`
   (:issue:`2394`)
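
A minimal sketch of the ``loffset`` kwarg mentioned in the changelog above, assuming hourly input data (the variable names here are illustrative, not from the commit); as in pandas, it shifts the labels of the resampled index:

import numpy as np
import pandas as pd
import xarray as xr

# Resample hourly data to daily means, labeling each bin at midday
# instead of midnight, via the pandas-style loffset kwarg.
times = pd.date_range('2000-01-01', periods=48, freq='H')
da = xr.DataArray(np.arange(48.), coords=[times], dims='time')
daily = da.resample(time='D', loffset='12H').mean()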

xarray/coding/times.py

Lines changed: 1 addition & 1 deletion

@@ -357,7 +357,7 @@ def encode_cf_datetime(dates, units=None, calendar=None):

     delta_units = _netcdf_to_numpy_timeunit(delta)
     time_delta = np.timedelta64(1, delta_units).astype('timedelta64[ns]')
-    ref_date = np.datetime64(pd.Timestamp(ref_date))
+    ref_date = pd.Timestamp(ref_date)

     # Wrap the dates in a DatetimeIndex to do the subtraction to ensure
     # an OverflowError is raised if the ref_date is too far away from
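
A rough sketch (not xarray's code verbatim) of the arithmetic the patched line feeds into: keeping ref_date as a pd.Timestamp means the subtraction stays in pandas, avoiding the np.datetime64 round-trip that failed with pandas < 0.21.1 (:issue:`2623`):

import numpy as np
import pandas as pd

# Subtracting a Timestamp from a DatetimeIndex yields a TimedeltaIndex,
# which raises OverflowError for out-of-range results; dividing by the
# unit delta gives the numeric offsets written to netCDF.
dates = pd.DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])
ref_date = pd.Timestamp('2000-01-01')
time_delta = np.timedelta64(1, 'D').astype('timedelta64[ns]')
num = (dates - ref_date) / time_delta
print(num)  # [0.0, 1.0, 2.0]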

xarray/core/dataset.py

Lines changed: 10 additions & 4 deletions

@@ -13,8 +13,8 @@
 import xarray as xr

 from . import (
-    alignment, duck_array_ops, formatting, groupby, indexing, ops, resample,
-    rolling, utils)
+    alignment, duck_array_ops, formatting, groupby, indexing, ops, pdcompat,
+    resample, rolling, utils)
 from ..coding.cftimeindex import _parse_array_of_cftime_strings
 from .alignment import align
 from .common import (
@@ -2426,6 +2426,12 @@ def stack(self, dimensions=None, **dimensions_kwargs):

     def _unstack_once(self, dim):
         index = self.get_index(dim)
+        # GH2619. For MultiIndex, we need to call remove_unused_levels.
+        if LooseVersion(pd.__version__) >= "0.20":
+            index = index.remove_unused_levels()
+        else:  # for pandas 0.19
+            index = pdcompat.remove_unused_levels(index)
+
         full_idx = pd.MultiIndex.from_product(index.levels, names=index.names)

         # take a shortcut in case the MultiIndex was not modified.
@@ -2948,8 +2954,8 @@ def apply(self, func, keep_attrs=None, args=(), **kwargs):
         Parameters
         ----------
         func : function
-            Function which can be called in the form `f(x, **kwargs)` to
-            transform each DataArray `x` in this dataset into another
+            Function which can be called in the form `func(x, *args, **kwargs)`
+            to transform each DataArray `x` in this dataset into another
             DataArray.
         keep_attrs : bool, optional
             If True, the dataset's attributes (`attrs`) will be copied from
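
A sketch of the failure mode the _unstack_once change fixes (GH2619), using plain pandas: from_product rebuilds every combination of the *levels*, so stale level entries left behind by a selection would resurrect rows that were meant to be dropped:

import pandas as pd

# Slicing a MultiIndex keeps its labels subset but leaves the levels intact.
idx = pd.MultiIndex.from_product([range(2), list('ab')])[2:]  # 2 rows remain
full = pd.MultiIndex.from_product(idx.levels, names=idx.names)
print(len(full))  # 4 -- the unused level value 0 is resurrected

trimmed = idx.remove_unused_levels()  # pandas >= 0.20
print(len(pd.MultiIndex.from_product(trimmed.levels)))  # 2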

xarray/core/groupby.py

Lines changed: 8 additions & 4 deletions

@@ -503,7 +503,7 @@ def lookup_order(dimension):
         new_order = sorted(stacked.dims, key=lookup_order)
         return stacked.transpose(*new_order)

-    def apply(self, func, shortcut=False, **kwargs):
+    def apply(self, func, shortcut=False, args=(), **kwargs):
         """Apply a function over each array in the group and concatenate them
         together into a new array.

@@ -532,6 +532,8 @@ def apply(self, func, shortcut=False, **kwargs):
             If these conditions are satisfied `shortcut` provides significant
             speedup. This should be the case for many common groupby operations
             (e.g., applying numpy ufuncs).
+        args : tuple, optional
+            Positional arguments passed to `func`.
         **kwargs
             Used to call `func(ar, **kwargs)` for each array `ar`.

@@ -544,7 +546,7 @@ def apply(self, func, shortcut=False, **kwargs):
             grouped = self._iter_grouped_shortcut()
         else:
             grouped = self._iter_grouped()
-        applied = (maybe_wrap_array(arr, func(arr, **kwargs))
+        applied = (maybe_wrap_array(arr, func(arr, *args, **kwargs))
                    for arr in grouped)
         return self._combine(applied, shortcut=shortcut)

@@ -642,7 +644,7 @@ def wrapped_func(self, dim=DEFAULT_DIMS, axis=None,


 class DatasetGroupBy(GroupBy, ImplementsDatasetReduce):
-    def apply(self, func, **kwargs):
+    def apply(self, func, args=(), **kwargs):
         """Apply a function over each Dataset in the group and concatenate them
         together into a new Dataset.

@@ -661,6 +663,8 @@ def apply(self, func, **kwargs):
         ----------
         func : function
             Callable to apply to each sub-dataset.
+        args : tuple, optional
+            Positional arguments to pass to `func`.
         **kwargs
             Used to call `func(ds, **kwargs)` for each sub-dataset `ds`.

@@ -670,7 +674,7 @@ def apply(self, func, **kwargs):
             The result of splitting, applying and combining this dataset.
         """
         kwargs.pop('shortcut', None)  # ignore shortcut if set (for now)
-        applied = (func(ds, **kwargs) for ds in self._iter_grouped())
+        applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped())
         return self._combine(applied)


     def _combine(self, applied):
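
A small usage sketch of the new ``args`` parameter (the data and function here are hypothetical, not from the commit): positional arguments are forwarded to ``func`` alongside keyword arguments:

import numpy as np
import xarray as xr

# func receives each group's array first, then the entries of args,
# then any keyword arguments.
def scale_and_shift(da, factor, offset=0.0):
    return da * factor + offset

da = xr.DataArray(np.arange(6.), dims='x',
                  coords={'letter': ('x', list('aabbcc'))})
result = da.groupby('letter').apply(scale_and_shift, args=(2.,), offset=1.)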

xarray/core/indexing.py

Lines changed: 4 additions & 1 deletion

@@ -159,6 +159,10 @@ def convert_label_indexer(index, label, index_name='', method=None,
         indexer, new_index = index.get_loc_level(
             tuple(label.values()), level=tuple(label.keys()))

+        # GH2619. Raise a KeyError if nothing is chosen
+        if indexer.dtype.kind == 'b' and indexer.sum() == 0:
+            raise KeyError('{} not found'.format(label))
+
     elif isinstance(label, tuple) and isinstance(index, pd.MultiIndex):
         if _is_nested_tuple(label):
             indexer = index.get_locs(label)
@@ -168,7 +172,6 @@ def convert_label_indexer(index, label, index_name='', method=None,
             indexer, new_index = index.get_loc_level(
                 label, level=list(range(len(label)))
             )
-
     else:
         label = (label if getattr(label, 'ndim', 1) > 1  # vectorized-indexing
                  else _asarray_tuplesafe(label))
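
The user-visible effect of the new guard, mirroring the test_selection_multiindex test added below in test_dataarray.py: after a boolean selection over a stacked MultiIndex, selecting a label that no longer matches any row raises KeyError instead of silently returning an empty result:

import numpy as np
import pytest
import xarray as xr

da = xr.DataArray(np.arange(40).reshape(8, 5), dims=['x', 'y'],
                  coords={'x': np.arange(8), 'y': np.arange(5)})
stacked = da.stack(xy=['x', 'y'])
subset = stacked.isel(xy=stacked['x'] < 4)  # rows with x >= 4 are dropped
with pytest.raises(KeyError):
    subset.sel(x=5)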

xarray/core/pdcompat.py

Lines changed: 119 additions & 0 deletions (new file)

# The remove_unused_levels defined here was copied from the source code
# defined in pandas.core.indexes.multi.py

# For reference, here is a copy of the pandas copyright notice:

# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
# All rights reserved.

# Copyright (c) 2008-2011 AQR Capital Management, LLC
# All rights reserved.

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:

# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.

# * Redistributions in binary form must reproduce the above
#   copyright notice, this list of conditions and the following
#   disclaimer in the documentation and/or other materials provided
#   with the distribution.

# * Neither the name of the copyright holder nor the names of any
#   contributors may be used to endorse or promote products derived
#   from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import numpy as np
import pandas as pd


# for pandas 0.19
def remove_unused_levels(self):
    """
    Create a new MultiIndex from the current one, removing unused levels,
    i.e. level values that are not expressed in the labels.

    The resulting MultiIndex will have the same outward appearance,
    meaning the same .values and ordering. It will also be .equals()
    to the original.

    .. versionadded:: 0.20.0

    Returns
    -------
    MultiIndex

    Examples
    --------
    >>> i = pd.MultiIndex.from_product([range(2), list('ab')])
    MultiIndex(levels=[[0, 1], ['a', 'b']],
               labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
    >>> i[2:]
    MultiIndex(levels=[[0, 1], ['a', 'b']],
               labels=[[1, 1], [0, 1]])

    The 0 from the first level is not represented and can be removed:

    >>> i[2:].remove_unused_levels()
    MultiIndex(levels=[[1], ['a', 'b']],
               labels=[[0, 0], [0, 1]])
    """
    import pandas.core.algorithms as algos

    new_levels = []
    new_labels = []

    changed = False
    for lev, lab in zip(self.levels, self.labels):

        # Since few levels are typically unused, bincount() is more
        # efficient than unique() - however it only accepts positive values
        # (and drops order):
        uniques = np.where(np.bincount(lab + 1) > 0)[0] - 1
        has_na = int(len(uniques) and (uniques[0] == -1))

        if len(uniques) != len(lev) + has_na:
            # We have unused levels
            changed = True

            # Recalculate uniques, now preserving order.
            # Can easily be cythonized by exploiting the already existing
            # "uniques" and stop parsing "lab" when all items are found:
            uniques = algos.unique(lab)
            if has_na:
                na_idx = np.where(uniques == -1)[0]
                # Just ensure that -1 is in first position:
                uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

            # labels get mapped from uniques to 0:len(uniques)
            # -1 (if present) is mapped to last position
            label_mapping = np.zeros(len(lev) + has_na)
            # ... and reassigned value -1:
            label_mapping[uniques] = np.arange(len(uniques)) - has_na

            lab = label_mapping[lab]

            # new levels are simple
            lev = lev.take(uniques[has_na:])

        new_levels.append(lev)
        new_labels.append(lab)

    result = self._shallow_copy()

    if changed:
        result._reset_identity()
        result._set_levels(new_levels, validate=False)
        result._set_labels(new_labels, validate=False)

    return result
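
A sketch of the backport's calling convention, assuming a pandas 0.19 environment where MultiIndex has no remove_unused_levels method (on later pandas the method itself should be used instead, as _unstack_once does):

import pandas as pd
from xarray.core.pdcompat import remove_unused_levels

# The index is passed explicitly as the ``self`` argument of the
# module-level function.
idx = pd.MultiIndex.from_product([range(2), list('ab')])[2:]
trimmed = remove_unused_levels(idx)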

xarray/core/resample.py

Lines changed: 8 additions & 4 deletions

@@ -129,7 +129,7 @@ def __init__(self, *args, **kwargs):
                 "('{}')! ".format(self._resample_dim, self._dim))
         super(DataArrayResample, self).__init__(*args, **kwargs)

-    def apply(self, func, shortcut=False, **kwargs):
+    def apply(self, func, shortcut=False, args=(), **kwargs):
         """Apply a function over each array in the group and concatenate them
         together into a new array.

@@ -158,6 +158,8 @@ def apply(self, func, shortcut=False, **kwargs):
             If these conditions are satisfied `shortcut` provides significant
             speedup. This should be the case for many common groupby operations
             (e.g., applying numpy ufuncs).
+        args : tuple, optional
+            Positional arguments passed on to `func`.
         **kwargs
             Used to call `func(ar, **kwargs)` for each array `ar`.

@@ -167,7 +169,7 @@ def apply(self, func, shortcut=False, **kwargs):
             The result of splitting, applying and combining this array.
         """
         combined = super(DataArrayResample, self).apply(
-            func, shortcut=shortcut, **kwargs)
+            func, shortcut=shortcut, args=args, **kwargs)

         # If the aggregation function didn't drop the original resampling
         # dimension, then we need to do so before we can rename the proxy
@@ -240,7 +242,7 @@ def __init__(self, *args, **kwargs):
                 "('{}')! ".format(self._resample_dim, self._dim))
         super(DatasetResample, self).__init__(*args, **kwargs)

-    def apply(self, func, **kwargs):
+    def apply(self, func, args=(), **kwargs):
         """Apply a function over each Dataset in the groups generated for
         resampling and concatenate them together into a new Dataset.

@@ -259,6 +261,8 @@ def apply(self, func, **kwargs):
         ----------
         func : function
             Callable to apply to each sub-dataset.
+        args : tuple, optional
+            Positional arguments passed on to `func`.
         **kwargs
             Used to call `func(ds, **kwargs)` for each sub-dataset `ds`.

@@ -268,7 +272,7 @@ def apply(self, func, **kwargs):
             The result of splitting, applying and combining this dataset.
         """
         kwargs.pop('shortcut', None)  # ignore shortcut if set (for now)
-        applied = (func(ds, **kwargs) for ds in self._iter_grouped())
+        applied = (func(ds, *args, **kwargs) for ds in self._iter_grouped())
         combined = self._combine(applied)

         return combined.rename({self._resample_dim: self._dim})
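
A hypothetical Dataset-level counterpart of the test_da_resample_func_args test added below (the data and function are illustrative): positional arguments flow through DatasetResample.apply the same way:

import pandas as pd
import xarray as xr

# Each resampled sub-dataset is passed first, then args, then kwargs.
def add_args(ds, arg2, arg3=0.):
    return ds.mean('time') + arg2 + arg3

times = pd.date_range('2000', periods=3, freq='D')
ds = xr.Dataset({'foo': ('time', [1., 1., 1.])}, coords={'time': times})
result = ds.resample(time='D').apply(add_args, args=(1.,), arg3=1.)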

xarray/tests/test_coding_times.py

Lines changed: 13 additions & 0 deletions

@@ -737,3 +737,16 @@ def test_encode_cf_datetime_overflow(shape):
     num, _, _ = encode_cf_datetime(dates, units, calendar)
     roundtrip = decode_cf_datetime(num, units, calendar)
     np.testing.assert_array_equal(dates, roundtrip)
+
+
+def test_encode_cf_datetime_pandas_min():
+    # Test that encode_cf_datetime does not fail for versions
+    # of pandas < 0.21.1 (GH 2623).
+    dates = pd.date_range('2000', periods=3)
+    num, units, calendar = encode_cf_datetime(dates)
+    expected_num = np.array([0., 1., 2.])
+    expected_units = 'days since 2000-01-01 00:00:00'
+    expected_calendar = 'proleptic_gregorian'
+    np.testing.assert_array_equal(num, expected_num)
+    assert units == expected_units
+    assert calendar == expected_calendar

xarray/tests/test_dataarray.py

Lines changed: 25 additions & 0 deletions

@@ -1027,6 +1027,20 @@ def test_sel(lab_indexer, pos_indexer, replaced_idx=False,
         assert_identical(mdata.sel(x={'one': 'a', 'two': 1}),
                          mdata.sel(one='a', two=1))

+    def test_selection_multiindex(self):
+        # GH2619. For MultiIndex, we need to call remove_unused_levels.
+        ds = xr.DataArray(np.arange(40).reshape(8, 5), dims=['x', 'y'],
+                          coords={'x': np.arange(8), 'y': np.arange(5)})
+        ds = ds.stack(xy=['x', 'y'])
+        ds_isel = ds.isel(xy=ds['x'] < 4)
+        with pytest.raises(KeyError):
+            ds_isel.sel(x=5)
+
+        actual = ds_isel.unstack()
+        expected = ds.reset_index('xy').isel(xy=ds['x'] < 4)
+        expected = expected.set_index(xy=['x', 'y']).unstack()
+        assert_identical(expected, actual)
+
     def test_virtual_default_coords(self):
         array = DataArray(np.zeros((5,)), dims='x')
         expected = DataArray(range(5), dims='x', name='x')
@@ -2281,6 +2295,17 @@ def test_resample(self):
         with raises_regex(ValueError, 'index must be monotonic'):
             array[[2, 0, 1]].resample(time='1D')

+    def test_da_resample_func_args(self):
+
+        def func(arg1, arg2, arg3=0.):
+            return arg1.mean('time') + arg2 + arg3
+
+        times = pd.date_range('2000', periods=3, freq='D')
+        da = xr.DataArray([1., 1., 1.], coords=[times], dims=['time'])
+        expected = xr.DataArray([3., 3., 3.], coords=[times], dims=['time'])
+        actual = da.resample(time='D').apply(func, args=(1.,), arg3=1.)
+        assert_identical(actual, expected)
+
     @requires_cftime
     def test_resample_cftimeindex(self):
         cftime = _import_cftime()
