Skip to content

Commit d578bfe

Browse files
Gregory Gundersen authored and max-sixty committed
Support keyword API for Dataset.drop (#3128)
* Support for keyword argument-based dropping.
* Cherry picked changes.
* Moved noqa to correct location; added check for when labels are coordinates.
* Added okwarning to drop docs.
* Made unit tests more explicit. Added test for dropping along multiple dimensions.
* Used black.
* Fixed typo.
Co-Authored-By: Maximilian Roos <[email protected]>
* Docs for amended drop API.
1 parent 6083d3c commit d578bfe

File tree

5 files changed

+101
-16
lines changed

5 files changed

+101
-16
lines changed

doc/indexing.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,7 @@ The :py:meth:`~xarray.Dataset.drop` method returns a new object with the listed
236236
index labels along a dimension dropped:
237237

238238
.. ipython:: python
239+
:okwarning:
239240
240241
ds.drop(['IN', 'IL'], dim='space')
241242

doc/whats-new.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,17 @@ Enhancements
5757

5858
- Added ``join='override'``. This only checks that index sizes are equal among objects and skips
5959
checking indexes for equality. By `Deepak Cherian <https://github.com/dcherian>`_.
60+
6061
- :py:func:`~xarray.concat` and :py:func:`~xarray.open_mfdataset` now support the ``join`` kwarg.
6162
It is passed down to :py:func:`~xarray.align`. By `Deepak Cherian <https://github.com/dcherian>`_.
63+
6264
- In :py:meth:`~xarray.Dataset.to_zarr`, passing ``mode`` is not mandatory if
6365
``append_dim`` is set, as it will automatically be set to ``'a'`` internally.
6466
By `David Brochart <https://github.com/davidbrochart>`_.
6567

68+
- :py:meth:`~xarray.Dataset.drop` now supports keyword arguments; dropping index labels by specifying both ``dim`` and ``labels`` is deprecated (:issue:`2910`).
69+
By `Gregory Gundersen <https://github.com/gwgundersen/>`_.
70+
6671
Bug fixes
6772
~~~~~~~~~
6873
- Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert

xarray/core/dataset.py

Lines changed: 60 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
)
5555
from .coordinates import (
5656
DatasetCoordinates,
57+
DataArrayCoordinates,
5758
LevelCoordinatesSource,
5859
assert_coordinate_consistent,
5960
remap_label_indexers,
@@ -3450,7 +3451,7 @@ def _assert_all_in_dataset(
34503451
)
34513452

34523453
# Drop variables
3453-
@overload
3454+
@overload # noqa: F811
34543455
def drop(
34553456
self, labels: Union[Hashable, Iterable[Hashable]], *, errors: str = "raise"
34563457
) -> "Dataset":
@@ -3463,7 +3464,9 @@ def drop(
34633464
) -> "Dataset":
34643465
...
34653466

3466-
def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811
3467+
def drop( # noqa: F811
3468+
self, labels=None, dim=None, *, errors="raise", **labels_kwargs
3469+
):
34673470
"""Drop variables or index labels from this dataset.
34683471
34693472
Parameters
@@ -3479,34 +3482,75 @@ def drop(self, labels, dim=None, *, errors="raise"): # noqa: F811
34793482
any of the variable or index labels passed are not
34803483
in the dataset. If 'ignore', any given labels that are in the
34813484
dataset are dropped and no error is raised.
3485+
**labels_kwargs : {dim: label, ...}, optional
3486+
The keyword arguments form of ``dim`` and ``labels``.
34823487
34833488
Returns
34843489
-------
34853490
dropped : Dataset
3491+
3492+
Examples
3493+
--------
3494+
>>> data = np.random.randn(2, 3)
3495+
>>> labels = ['a', 'b', 'c']
3496+
>>> ds = xr.Dataset({'A': (['x', 'y'], data), 'y': labels})
3497+
>>> ds.drop(y=['a', 'c'])
3498+
<xarray.Dataset>
3499+
Dimensions: (x: 2, y: 1)
3500+
Coordinates:
3501+
* y (y) <U1 'b'
3502+
Dimensions without coordinates: x
3503+
Data variables:
3504+
A (x, y) float64 -0.3454 0.1734
3505+
>>> ds.drop(y='b')
3506+
<xarray.Dataset>
3507+
Dimensions: (x: 2, y: 2)
3508+
Coordinates:
3509+
* y (y) <U1 'a' 'c'
3510+
Dimensions without coordinates: x
3511+
Data variables:
3512+
A (x, y) float64 -0.3944 -1.418 1.423 -1.041
34863513
"""
34873514
if errors not in ["raise", "ignore"]:
34883515
raise ValueError('errors must be either "raise" or "ignore"')
34893516

3490-
if dim is None:
3517+
labels_are_coords = isinstance(labels, DataArrayCoordinates)
3518+
if labels_kwargs or (utils.is_dict_like(labels) and not labels_are_coords):
3519+
labels_kwargs = utils.either_dict_or_kwargs(labels, labels_kwargs, "drop")
3520+
if dim is not None:
3521+
raise ValueError("cannot specify dim and dict-like arguments.")
3522+
ds = self
3523+
for dim, labels in labels_kwargs.items():
3524+
ds = ds._drop_labels(labels, dim, errors=errors)
3525+
return ds
3526+
elif dim is None:
34913527
if isinstance(labels, str) or not isinstance(labels, Iterable):
34923528
labels = {labels}
34933529
else:
34943530
labels = set(labels)
3495-
34963531
return self._drop_vars(labels, errors=errors)
34973532
else:
3498-
# Don't cast to set, as it would harm performance when labels
3499-
# is a large numpy array
3500-
if utils.is_scalar(labels):
3501-
labels = [labels]
3502-
labels = np.asarray(labels)
3503-
3504-
try:
3505-
index = self.indexes[dim]
3506-
except KeyError:
3507-
raise ValueError("dimension %r does not have coordinate labels" % dim)
3508-
new_index = index.drop(labels, errors=errors)
3509-
return self.loc[{dim: new_index}]
3533+
if utils.is_list_like(labels):
3534+
warnings.warn(
3535+
"dropping dimensions using list-like labels is deprecated; "
3536+
"use dict-like arguments.",
3537+
DeprecationWarning,
3538+
stacklevel=2,
3539+
)
3540+
return self._drop_labels(labels, dim, errors=errors)
3541+
3542+
def _drop_labels(self, labels=None, dim=None, errors="raise"):
3543+
# Don't cast to set, as it would harm performance when labels
3544+
# is a large numpy array
3545+
if utils.is_scalar(labels):
3546+
labels = [labels]
3547+
labels = np.asarray(labels)
3548+
try:
3549+
index = self.indexes[dim]
3550+
except KeyError:
3551+
raise ValueError("dimension %r does not have coordinate labels" % dim)
3552+
new_index = index.drop(labels, errors=errors)
3553+
return self.loc[{dim: new_index}]
35103554

35113555
def _drop_vars(self, names: set, errors: str = "raise") -> "Dataset":
35123556
if errors == "raise":

xarray/core/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,10 @@ def is_full_slice(value: Any) -> bool:
250250
return isinstance(value, slice) and value == slice(None)
251251

252252

253+
def is_list_like(value: Any) -> bool:
254+
return isinstance(value, list) or isinstance(value, tuple)
255+
256+
253257
def either_dict_or_kwargs(
254258
pos_kwargs: Optional[Mapping[Hashable, T]],
255259
kw_kwargs: Mapping[str, T],

xarray/tests/test_dataset.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2206,6 +2206,37 @@ def test_drop_index_labels(self):
22062206
with raises_regex(ValueError, "does not have coordinate labels"):
22072207
data.drop(1, "y")
22082208

2209+
def test_drop_labels_by_keyword(self):
2210+
# Tests for #2910: Support for an additional `drop()` API.
2211+
data = Dataset(
2212+
{"A": (["x", "y"], np.random.randn(2, 6)), "x": ["a", "b"], "y": range(6)}
2213+
)
2214+
# Basic functionality.
2215+
assert len(data.coords["x"]) == 2
2216+
2217+
# This API is allowed but deprecated.
2218+
with pytest.warns(DeprecationWarning):
2219+
ds1 = data.drop(["a"], dim="x")
2220+
ds2 = data.drop(x="a")
2221+
ds3 = data.drop(x=["a"])
2222+
ds4 = data.drop(x=["a", "b"])
2223+
ds5 = data.drop(x=["a", "b"], y=range(0, 6, 2))
2224+
2225+
assert_array_equal(ds1.coords["x"], ["b"])
2226+
assert_array_equal(ds2.coords["x"], ["b"])
2227+
assert_array_equal(ds3.coords["x"], ["b"])
2228+
assert ds4.coords["x"].size == 0
2229+
assert ds5.coords["x"].size == 0
2230+
assert_array_equal(ds5.coords["y"], [1, 3, 5])
2231+
2232+
# Error handling if user tries both approaches.
2233+
with pytest.raises(ValueError):
2234+
data.drop(labels=["a"], x="a")
2235+
with pytest.raises(ValueError):
2236+
data.drop(dim="x", x="a")
2237+
with pytest.raises(ValueError):
2238+
data.drop(labels=["a"], dim="x", x="a")
2239+
22092240
def test_drop_dims(self):
22102241
data = xr.Dataset(
22112242
{

0 commit comments

Comments
 (0)