Skip to content

Commit af37ddd

Browse files
jukent authored and shoyer committed
Add rename_vars and rename_dims (#3045)
* Added rename_coords and rename_dims * Removed white space from blank lines * Changed rename_coords to rename_vars * Changed rename_coords to rename_vars in "See Also" or rename fx * Fixed renaming dimension indexing * Added testing for rename_vars and rename_dims * Testing and fx for renaming vars and dims * Met pep8 standards * Undid autopep8 for lines w noqa * Update xarray/tests/test_dataset.py Co-Authored-By: Maximilian Roos <[email protected]> * Cleaned up helper fxs and added actual_2 test * Update xarray/core/dataset.py Co-Authored-By: Maximilian Roos <[email protected]> * Update xarray/core/dataset.py Co-Authored-By: Maximilian Roos <[email protected]> * deleted misc file * Update xarray/core/dataset.py Co-Authored-By: Maximilian Roos <[email protected]> * _rename_var_dims_helper undefined test * Use separate rename_dims and rename_vars dictionaries * Fixed documentation and added inplace back * removing changes from rename * removed test set up to fail (will add back) * fixed coord vs variable in test rename_ * Moved rename_var to under new fx/methods * Update whats-new.rst * use pytest.raises to test for ValueError * did not assign failed * pep8 compliance
1 parent 378c330 commit af37ddd

File tree

4 files changed

+150
-37
lines changed

4 files changed

+150
-37
lines changed

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ Dataset contents
8787
Dataset.pipe
8888
Dataset.merge
8989
Dataset.rename
90+
Dataset.rename_vars
91+
Dataset.rename_dims
9092
Dataset.swap_dims
9193
Dataset.expand_dims
9294
Dataset.drop

doc/whats-new.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ v0.12.3 (unreleased)
2121
Enhancements
2222
~~~~~~~~~~~~
2323

24+
- Renaming variables and dimensions independently:
25+
Datasets with coordinate dimensions can now have only their dimension
26+
(using rename_dims) or only their coordinate (using rename_vars) renamed
27+
instead of the rename function applying to both. (:issue:`3026`)
28+
By `Julia Kent <https://github.com/jukent>`_.
29+
2430
Bug fixes
2531
~~~~~~~~~
2632

@@ -102,6 +108,8 @@ Enhancements to existing functionality
102108
accept a keyword argument ``restore_coord_dims`` which keeps the order
103109
of the dimensions of multi-dimensional coordinates intact (:issue:`1856`).
104110
By `Peter Hausamann <http://github.com/phausamann>`_.
111+
- Clean up Python 2 compatibility in code (:issue:`2950`)
112+
By `Guido Imperiale <https://github.com/crusaderky>`_.
105113
- Better warning message when supplying invalid objects to ``xr.merge``
106114
(:issue:`2948`). By `Mathias Hauser <https://github.com/mathause>`_.
107115
- Add ``errors`` keyword argument to :py:meth:`Dataset.drop` and :py:meth:`Dataset.drop_dims`

xarray/core/dataset.py

Lines changed: 106 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,8 @@ def merge_indexes(
149149

150150
for n in var_names:
151151
var = variables[n]
152-
if (current_index_variable is not None and
153-
var.dims != current_index_variable.dims):
152+
if (current_index_variable is not None
153+
and var.dims != current_index_variable.dims):
154154
raise ValueError(
155155
"dimension mismatch between %r %s and %r %s"
156156
% (dim, current_index_variable.dims, n, var.dims))
@@ -209,8 +209,8 @@ def split_indexes(
209209
Not public API. Used in Dataset and DataArray reset_index
210210
methods.
211211
"""
212-
if (isinstance(dims_or_levels, str)
213-
or not isinstance(dims_or_levels, Sequence)):
212+
if (isinstance(dims_or_levels, str) or
213+
not isinstance(dims_or_levels, Sequence)):
214214
dims_or_levels = [dims_or_levels]
215215

216216
dim_levels \
@@ -287,8 +287,8 @@ def __len__(self) -> int:
287287
return len(self._dataset._variables) - len(self._dataset._coord_names)
288288

289289
def __contains__(self, key) -> bool:
290-
return (key in self._dataset._variables and
291-
key not in self._dataset._coord_names)
290+
return (key in self._dataset._variables
291+
and key not in self._dataset._coord_names)
292292

293293
def __getitem__(self, key) -> 'DataArray':
294294
if key not in self._dataset._coord_names:
@@ -1188,8 +1188,8 @@ def identical(self, other):
11881188
Dataset.equals
11891189
"""
11901190
try:
1191-
return (utils.dict_equiv(self.attrs, other.attrs) and
1192-
self._all_compat(other, 'identical'))
1191+
return (utils.dict_equiv(self.attrs, other.attrs)
1192+
and self._all_compat(other, 'identical'))
11931193
except (TypeError, AttributeError):
11941194
return False
11951195

@@ -2151,8 +2151,8 @@ def _validate_interp_indexer(x, new_x):
21512151
# In the case of datetimes, the restrictions placed on indexers
21522152
# used with interp are stronger than those which are placed on
21532153
# isel, so we need an additional check after _validate_indexers.
2154-
if (_contains_datetime_like_objects(x) and
2155-
not _contains_datetime_like_objects(new_x)):
2154+
if (_contains_datetime_like_objects(x)
2155+
and not _contains_datetime_like_objects(new_x)):
21562156
raise TypeError('When interpolating over a datetime-like '
21572157
'coordinate, the coordinates to '
21582158
'interpolate to must be either datetime '
@@ -2264,19 +2264,18 @@ def _rename_vars(self, name_dict, dims_dict):
22642264
variables = OrderedDict()
22652265
coord_names = set()
22662266
for k, v in self.variables.items():
2267-
name = name_dict.get(k, k)
2268-
dims = tuple(dims_dict.get(dim, dim) for dim in v.dims)
22692267
var = v.copy(deep=False)
2270-
var.dims = dims
2268+
var.dims = tuple(dims_dict.get(dim, dim) for dim in v.dims)
2269+
name = name_dict.get(k, k)
22712270
if name in variables:
22722271
raise ValueError('the new name %r conflicts' % (name,))
22732272
variables[name] = var
22742273
if k in self._coord_names:
22752274
coord_names.add(name)
22762275
return variables, coord_names
22772276

2278-
def _rename_dims(self, dims_dict):
2279-
return {dims_dict.get(k, k): v for k, v in self.dims.items()}
2277+
def _rename_dims(self, name_dict):
2278+
return {name_dict.get(k, k): v for k, v in self.dims.items()}
22802279

22812280
def _rename_indexes(self, name_dict):
22822281
if self._indexes is None:
@@ -2293,9 +2292,9 @@ def _rename_indexes(self, name_dict):
22932292
indexes[new_name] = index
22942293
return indexes
22952294

2296-
def _rename_all(self, name_dict, dim_dict):
2297-
variables, coord_names = self._rename_vars(name_dict, dim_dict)
2298-
dims = self._rename_dims(dim_dict)
2295+
def _rename_all(self, name_dict, dims_dict):
2296+
variables, coord_names = self._rename_vars(name_dict, dims_dict)
2297+
dims = self._rename_dims(dims_dict)
22992298
indexes = self._rename_indexes(name_dict)
23002299
return variables, coord_names, dims, indexes
23012300

@@ -2322,21 +2321,91 @@ def rename(self, name_dict=None, inplace=None, **names):
23222321
See Also
23232322
--------
23242323
Dataset.swap_dims
2324+
Dataset.rename_vars
2325+
Dataset.rename_dims
23252326
DataArray.rename
23262327
"""
2327-
# TODO: add separate rename_vars and rename_dims methods.
23282328
inplace = _check_inplace(inplace)
23292329
name_dict = either_dict_or_kwargs(name_dict, names, 'rename')
2330-
for k, v in name_dict.items():
2330+
for k in name_dict.keys():
23312331
if k not in self and k not in self.dims:
23322332
raise ValueError("cannot rename %r because it is not a "
23332333
"variable or dimension in this dataset" % k)
23342334

23352335
variables, coord_names, dims, indexes = self._rename_all(
2336-
name_dict=name_dict, dim_dict=name_dict)
2336+
name_dict=name_dict, dims_dict=name_dict)
23372337
return self._replace(variables, coord_names, dims=dims,
23382338
indexes=indexes, inplace=inplace)
23392339

2340+
def rename_dims(self, dims_dict=None, **dims):
2341+
"""Returns a new object with renamed dimensions only.
2342+
2343+
Parameters
2344+
----------
2345+
dims_dict : dict-like, optional
2346+
Dictionary whose keys are current dimension names and
2347+
whose values are the desired names.
2348+
**dims, optional
2349+
Keyword form of ``dims_dict``.
2350+
One of dims_dict or dims must be provided.
2351+
2352+
Returns
2353+
-------
2354+
renamed : Dataset
2355+
Dataset with renamed dimensions.
2356+
2357+
See Also
2358+
--------
2359+
Dataset.swap_dims
2360+
Dataset.rename
2361+
Dataset.rename_vars
2362+
DataArray.rename
2363+
"""
2364+
dims_dict = either_dict_or_kwargs(dims_dict, dims, 'rename_dims')
2365+
for k in dims_dict:
2366+
if k not in self.dims:
2367+
raise ValueError("cannot rename %r because it is not a "
2368+
"dimension in this dataset" % k)
2369+
2370+
variables, coord_names, dims, indexes = self._rename_all(
2371+
name_dict={}, dims_dict=dims_dict)
2372+
return self._replace(variables, coord_names, dims=dims,
2373+
indexes=indexes)
2374+
2375+
def rename_vars(self, name_dict=None, **names):
2376+
"""Returns a new object with renamed variables including coordinates
2377+
2378+
Parameters
2379+
----------
2380+
name_dict : dict-like, optional
2381+
Dictionary whose keys are current variable or coordinate names and
2382+
whose values are the desired names.
2383+
**names, optional
2384+
Keyword form of ``name_dict``.
2385+
One of name_dict or names must be provided.
2386+
2387+
Returns
2388+
-------
2389+
renamed : Dataset
2390+
Dataset with renamed variables including coordinates
2391+
2392+
See Also
2393+
--------
2394+
Dataset.swap_dims
2395+
Dataset.rename
2396+
Dataset.rename_dims
2397+
DataArray.rename
2398+
"""
2399+
name_dict = either_dict_or_kwargs(name_dict, names, 'rename_vars')
2400+
for k in name_dict:
2401+
if k not in self:
2402+
raise ValueError("cannot rename %r because it is not a "
2403+
"variable or coordinate in this dataset" % k)
2404+
variables, coord_names, dims, indexes = self._rename_all(
2405+
name_dict=name_dict, dims_dict={})
2406+
return self._replace(variables, coord_names, dims=dims,
2407+
indexes=indexes)
2408+
23402409
def swap_dims(self, dims_dict, inplace=None):
23412410
"""Returns a new object with swapped dimensions.
23422411
@@ -2464,8 +2533,8 @@ def expand_dims(self, dim=None, axis=None, **dim_kwargs):
24642533
if d in self.dims:
24652534
raise ValueError(
24662535
'Dimension {dim} already exists.'.format(dim=d))
2467-
if (d in self._variables and
2468-
not utils.is_scalar(self._variables[d])):
2536+
if (d in self._variables
2537+
and not utils.is_scalar(self._variables[d])):
24692538
raise ValueError(
24702539
'{dim} already exists as coordinate or'
24712540
' variable name.'.format(dim=d))
@@ -3256,9 +3325,9 @@ def reduce(self, func, dim=None, keep_attrs=None, keepdims=False,
32563325
if not reduce_dims:
32573326
variables[name] = var
32583327
else:
3259-
if (not numeric_only or
3260-
np.issubdtype(var.dtype, np.number) or
3261-
(var.dtype == np.bool_)):
3328+
if (not numeric_only
3329+
or np.issubdtype(var.dtype, np.number)
3330+
or (var.dtype == np.bool_)):
32623331
if len(reduce_dims) == 1:
32633332
# unpack dimensions for the benefit of functions
32643333
# like np.argmin which can't handle tuple arguments
@@ -3791,8 +3860,8 @@ def diff(self, dim, n=1, label='upper'):
37913860
for name, var in self.variables.items():
37923861
if dim in var.dims:
37933862
if name in self.data_vars:
3794-
variables[name] = (var.isel(**kwargs_end) -
3795-
var.isel(**kwargs_start))
3863+
variables[name] = (var.isel(**kwargs_end)
3864+
- var.isel(**kwargs_start))
37963865
else:
37973866
variables[name] = var.isel(**kwargs_new)
37983867
else:
@@ -3976,8 +4045,8 @@ def sortby(self, variables, ascending=True):
39764045
for data_array in aligned_other_vars:
39774046
if data_array.ndim != 1:
39784047
raise ValueError("Input DataArray is not 1-D.")
3979-
if (data_array.dtype == object and
3980-
LooseVersion(np.__version__) < LooseVersion('1.11.0')):
4048+
if (data_array.dtype == object
4049+
and LooseVersion(np.__version__) < LooseVersion('1.11.0')):
39814050
raise NotImplementedError(
39824051
'sortby uses np.lexsort under the hood, which requires '
39834052
'numpy 1.11.0 or later to support object data-type.')
@@ -4053,9 +4122,9 @@ def quantile(self, q, dim=None, interpolation='linear',
40534122
reduce_dims = [d for d in var.dims if d in dims]
40544123
if reduce_dims or not var.dims:
40554124
if name not in self.coords:
4056-
if (not numeric_only or
4057-
np.issubdtype(var.dtype, np.number) or
4058-
var.dtype == np.bool_):
4125+
if (not numeric_only
4126+
or np.issubdtype(var.dtype, np.number)
4127+
or var.dtype == np.bool_):
40594128
if len(reduce_dims) == var.ndim:
40604129
# prefer to aggregate over axis=None rather than
40614130
# axis=(0, 1) if they will be equivalent, because
@@ -4171,8 +4240,8 @@ def differentiate(self, coord, edge_order=1, datetime_unit=None):
41714240

41724241
variables = OrderedDict()
41734242
for k, v in self.variables.items():
4174-
if (k in self.data_vars and dim in v.dims and
4175-
k not in self.coords):
4243+
if (k in self.data_vars and dim in v.dims
4244+
and k not in self.coords):
41764245
if _contains_datetime_like_objects(v):
41774246
v = v._to_numeric(datetime_unit=datetime_unit)
41784247
grad = duck_array_ops.gradient(
@@ -4348,8 +4417,8 @@ def filter_by_attrs(self, **kwargs):
43484417
has_value_flag = False
43494418
for attr_name, pattern in kwargs.items():
43504419
attr_value = variable.attrs.get(attr_name)
4351-
if ((callable(pattern) and pattern(attr_value)) or
4352-
attr_value == pattern):
4420+
if ((callable(pattern) and pattern(attr_value))
4421+
or attr_value == pattern):
43534422
has_value_flag = True
43544423
else:
43554424
has_value_flag = False

xarray/tests/test_dataset.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2162,6 +2162,40 @@ def test_rename_inplace(self):
21622162
# check virtual variables
21632163
assert_array_equal(data['t.dayofyear'], [1, 2, 3])
21642164

2165+
def test_rename_dims(self):
2166+
original = Dataset(
2167+
{'x': ('x', [0, 1, 2]), 'y': ('x', [10, 11, 12]), 'z': 42})
2168+
expected = Dataset(
2169+
{'x': ('x_new', [0, 1, 2]), 'y': ('x_new', [10, 11, 12]), 'z': 42})
2170+
expected = expected.set_coords('x')
2171+
dims_dict = {'x': 'x_new'}
2172+
actual = original.rename_dims(dims_dict)
2173+
assert_identical(expected, actual)
2174+
actual_2 = original.rename_dims(**dims_dict)
2175+
assert_identical(expected, actual_2)
2176+
2177+
# Test to raise ValueError
2178+
dims_dict_bad = {'x_bad': 'x_new'}
2179+
with pytest.raises(ValueError):
2180+
original.rename_dims(dims_dict_bad)
2181+
2182+
def test_rename_vars(self):
2183+
original = Dataset(
2184+
{'x': ('x', [0, 1, 2]), 'y': ('x', [10, 11, 12]), 'z': 42})
2185+
expected = Dataset(
2186+
{'x_new': ('x', [0, 1, 2]), 'y': ('x', [10, 11, 12]), 'z': 42})
2187+
expected = expected.set_coords('x_new')
2188+
name_dict = {'x': 'x_new'}
2189+
actual = original.rename_vars(name_dict)
2190+
assert_identical(expected, actual)
2191+
actual_2 = original.rename_vars(**name_dict)
2192+
assert_identical(expected, actual_2)
2193+
2194+
# Test to raise ValueError
2195+
names_dict_bad = {'x_bad': 'x_new'}
2196+
with pytest.raises(ValueError):
2197+
original.rename_vars(names_dict_bad)
2198+
21652199
def test_swap_dims(self):
21662200
original = Dataset({'x': [1, 2, 3], 'y': ('x', list('abc')), 'z': 42})
21672201
expected = Dataset({'z': 42},

0 commit comments

Comments
 (0)