From d3ffdb8d75e4438d2dd24f6092cfbcd027b93dcf Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Mon, 21 May 2018 17:18:31 -1000 Subject: [PATCH 1/3] WIP: utility functions for working with explicit coordinates --- xarray/core/coordinates.py | 66 ++++++++++++++++++++++++++++++ xarray/core/options.py | 3 ++ xarray/core/variable.py | 82 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index efe8affb2a3..30fc59fefc8 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -3,6 +3,7 @@ from collections import Mapping from contextlib import contextmanager +import numpy as np import pandas as pd from . import formatting, indexing @@ -314,6 +315,71 @@ def __unicode__(self): return formatting.indexes_repr(self) +def normalize_indexes(indexes, coords, sizes): + """Normalize indexes for Dataset/DataArray. + + - Validates that all indexes are pd.Index instances (or at least satisfy + the Index API we need for xarray). + - Combines indexes along the same dimension into a MultiIndex. + - Creates default indexes for variables whose name matches their sole + dimension. + + Parameters + ---------- + indexes : Optional[Dict[Any, pandas.Index]] + Explicitly supplied indexes, if any. + coords : Mapping[Any, xarray.Variable] + Coordinate variables from which to draw default indexes. + dim_sizes : Mapping[Any, int] + Integer sizes for each Dataset/DataArray dimension. + + Returns + ------- + Mapping[Any, pandas.Index] mapping indexing keys (levels/dimension names) + to indexes used for indexing along that dimension. + """ + indexes = {} if indexes is None else dict(indexes) + + # default indexes + for key in sizes: + if key not in indexes: + if key in coords: + indexes[key] = coords[key].to_index() + else: + # need to ensure dtype=int64 in case range is empty on Python 2 + indexes[key] = pd.Index( + range(sizes[key]), name=key, dtype=np.int64) + + # TODO: merge logic to combine indexes along the same dimension into a + # MultiIndex + + return indexes + + +def combine_indexes(input_indexes, input_coords, output_coords=None): + """Combine indexes from inputs into indexes for an operation result. + + - Combines indexes along the same dimension into a MultiIndex. + - Drops indexes corresponding to dropped coordinates. + + Parameters + ---------- + input_indexes : Sequence[Mapping[Any, pandas.Index]] + Sequence of mappings of indexes to combine. + input_coords : Sequence[Mapping[Any, pandas.Variable]] + Sequence of mappings of coordinate variables from input arguments. + output_coords : Optional[Sequence[Mapping[Any, pandas.Variable]]] + Optional sequence of mappings provided output coordinates. By default, + a single output including all coordinates from all inputs is assumed. + + Returns + ------- + Tuple[Mapping[Any, pandas.Index], ...] mapping indexing keys + (levels/dimension names) to indexes used for indexing along that dimension, + for each requested mapping of output coordinates. + """ + + def assert_coordinate_consistent(obj, coords): """ Maeke sure the dimension coordinate of obj is consistent with coords. diff --git a/xarray/core/options.py b/xarray/core/options.py index 48d4567fc99..a3af494083d 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -7,6 +7,9 @@ } +EXPLICIT_INDEXES = False + + class set_options(object): """Set options for xarray in a controlled context. 
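
For illustration, a minimal standalone sketch of the default-index behaviour that normalize_indexes adds above (the coords/sizes inputs here are made-up examples, not part of the patch):

    import numpy as np
    import pandas as pd
    import xarray as xr

    coords = {'x': xr.Variable(('x',), ['a', 'b', 'c'])}  # assumed example inputs
    sizes = {'x': 3, 'y': 2}

    indexes = {}
    for key in sizes:
        if key in coords:
            # a coordinate variable matching the dimension becomes its index
            indexes[key] = coords[key].to_index()
        else:
            # otherwise fall back to a default integer index; dtype=int64
            # keeps empty ranges consistent on Python 2
            indexes[key] = pd.Index(range(sizes[key]), name=key, dtype=np.int64)

    # indexes['x'] -> Index(['a', 'b', 'c'], name='x')
    # indexes['y'] -> integer index [0, 1] named 'y'
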
diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 52d470accfe..df93f58b285 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -117,6 +117,88 @@ def as_variable(obj, name=None): return obj +def as_index_or_compatible_data(data): + if isinstance(data, pd.Index): + return data + else: + return as_compatible_data(data) + + +def as_variables_with_multiindex_expansion(obj, name): + """Expand an object into one or more Variable objects. + + Parameters + ---------- + obj : object + Object to convert into a variable or variables. Like the obj argument + to as_variable(), but if data is a MultiIndex, each level is extracted + as a separate IndexVariable. + name : any + Name of this object, when used as a key in a dictionary. This is used + to set a default dimension name. + + Returns + ------- + OrderedDict with a single Variable/IndexVariable value or multiple + IndexVariable values (keyed by level name) if input data is a MultiIndex. + + Examples + -------- + >>> as_variables_with_multiindex_expansion([1, 2, 3], name='x') + OrderedDict([('x', IndexVariable(('x',), array([1, 2, 3])))]) + + >>> as_variables_with_multiindex_expansion(('y', [1, 2, 3]), name='x') + OrderedDict([('x', Variable(('y',), array([1, 2, 3])))]) + + >>> idx = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)], names=['y', 'z']) + >>> as_variables_with_multiindex_expansion(idx, name='x') + OrderedDict([('y', IndexVariable(('x',), array(['a', 'b']))), + ('z', IndexVariable(('x',), array([1, 2])))]) + """ + + if hasattr(obj, 'variable'): + # extract the primary Variable from DataArrays + obj = obj.variable + + if isinstance(obj, Variable): + variable = obj.copy(deep=False) + + elif utils.is_scalar(obj): + variable = Variable([], obj) + + else: + if isinstance(obj, tuple): + if len(obj) < 2: + # use .format() instead of % because it handles tuples + # consistently + raise TypeError('tuples to convert into variables must be of ' + 'the form (dims, data[, attrs, encoding]): ' + '{}'.format(obj)) + dims, data = obj[:2] + data = as_index_or_compatible_data(data) + args = obj[2:] + else: + dims = (name,) + data = as_index_or_compatible_data(obj) + args = () + + if data.ndim != 1: + raise MissingDimensionsError( + 'cannot set variable %r with %r-dimensional data ' + 'without explicit dimension names. Pass a tuple of ' + '(dims, data) instead.' 
% (name, data.ndim)) + + if isinstance(data, pd.MultiIndex): + raise NotImplementedError('TODO: expand MultiIndex objects.') + + if (name,) == dims or isinstance(data, pd.Index): + variable = IndexVariable(dims, data, *args, fastpath=True) + else: + variable = Variable(dims, data, *args, fastpath=True) + + return OrderedDict([(name, variable)]) + + def _maybe_wrap_data(data): """ Put pandas.Index and numpy.ndarray arguments in adapter objects to ensure From 84def15f73fd2910a555e69a5113e2be025c2df6 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Sun, 25 Nov 2018 15:48:02 -0800 Subject: [PATCH 2/3] some reorg --- xarray/core/coordinates.py | 65 ------------------------------- xarray/core/indexes.py | 80 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 65 deletions(-) create mode 100644 xarray/core/indexes.py diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 30fc59fefc8..0c5ac822e81 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -315,71 +315,6 @@ def __unicode__(self): return formatting.indexes_repr(self) -def normalize_indexes(indexes, coords, sizes): - """Normalize indexes for Dataset/DataArray. - - - Validates that all indexes are pd.Index instances (or at least satisfy - the Index API we need for xarray). - - Combines indexes along the same dimension into a MultiIndex. - - Creates default indexes for variables whose name matches their sole - dimension. - - Parameters - ---------- - indexes : Optional[Dict[Any, pandas.Index]] - Explicitly supplied indexes, if any. - coords : Mapping[Any, xarray.Variable] - Coordinate variables from which to draw default indexes. - dim_sizes : Mapping[Any, int] - Integer sizes for each Dataset/DataArray dimension. - - Returns - ------- - Mapping[Any, pandas.Index] mapping indexing keys (levels/dimension names) - to indexes used for indexing along that dimension. - """ - indexes = {} if indexes is None else dict(indexes) - - # default indexes - for key in sizes: - if key not in indexes: - if key in coords: - indexes[key] = coords[key].to_index() - else: - # need to ensure dtype=int64 in case range is empty on Python 2 - indexes[key] = pd.Index( - range(sizes[key]), name=key, dtype=np.int64) - - # TODO: merge logic to combine indexes along the same dimension into a - # MultiIndex - - return indexes - - -def combine_indexes(input_indexes, input_coords, output_coords=None): - """Combine indexes from inputs into indexes for an operation result. - - - Combines indexes along the same dimension into a MultiIndex. - - Drops indexes corresponding to dropped coordinates. - - Parameters - ---------- - input_indexes : Sequence[Mapping[Any, pandas.Index]] - Sequence of mappings of indexes to combine. - input_coords : Sequence[Mapping[Any, pandas.Variable]] - Sequence of mappings of coordinate variables from input arguments. - output_coords : Optional[Sequence[Mapping[Any, pandas.Variable]]] - Optional sequence of mappings provided output coordinates. By default, - a single output including all coordinates from all inputs is assumed. - - Returns - ------- - Tuple[Mapping[Any, pandas.Index], ...] mapping indexing keys - (levels/dimension names) to indexes used for indexing along that dimension, - for each requested mapping of output coordinates. - """ - - def assert_coordinate_consistent(obj, coords): """ Maeke sure the dimension coordinate of obj is consistent with coords. 
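
A standalone sketch of the level expansion these helpers perform, using the 'y'/'z' level names and parent dimension 'x' from the docstring example (xarray's public Variable API only; not the patched helper itself):

    import pandas as pd
    import xarray as xr

    idx = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)], names=['y', 'z'])

    # each named level becomes its own 1D variable along the parent dimension 'x'
    level_vars = {name: xr.Variable(('x',), idx.get_level_values(name))
                  for name in idx.names}
    # level_vars['y'] holds array(['a', 'b']); level_vars['z'] holds array([1, 2])
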
diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py new file mode 100644 index 00000000000..4d5ef54d973 --- /dev/null +++ b/xarray/core/indexes.py @@ -0,0 +1,80 @@ +from __future__ import absolute_import, division, print_function + +import numpy as np +import pandas as pd + + +def normalize_indexes(indexes, coords, sizes): + """Normalize indexes for Dataset/DataArray. + + Validates that all indexes are pd.Index instances (or at least satisfy + the Index API we need for xarray). Creates default indexes for variables + whose name matches their sole dimension. + + Eventually: consider combining indexes along the same dimension into a + MultiIndex. + + Parameters + ---------- + indexes : Optional[Dict[Any, pandas.Index]] + Explicitly supplied indexes, if any. + coords : Mapping[Any, xarray.Variable] + Coordinate variables from which to draw default indexes. + dim_sizes : Mapping[Any, int] + Integer sizes for each Dataset/DataArray dimension. + + Returns + ------- + Mapping[Any, pandas.Index] mapping indexing keys (levels/dimension names) + to indexes used for indexing along that dimension. + """ + indexes = {} if indexes is None else dict(indexes) + + # default indexes + for key in sizes: + if key not in indexes: + if key in coords: + indexes[key] = coords[key].to_index() + else: + # need to ensure dtype=int64 in case range is empty on Python 2 + indexes[key] = pd.Index( + range(sizes[key]), name=key, dtype=np.int64) + + # TODO: merge logic to combine indexes along the same dimension into a + # MultiIndex + + return indexes + + +def combine_indexes(input_indexes, output_coords, unsafe=True): + """Combine indexes from inputs into indexes for an operation result. + + Drops indexes corresponding to dropped coordinates. + + Eventually: consider combining indexes along the same dimension into a + MultiIndex. + + Parameters + ---------- + input_indexes : Sequence[Mapping[Any, pandas.Index]] + Sequence of mappings of indexes to combine. + output_coords : Optional[Sequence[Mapping[Any, pandas.Variable]]] + Optional sequence of mappings provided output coordinates. + unsafe : bool, optional + Whether it's OK to skip compatibility checks for input indexes. + + Returns + ------- + List[Mapping[Any, pandas.Index]] mapping variable names to indexes, + for each requested mapping of output coordinates. 
+ """ + if not unsafe: + raise NotImplementedError('safe index combining not supported yet') + output_indexes = [] + for output_coords_item in output_coords: + indexes = {} + for input_indexes_item in input_indexes: + for k, v in input_indexes_item.items(): + if k in output_coords_item: + indexes[k] = v + output_indexes.append(indexes) From b2da98b4562ffb69e861abb3b5472649043f4837 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 27 Nov 2018 08:23:10 -0800 Subject: [PATCH 3/3] Expand MultiIndex in Dataset/DataArray constructors --- xarray/core/dataarray.py | 31 ++++++++------- xarray/core/dataset.py | 31 +++------------ xarray/core/formatting.py | 8 ++-- xarray/core/indexes.py | 21 ++++------ xarray/core/merge.py | 21 ++++++---- xarray/core/variable.py | 71 +++++++++++----------------------- xarray/tests/test_dataarray.py | 6 +-- xarray/tests/test_dataset.py | 12 +++--- xarray/tests/test_variable.py | 41 +++++++++++++++++++- 9 files changed, 120 insertions(+), 122 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 17af3cf2cd1..916d6efac8f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -21,9 +21,10 @@ from .utils import ( _check_inplace, decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution) +from .merge import expand_variable_dicts, merge_variables from .variable import ( IndexVariable, Variable, as_compatible_data, as_variable, - assert_unique_multiindex_level_names) + assert_unique_multiindex_level_names, maybe_expand_multiindex) def _infer_coords_and_dims(shape, coords, dims): @@ -58,19 +59,24 @@ def _infer_coords_and_dims(shape, coords, dims): if not isinstance(d, basestring): raise TypeError('dimension %s is not a string' % d) - new_coords = OrderedDict() - - if utils.is_dict_like(coords): - for k, v in coords.items(): - new_coords[k] = as_variable(v, name=k) - elif coords is not None: + if coords is None: + coords = OrderedDict() + elif not utils.is_dict_like(coords): + # Convert list-like coords into a dict + coords_dict = OrderedDict() for dim, coord in zip(dims, coords): var = as_variable(coord, name=dim) var.dims = (dim,) - new_coords[dim] = var + coords_dict[dim] = var + coords = coords_dict + + # Combine coordinates, including MultiIndex levels + expanded = expand_variable_dicts([coords]) + coords = merge_variables(expanded, compat='equals') + # Check consistent sizes = dict(zip(dims, shape)) - for k, v in new_coords.items(): + for k, v in coords.items(): if any(d not in dims for d in v.dims): raise ValueError('coordinate %s has dimensions %s, but these ' 'are not a subset of the DataArray ' @@ -88,9 +94,9 @@ def _infer_coords_and_dims(shape, coords, dims): 'matching the dimension size' % (k, v.shape, (sizes[k],))) - assert_unique_multiindex_level_names(new_coords) + # assert_unique_multiindex_level_names(coords) - return new_coords, dims + return coords, dims class _LocIndexer(object): @@ -462,8 +468,7 @@ def _getitem_coord(self, key): var = self._coords[key] except KeyError: dim_sizes = dict(zip(self.dims, self.shape)) - _, key, var = _get_virtual_variable( - self._coords, key, self._level_coords, dim_sizes) + _, key, var = _get_virtual_variable(self._coords, key, dim_sizes) return self._replace_maybe_drop_dims(var, name=key) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4f9c61b3269..aa9e302061e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -44,12 +44,10 @@ 'quarter'] -def _get_virtual_variable(variables, key, level_vars=None, dim_sizes=None): - 
"""Get a virtual variable (e.g., 'time.year' or a MultiIndex level) +def _get_virtual_variable(variables, key, dim_sizes=None): + """Get a virtual variable (e.g., 'time.year') from a dict of xarray.Variable objects (if possible) """ - if level_vars is None: - level_vars = {} if dim_sizes is None: dim_sizes = {} @@ -69,11 +67,7 @@ def _get_virtual_variable(variables, key, level_vars=None, dim_sizes=None): else: raise KeyError(key) - if ref_name in level_vars: - dim_var = variables[level_vars[ref_name]] - ref_var = dim_var.to_index_variable().get_level_variable(ref_name) - else: - ref_var = variables[ref_name] + ref_var = variables[ref_name] if var_name is None: virtual_var = ref_var @@ -843,21 +837,6 @@ def _subset_with_all_valid_coords(self, variables, coord_names, attrs): return self._construct_direct(variables, coord_names, dims, attrs) - @property - def _level_coords(self): - """Return a mapping of all MultiIndex levels and their corresponding - coordinate name. - """ - level_coords = OrderedDict() - for cname in self._coord_names: - var = self.variables[cname] - if var.ndim == 1 and isinstance(var, IndexVariable): - level_names = var.level_names - if level_names is not None: - dim, = var.dims - level_coords.update({lname: dim for lname in level_names}) - return level_coords - def _copy_listed(self, names): """Create a new Dataset with the listed variables from this dataset and the all relevant coordinates. Skips all validation. @@ -870,7 +849,7 @@ def _copy_listed(self, names): variables[name] = self._variables[name] except KeyError: ref_name, var_name, var = _get_virtual_variable( - self._variables, name, self._level_coords, self.dims) + self._variables, name, self.dims) variables[var_name] = var if ref_name in self._coord_names or ref_name in self.dims: coord_names.add(var_name) @@ -887,7 +866,7 @@ def _construct_dataarray(self, name): variable = self._variables[name] except KeyError: _, name, variable = _get_virtual_variable( - self._variables, name, self._level_coords, self.dims) + self._variables, name, self.dims) coords = OrderedDict() needed_dims = set(variable.dims) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 5dd3cf06025..a17ca3013a3 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -253,7 +253,9 @@ def summarize_variable(name, var, col_width, show_values=True, def _summarize_coord_multiindex(coord, col_width, marker): first_col = pretty_print(u' %s %s ' % (marker, coord.name), col_width) - return u'%s(%s) MultiIndex' % (first_col, unicode_type(coord.dims[0])) + level_names_str = ', '.join(map(str, coord.level_names)) + return (u'%s(%s) MultiIndex[%s]' % + (first_col, unicode_type(coord.dims[0]), level_names_str)) def _summarize_coord_levels(coord, col_width, marker=u'-'): @@ -277,9 +279,7 @@ def summarize_coord(name, var, col_width): if is_index: coord = var.variable.to_index_variable() if coord.level_names is not None: - return u'\n'.join( - [_summarize_coord_multiindex(coord, col_width, marker), - _summarize_coord_levels(coord, col_width)]) + return _summarize_coord_multiindex(coord, col_width, marker) return summarize_variable( name, var.variable, col_width, show_values, marker) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 4d5ef54d973..dcf738627cc 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -4,7 +4,7 @@ import pandas as pd -def normalize_indexes(indexes, coords, sizes): +def normalize_indexes(coords, sizes, indexes=None): """Normalize indexes for Dataset/DataArray. 
Validates that all indexes are pd.Index instances (or at least satisfy @@ -16,12 +16,12 @@ def normalize_indexes(indexes, coords, sizes): Parameters ---------- - indexes : Optional[Dict[Any, pandas.Index]] - Explicitly supplied indexes, if any. coords : Mapping[Any, xarray.Variable] Coordinate variables from which to draw default indexes. dim_sizes : Mapping[Any, int] Integer sizes for each Dataset/DataArray dimension. + indexes : Optional[Dict[Any, pandas.Index]] + Explicitly supplied indexes, if any. Returns ------- @@ -40,36 +40,28 @@ def normalize_indexes(indexes, coords, sizes): indexes[key] = pd.Index( range(sizes[key]), name=key, dtype=np.int64) - # TODO: merge logic to combine indexes along the same dimension into a - # MultiIndex - return indexes -def combine_indexes(input_indexes, output_coords, unsafe=True): +def result_indexes(input_indexes, output_coords): """Combine indexes from inputs into indexes for an operation result. Drops indexes corresponding to dropped coordinates. - Eventually: consider combining indexes along the same dimension into a - MultiIndex. + IMPORTANT: Assumes outputs are already aligned! Parameters ---------- input_indexes : Sequence[Mapping[Any, pandas.Index]] Sequence of mappings of indexes to combine. - output_coords : Optional[Sequence[Mapping[Any, pandas.Variable]]] + output_coords : Sequence[Mapping[Any, pandas.Variable] Optional sequence of mappings provided output coordinates. - unsafe : bool, optional - Whether it's OK to skip compatibility checks for input indexes. Returns ------- List[Mapping[Any, pandas.Index]] mapping variable names to indexes, for each requested mapping of output coordinates. """ - if not unsafe: - raise NotImplementedError('safe index combining not supported yet') output_indexes = [] for output_coords_item in output_coords: indexes = {} @@ -78,3 +70,4 @@ def combine_indexes(input_indexes, output_coords, unsafe=True): if k in output_coords_item: indexes[k] = v output_indexes.append(indexes) + return output_indexes diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 984dd2fa204..a2ea6fe9083 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -5,7 +5,8 @@ from .alignment import deep_align from .pycompat import OrderedDict, basestring from .utils import Frozen -from .variable import as_variable, assert_unique_multiindex_level_names +from .variable import ( + as_variable, assert_unique_multiindex_level_names, maybe_expand_multiindex) PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel) @@ -197,11 +198,10 @@ def expand_variable_dicts(list_of_variable_dicts): for variables in list_of_variable_dicts: if isinstance(variables, Dataset): - sanitized_vars = variables.variables + var_dicts.append(variables.variables) else: - # append coords to var_dicts before appending sanitized_vars, - # because we want coords to appear first sanitized_vars = OrderedDict() + var_dicts.append(sanitized_vars) for name, var in variables.items(): if isinstance(var, DataArray): @@ -211,10 +211,13 @@ def expand_variable_dicts(list_of_variable_dicts): coords.pop(name, None) var_dicts.append(coords) + multiindex_vars = maybe_expand_multiindex(var, name) + if multiindex_vars is not None: + var_dicts.append(multiindex_vars) + var = as_variable(var, name=name) sanitized_vars[name] = var - var_dicts.append(sanitized_vars) return var_dicts @@ -253,6 +256,10 @@ def determine_coords(list_of_variable_dicts): coords.discard(name) coord_names.update(coords) + multiindex_vars = maybe_expand_multiindex(var, name) + if multiindex_vars is not None: + 
coord_names.update(multiindex_vars) + return coord_names, noncoord_names @@ -296,7 +303,7 @@ def merge_coords_for_inplace_math(objs, priority_vars=None): """ expanded = expand_variable_dicts(objs) variables = merge_variables(expanded, priority_vars) - assert_unique_multiindex_level_names(variables) + # assert_unique_multiindex_level_names(variables) return variables @@ -443,7 +450,7 @@ def merge_core(objs, priority_vars = _get_priority_vars(aligned, priority_arg, compat=compat) variables = merge_variables(expanded, priority_vars, compat=compat) - assert_unique_multiindex_level_names(variables) + # assert_unique_multiindex_level_names(variables) dims = calculate_dimensions(variables) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1cd77a7cc30..9ede83a676d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -114,14 +114,7 @@ def as_variable(obj, name=None): return obj -def as_index_or_compatible_data(data): - if isinstance(data, pd.Index): - return data - else: - return as_compatible_data(data) - - -def as_variables_with_multiindex_expansion(obj, name): +def maybe_expand_multiindex(obj, name): """Expand an object into one or more Variable objects. Parameters @@ -149,51 +142,33 @@ def as_variables_with_multiindex_expansion(obj, name): >>> idx = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)], names=['y', 'z']) >>> as_variables_with_multiindex_expansion(idx, name='x') - OrderedDict([('y', IndexVariable(('x',), array(['a', 'b']))), - ('z', IndexVariable(('x',), array([1, 2])))]) + OrderedDict([('y', Variable(('x',), array(['a', 'b']))), + ('z', Variable(('x',), array([1, 2])))]) """ - - if hasattr(obj, 'variable'): - # extract the primary Variable from DataArrays - obj = obj.variable - - if isinstance(obj, Variable): - variable = obj.copy(deep=False) - - elif utils.is_scalar(obj): - variable = Variable([], obj) - - else: + tuple_with_multiindex = (isinstance(obj, tuple) and len(obj) > 1 and + isinstance(obj[1], pd.MultiIndex)) + if tuple_with_multiindex or isinstance(obj, pd.MultiIndex): if isinstance(obj, tuple): - if len(obj) < 2: - # use .format() instead of % because it handles tuples - # consistently - raise TypeError('tuples to convert into variables must be of ' - 'the form (dims, data[, attrs, encoding]): ' - '{}'.format(obj)) - dims, data = obj[:2] - data = as_index_or_compatible_data(data) - args = obj[2:] + dims, index = obj[:2] else: dims = (name,) - data = as_index_or_compatible_data(obj) - args = () - - if data.ndim != 1: - raise MissingDimensionsError( - 'cannot set variable %r with %r-dimensional data ' - 'without explicit dimension names. Pass a tuple of ' - '(dims, data) instead.' 
% (name, data.ndim)) - - if isinstance(data, pd.MultiIndex): - raise NotImplementedError('TODO: expand MultiIndex objects.') - - if (name,) == dims or isinstance(data, pd.Index): - variable = IndexVariable(dims, data, *args, fastpath=True) - else: - variable = Variable(dims, data, *args, fastpath=True) + index = obj + if any(level_name is None for level_name in index.names): + raise ValueError( + 'cannot convert a MultiIndex with unknown level names {} into ' + 'xarray variables: {}'.format(index.names, index)) + if len(set(index.names)) != len(index.names): + raise ValueError( + 'cannot convert a MultiIndex with non-unique level names {} ' + 'into xarray variables: {}'.format(index.names, index)) + multiindex_vars = OrderedDict() + for level_name in index.names: + multiindex_vars[level_name] = Variable( + dims, index.get_level_values(level_name)) + else: + multiindex_vars = None - return OrderedDict([(name, variable)]) + return multiindex_vars def _maybe_wrap_data(data): diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 87ee60715a1..015ee4c2ce2 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -59,9 +59,9 @@ def test_repr_multiindex(self): array([0, 1, 2, 3]) Coordinates: - * x (x) MultiIndex - - level_1 (x) object 'a' 'a' 'b' 'b' - - level_2 (x) int64 1 2 1 2""") + * x (x) MultiIndex[level_1, level_2] + level_1 (x) object 'a' 'a' 'b' 'b' + level_2 (x) int64 1 2 1 2""") assert expected == repr(self.mda) def test_properties(self): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 89ea3ba78a0..652df3d1491 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -144,9 +144,9 @@ def test_repr_multiindex(self): Dimensions: (x: 4) Coordinates: - * x (x) MultiIndex - - level_1 (x) object 'a' 'a' 'b' 'b' - - level_2 (x) int64 1 2 1 2 + * x (x) MultiIndex[level_1, level_2] + level_1 (x) object 'a' 'a' 'b' 'b' + level_2 (x) int64 1 2 1 2 Data variables: *empty*""") actual = '\n'.join(x.rstrip() for x in repr(data).split('\n')) @@ -162,9 +162,9 @@ def test_repr_multiindex(self): Dimensions: (x: 4) Coordinates: - * x (x) MultiIndex - - a_quite_long_level_name (x) object 'a' 'a' 'b' 'b' - - level_2 (x) int64 1 2 1 2 + * x (x) MultiIndex[a_quite_long_level_name, level_2] + a_quite_long_level_name (x) object 'a' 'a' 'b' 'b' + level_2 (x) int64 1 2 1 2 Data variables: *empty*""") actual = '\n'.join(x.rstrip() for x in repr(data).split('\n')) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 0bd440781ac..24374dc6d75 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -21,7 +21,8 @@ OuterIndexer, PandasIndexAdapter, VectorizedIndexer) from xarray.core.pycompat import PY3, OrderedDict from xarray.core.utils import NDArrayMixin -from xarray.core.variable import as_compatible_data, as_variable +from xarray.core.variable import ( + as_compatible_data, as_variable, maybe_expand_multiindex) from xarray.tests import requires_bottleneck from . 
import ( @@ -1949,6 +1950,44 @@ class CustomIndexable(CustomArray, indexing.ExplicitlyIndexed): assert isinstance(orig._data, CustomIndexable) +def assert_dict_identical(expected, actual): + assert expected.keys() == actual.keys() + for k in expected: + assert_identical(expected[k], actual[k]) + + +def test_maybe_expand_multiindex(): + + result = maybe_expand_multiindex([1, 2, 3], name='x') + assert result is None + + result = maybe_expand_multiindex(('y', [1, 2, 3]), name='x') + assert result is None + + index = pd.MultiIndex.from_arrays([[1, 2, 3]], names=['x']) + result = maybe_expand_multiindex(index, name='y') + expected = OrderedDict([('x', Variable(('y',), [1, 2, 3]))]) + assert_dict_identical(expected, result) + + result = maybe_expand_multiindex(('y', index), name='y') + expected = OrderedDict([('x', Variable(('y',), [1, 2, 3]))]) + assert_dict_identical(expected, result) + + index = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)], names=['y', 'z']) + result = maybe_expand_multiindex(index, name='x') + expected = OrderedDict([('y', Variable(('x',), ['a', 'b'])), + ('z', Variable(('x',), [1, 2]))]) + assert_dict_identical(expected, result) + + index = pd.MultiIndex.from_arrays([[1, 2, 3]]) + with raises_regex(ValueError, 'unknown level names'): + maybe_expand_multiindex(index, 'foo') + + index = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)], names=['A', 'A']) + with raises_regex(ValueError, 'non-unique level names'): + maybe_expand_multiindex(index, 'foo') + + def test_raise_no_warning_for_nan_in_binary_ops(): with pytest.warns(None) as record: Variable('x', [1, 2, np.NaN]) > 0
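
For reference, the user-facing effect exercised by the updated test_repr_multiindex: constructing a DataArray from a MultiIndex coordinate and inspecting its repr (the expected output below is taken from the test expectation in this patch and assumes the series is applied):

    import pandas as pd
    import xarray as xr

    mindex = pd.MultiIndex.from_product([['a', 'b'], [1, 2]],
                                        names=('level_1', 'level_2'))
    mda = xr.DataArray([0, 1, 2, 3], coords={'x': mindex}, dims='x')
    # repr(mda) should summarize the levels as ordinary coordinates:
    #   * x        (x) MultiIndex[level_1, level_2]
    #     level_1  (x) object 'a' 'a' 'b' 'b'
    #     level_2  (x) int64 1 2 1 2
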