diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index e50a4b099a8e1..b3a3f758e05b1 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -54,95 +54,111 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True): def _get_combined_index(indexes, intersect=False, sort=False): - # TODO: handle index names! - indexes = com.get_distinct_objs(indexes) - if len(indexes) == 0: - index = Index([]) - elif len(indexes) == 1: - index = indexes[0] - elif intersect: - index = indexes[0] - for other in indexes[1:]: - index = index.intersection(other) + if intersect: + return _intersect_indexes(indexes, sort=sort) else: - index = _union_indexes(indexes, sort=sort) - index = ensure_index(index) + return _union_indexes(indexes, sort=sort) + + +def _intersect_indexes(indexes, sort=True): + """Return the intersection of indexes + """ + if len(indexes) == 0: + return Index([]) # TODO + + indexes = com.get_distinct_objs(indexes) # distinct ids + result = indexes[0] + for other in indexes[1:]: + result = result.intersection(other) + if sort: - try: - index = index.sort_values() - except TypeError: - pass - return index + result = _maybe_sort(result) + + # TODO: names + + return result def _union_indexes(indexes, sort=True): if len(indexes) == 0: - raise AssertionError('Must have at least 1 Index to union') - if len(indexes) == 1: - result = indexes[0] - if isinstance(result, list): - result = Index(sorted(result)) - return result + return Index([]) + + indexes = com.get_distinct_objs(indexes) + # convert lists to indexes + # check if at least one 'special' indexes, kind = _sanitize_and_check(indexes) - def _unique_indices(inds): - def conv(i): - if isinstance(i, Index): - i = i.tolist() - return i + if kind == 'special': + return _union_indexes_special(indexes, sort=sort) + else: + return _union_indexes_no_special(indexes, sort=sort) - return Index( - lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort)) - if kind == 
'special': +def _union_indexes_special(indexes, sort=True): + if sort: result = indexes[0] - if hasattr(result, 'union_many'): + if hasattr(result, 'union_many'): # DatetimeIndex return result.union_many(indexes[1:]) else: for other in indexes[1:]: result = result.union(other) return result - elif kind == 'array': - index = indexes[0] - for other in indexes[1:]: - if not index.equals(other): - - if sort is None: - # TODO: remove once pd.concat sort default changes - warnings.warn(_sort_msg, FutureWarning, stacklevel=8) - sort = True + else: + raise NotImplementedError - return _unique_indices(indexes) +def _union_indexes_no_special(indexes, sort=True): + index = indexes[0] + if _all_indexes_same(indexes): + # name handled here name = _get_consensus_names(indexes)[0] if name != index.name: index = index._shallow_copy(name=name) + if sort: + index = _maybe_sort(index) return index - else: # kind='list' - return _unique_indices(indexes) + else: + # but not here + if sort is None: + # TODO: remove once pd.concat and df.append sort default changes + warnings.warn(_sort_msg, FutureWarning, stacklevel=8) + sort = True + return _unique_indices(indexes, sort=sort) def _sanitize_and_check(indexes): - kinds = list({type(index) for index in indexes}) + kinds = {type(index) for index in indexes} if list in kinds: if len(kinds) > 1: - indexes = [Index(com.try_sort(x)) - if not isinstance(x, Index) else - x for x in indexes] + # e.g. indexes = [Index([2, 3]), [[1, 2]]) + indexes = [Index(x) if isinstance(x, list) else x + for x in indexes] kinds.remove(list) else: + #e.g. 
indexes = [[1, 2]] return indexes, 'list' if len(kinds) > 1 or Index not in kinds: + # equivalent to any(kind != Index for kind in kinds) return indexes, 'special' else: return indexes, 'array' +def _unique_indices(inds, sort=True): + def conv(i): + if isinstance(i, Index): + i = i.tolist() + return i + + return Index( + lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort)) + + def _get_consensus_names(indexes): # find the non-none names, need to tupleify to make