From d22913119a92bf7fb9158dbf09f5dd755fd3d73c Mon Sep 17 00:00:00 2001 From: araraonline Date: Wed, 3 Oct 2018 13:19:36 -0300 Subject: [PATCH 1/8] Refactor index.intersect --- pandas/core/indexes/api.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index e50a4b099a8e1..1799e56b7a29f 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -56,14 +56,13 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True): def _get_combined_index(indexes, intersect=False, sort=False): # TODO: handle index names! indexes = com.get_distinct_objs(indexes) + if len(indexes) == 0: index = Index([]) elif len(indexes) == 1: index = indexes[0] elif intersect: - index = indexes[0] - for other in indexes[1:]: - index = index.intersection(other) + return _intersect_indexes(indexes, sort=sort) else: index = _union_indexes(indexes, sort=sort) index = ensure_index(index) @@ -73,6 +72,7 @@ def _get_combined_index(indexes, intersect=False, sort=False): index = index.sort_values() except TypeError: pass + return index @@ -125,6 +125,25 @@ def conv(i): return _unique_indices(indexes) +def _intersect_indexes(indexes, sort=True): + """Return the intersection of indexes + """ + if len(indexes) == 0: + return Index([]) + + indexes = com.get_distinct_objs(indexes) # distinct ids + + result = indexes[0] + for other in indexes[1:] + result = result.intersection(other) + + if sort: + result = _maybe_sort(result) + + return result + + + def _sanitize_and_check(indexes): kinds = list({type(index) for index in indexes}) From a386937852ef3dda9884fca48a6abb86fe6d675f Mon Sep 17 00:00:00 2001 From: araraonline Date: Wed, 3 Oct 2018 13:23:51 -0300 Subject: [PATCH 2/8] Refactor combined index --- pandas/core/indexes/api.py | 50 +++++++++++++++----------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 1799e56b7a29f..a9cec481eaf63 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -54,26 +54,30 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0, sort=True): def _get_combined_index(indexes, intersect=False, sort=False): - # TODO: handle index names! - indexes = com.get_distinct_objs(indexes) - - if len(indexes) == 0: - index = Index([]) - elif len(indexes) == 1: - index = indexes[0] - elif intersect: + if intersect: return _intersect_indexes(indexes, sort=sort) else: - index = _union_indexes(indexes, sort=sort) - index = ensure_index(index) + return _union_indexes(indexes, sort=sort) + + +def _intersect_indexes(indexes, sort=True): + """Return the intersection of indexes + """ + if len(indexes) == 0: + return Index([]) + + indexes = com.get_distinct_objs(indexes) # distinct ids + result = indexes[0] + for other in indexes[1:] + result = result.intersection(other) + if sort: - try: - index = index.sort_values() - except TypeError: - pass + result = _maybe_sort(result) - return index + # TODO: names + + return result def _union_indexes(indexes, sort=True): @@ -125,22 +129,6 @@ def conv(i): return _unique_indices(indexes) -def _intersect_indexes(indexes, sort=True): - """Return the intersection of indexes - """ - if len(indexes) == 0: - return Index([]) - - indexes = com.get_distinct_objs(indexes) # distinct ids - - result = indexes[0] - for other in indexes[1:] - result = result.intersection(other) - - if sort: - result = _maybe_sort(result) - - return result From 3417361d1229b21cbfa06c10fff40231dbdf2591 Mon Sep 17 00:00:00 2001 From: araraonline Date: Wed, 3 Oct 2018 13:27:57 -0300 Subject: [PATCH 3/8] WIP Refactor _union_indexes --- pandas/core/indexes/api.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index a9cec481eaf63..03d91114aa2ae 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -64,7 +64,7 @@ def _intersect_indexes(indexes, sort=True): """Return the intersection of indexes """ if len(indexes) == 0: - return Index([]) + return Index([]) # TODO indexes = com.get_distinct_objs(indexes) # distinct ids @@ -82,7 +82,10 @@ def _intersect_indexes(indexes, sort=True): def _union_indexes(indexes, sort=True): if len(indexes) == 0: - raise AssertionError('Must have at least 1 Index to union') + return Index([]) + + indexes = com.get_distinct_objs(indexes) + if len(indexes) == 1: result = indexes[0] if isinstance(result, list): @@ -129,9 +132,6 @@ def conv(i): return _unique_indices(indexes) - - - def _sanitize_and_check(indexes): kinds = list({type(index) for index in indexes}) From 7c6e5004a0402f35e325d20c3ba5d31f84ead38e Mon Sep 17 00:00:00 2001 From: araraonline Date: Wed, 3 Oct 2018 13:45:54 -0300 Subject: [PATCH 4/8] Refactor _sanitize and check --- pandas/core/indexes/api.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 03d91114aa2ae..252706ba1737e 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -133,18 +133,20 @@ def conv(i): def _sanitize_and_check(indexes): - kinds = list({type(index) for index in indexes}) + kinds = {type(index) for index in indexes} if list in kinds: if len(kinds) > 1: - indexes = [Index(com.try_sort(x)) - if not isinstance(x, Index) else - x for x in indexes] + # e.g. indexes = [Index([2, 3]), [[1, 2]]) + indexes = [Index(x) if isinstance(x, list) else x + for x in indexes] kinds.remove(list) else: + #e.g. indexes = [[1, 2]] return indexes, 'list' if len(kinds) > 1 or Index not in kinds: + # equivalent to any(kind != Index for kind in kinds) return indexes, 'special' else: return indexes, 'array' From ccf38aa0b135e297ac82e95a6a91ff4019c3bdd6 Mon Sep 17 00:00:00 2001 From: araraonline Date: Wed, 3 Oct 2018 14:43:44 -0300 Subject: [PATCH 5/8] WIP Refactor _union_indices --- pandas/core/indexes/api.py | 47 +++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 252706ba1737e..3f36639f7a1b6 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -89,29 +89,15 @@ def _union_indexes(indexes, sort=True): if len(indexes) == 1: result = indexes[0] if isinstance(result, list): - result = Index(sorted(result)) + result = Index(sorted(result)) # why do we sort?? return result + # convert lists to indexes + # check if at least one 'special' indexes, kind = _sanitize_and_check(indexes) - def _unique_indices(inds): - def conv(i): - if isinstance(i, Index): - i = i.tolist() - return i - - return Index( - lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort)) - if kind == 'special': - result = indexes[0] - - if hasattr(result, 'union_many'): - return result.union_many(indexes[1:]) - else: - for other in indexes[1:]: - result = result.union(other) - return result + return _union_indexes_special(indexes, sort=sort) elif kind == 'array': index = indexes[0] for other in indexes[1:]: @@ -132,6 +118,21 @@ def conv(i): return _unique_indices(indexes) +def _union_indexes_special(indexes, sort=True): + if sort: + result = indexes[0] + + if hasattr(result, 'union_many'): # DatetimeIndex + return result.union_many(indexes[1:]) + else: + for other in indexes[1:]: + result = result.union(other) + return result + else: + raise NotImplementedError + + + def _sanitize_and_check(indexes): kinds = {type(index) for index in indexes} @@ -152,6 +153,16 @@ def _sanitize_and_check(indexes): return indexes, 'array' +def _unique_indices(inds): + def conv(i): + if isinstance(i, Index): + i = i.tolist() + return i + + return Index( + lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort)) + + def _get_consensus_names(indexes): # find the non-none names, need to tupleify to make From 2d6cb1f8c67b5ec0e9cddb80296ff2f35d71c7ce Mon Sep 17 00:00:00 2001 From: araraonline Date: Wed, 3 Oct 2018 14:50:47 -0300 Subject: [PATCH 6/8] WIP Refactor _union_indices --- pandas/core/indexes/api.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 3f36639f7a1b6..bea76c0d01f73 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -99,23 +99,24 @@ def _union_indexes(indexes, sort=True): if kind == 'special': return _union_indexes_special(indexes, sort=sort) elif kind == 'array': - index = indexes[0] - for other in indexes[1:]: - if not index.equals(other): - - if sort is None: - # TODO: remove once pd.concat sort default changes - warnings.warn(_sort_msg, FutureWarning, stacklevel=8) - sort = True - return _unique_indices(indexes) + index = indexes[0] + if _all_indexes_same(indexes): + # name handled here + name = _get_consensus_names(indexes)[0] + if name != index.name: + index = index._shallow_copy(name=name) + return index + else: + # but not here + if sort is None: + # TODO: remove once pd.concat and df.append sort default changes + warnings.warn(_sort_msg, FutureWarning, stacklevel=8) + sort = True + return _unique_indices(indexes, sort=sort) - name = _get_consensus_names(indexes)[0] - if name != index.name: - index = index._shallow_copy(name=name) - return index else: # kind='list' - return _unique_indices(indexes) + return _unique_indices(indexes, sort=sort) def _union_indexes_special(indexes, sort=True): @@ -153,7 +154,7 @@ def _sanitize_and_check(indexes): return indexes, 'array' -def _unique_indices(inds): +def _unique_indices(inds, sort=sort): def conv(i): if isinstance(i, Index): i = i.tolist() From 6c61c11e36327e0379df637fae2e31b25809b7f9 Mon Sep 17 00:00:00 2001 From: araraonline Date: Wed, 3 Oct 2018 14:58:37 -0300 Subject: [PATCH 7/8] WIP Refactor _union_indices --- pandas/core/indexes/api.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index bea76c0d01f73..081420268fabb 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -98,25 +98,8 @@ def _union_indexes(indexes, sort=True): if kind == 'special': return _union_indexes_special(indexes, sort=sort) - elif kind == 'array': - - index = indexes[0] - if _all_indexes_same(indexes): - # name handled here - name = _get_consensus_names(indexes)[0] - if name != index.name: - index = index._shallow_copy(name=name) - return index - else: - # but not here - if sort is None: - # TODO: remove once pd.concat and df.append sort default changes - warnings.warn(_sort_msg, FutureWarning, stacklevel=8) - sort = True - return _unique_indices(indexes, sort=sort) - - else: # kind='list' - return _unique_indices(indexes, sort=sort) + else: + return _union_indexes_no_special(indexes, sort=sort) def _union_indexes_special(indexes, sort=True): @@ -133,6 +116,22 @@ def _union_indexes_special(indexes, sort=True): raise NotImplementedError +def _union_indexes_no_special(indexes, sort=True): + index = indexes[0] + if _all_indexes_same(indexes): + # name handled here + name = _get_consensus_names(indexes)[0] + if name != index.name: + index = index._shallow_copy(name=name) + return index + else: + # but not here + if sort is None: + # TODO: remove once pd.concat and df.append sort default changes + warnings.warn(_sort_msg, FutureWarning, stacklevel=8) + sort = True + return _unique_indices(indexes, sort=sort) + def _sanitize_and_check(indexes): kinds = {type(index) for index in indexes} From f590d04f5008b55e9b48640ac3b55868f4c6bad6 Mon Sep 17 00:00:00 2001 From: araraonline Date: Wed, 3 Oct 2018 15:03:07 -0300 Subject: [PATCH 8/8] WIP Refactor _union_indices --- pandas/core/indexes/api.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 081420268fabb..b3a3f758e05b1 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -69,7 +69,7 @@ def _intersect_indexes(indexes, sort=True): indexes = com.get_distinct_objs(indexes) # distinct ids result = indexes[0] - for other in indexes[1:] + for other in indexes[1:]: result = result.intersection(other) if sort: @@ -86,12 +86,6 @@ def _union_indexes(indexes, sort=True): indexes = com.get_distinct_objs(indexes) - if len(indexes) == 1: - result = indexes[0] - if isinstance(result, list): - result = Index(sorted(result)) # why do we sort?? - return result - # convert lists to indexes # check if at least one 'special' indexes, kind = _sanitize_and_check(indexes) @@ -123,6 +117,8 @@ def _union_indexes_no_special(indexes, sort=True): name = _get_consensus_names(indexes)[0] if name != index.name: index = index._shallow_copy(name=name) + if sort: + index = _maybe_sort(index) return index else: # but not here