From 9ef97f2767126240e0cd0425a2eaee2d7c7518bc Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Wed, 27 Oct 2021 18:58:31 -0400 Subject: [PATCH 1/3] Optimize provider's is_backtrack_cause by caching resolved names is_backtrack_cause is called for every requirement on every step of resolution. As it's implemented currently if an identifier is not the backtrack cause, the entire list is scanned, invoking the .name property on every element of the list (and possibly parents). Since all the provider needs to know is presence or absence of the identifier in the backtrack causes list stash the resolved names in a set on first invocation and check for presence/absence in that set. --- news/10621.feature.rst | 1 + .../resolution/resolvelib/provider.py | 26 ++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 news/10621.feature.rst diff --git a/news/10621.feature.rst b/news/10621.feature.rst new file mode 100644 index 00000000000..b1456096dc0 --- /dev/null +++ b/news/10621.feature.rst @@ -0,0 +1 @@ +Optimize performance of conflict cause resolution while backtracking. diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py index e6ec9594f62..d3b56ed3ebe 100644 --- a/src/pip/_internal/resolution/resolvelib/provider.py +++ b/src/pip/_internal/resolution/resolvelib/provider.py @@ -7,6 +7,7 @@ Iterator, Mapping, Sequence, + Set, TypeVar, Union, ) @@ -75,6 +76,24 @@ def _get_with_identifier( return default +key_to_names: Dict[int, Set[str]] = {} + + +def _get_causes_set(backtrack_causes: Sequence["PreferenceInformation"]) -> Set[str]: + key = id(backtrack_causes) + cached_names = key_to_names.get(key) + if cached_names is None: + key_to_names.clear() + cached_names = set() + for backtrack_cause in backtrack_causes: + cached_names.add(backtrack_cause.requirement.name) + parent = backtrack_cause.parent + if parent: + cached_names.add(parent.name) + key_to_names[key] = cached_names + return cached_names + + class PipProvider(_ProviderBase): """Pip's provider implementation for resolvelib. @@ -240,9 +259,4 @@ def get_dependencies(self, candidate: Candidate) -> Sequence[Requirement]: def is_backtrack_cause( identifier: str, backtrack_causes: Sequence["PreferenceInformation"] ) -> bool: - for backtrack_cause in backtrack_causes: - if identifier == backtrack_cause.requirement.name: - return True - if backtrack_cause.parent and identifier == backtrack_cause.parent.name: - return True - return False + return identifier in _get_causes_set(backtrack_causes) From 4d1ea9b5be7be91867ecd3d807737bf91c349007 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 29 Oct 2021 09:15:32 -0400 Subject: [PATCH 2/3] Update news/10621.feature.rst Co-authored-by: Tzu-ping Chung --- news/10621.feature.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/10621.feature.rst b/news/10621.feature.rst index b1456096dc0..4f7d29e4584 100644 --- a/news/10621.feature.rst +++ b/news/10621.feature.rst @@ -1 +1 @@ -Optimize performance of conflict cause resolution while backtracking. +Optimize performance of conflict cause resolution while the dependency resolver backtracks. From 6951b9bfd2d315c7e0ddd4f1ec4d74ab65d4fd77 Mon Sep 17 00:00:00 2001 From: Joseph Bylund Date: Fri, 29 Oct 2021 09:39:54 -0400 Subject: [PATCH 3/3] Move helper to go from sequence of backtrack causes to set of names to inside the provider. Use the id of every element in the sequence as part of the key as a guard against the sequence mutating. --- .../resolution/resolvelib/provider.py | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py index d3b56ed3ebe..603e0ccf83f 100644 --- a/src/pip/_internal/resolution/resolvelib/provider.py +++ b/src/pip/_internal/resolution/resolvelib/provider.py @@ -8,6 +8,7 @@ Mapping, Sequence, Set, + Tuple, TypeVar, Union, ) @@ -76,24 +77,6 @@ def _get_with_identifier( return default -key_to_names: Dict[int, Set[str]] = {} - - -def _get_causes_set(backtrack_causes: Sequence["PreferenceInformation"]) -> Set[str]: - key = id(backtrack_causes) - cached_names = key_to_names.get(key) - if cached_names is None: - key_to_names.clear() - cached_names = set() - for backtrack_cause in backtrack_causes: - cached_names.add(backtrack_cause.requirement.name) - parent = backtrack_cause.parent - if parent: - cached_names.add(parent.name) - key_to_names[key] = cached_names - return cached_names - - class PipProvider(_ProviderBase): """Pip's provider implementation for resolvelib. @@ -119,6 +102,7 @@ def __init__( self._upgrade_strategy = upgrade_strategy self._user_requested = user_requested self._known_depths: Dict[str, float] = collections.defaultdict(lambda: math.inf) + self._backtrack_cause_name_collections: Dict[Tuple[int, ...], Set[str]] = {} def identify(self, requirement_or_candidate: Union[Requirement, Candidate]) -> str: return requirement_or_candidate.name @@ -255,8 +239,28 @@ def get_dependencies(self, candidate: Candidate) -> Sequence[Requirement]: with_requires = not self._ignore_dependencies return [r for r in candidate.iter_dependencies(with_requires) if r is not None] - @staticmethod def is_backtrack_cause( - identifier: str, backtrack_causes: Sequence["PreferenceInformation"] + self, identifier: str, backtrack_causes: Sequence["PreferenceInformation"] ) -> bool: - return identifier in _get_causes_set(backtrack_causes) + return identifier in self._get_causes_set(backtrack_causes) + + def _get_causes_set( + self, + backtrack_causes: Sequence["PreferenceInformation"], + ) -> Set[str]: + backtrack_causes_collection_key = tuple(id(c) for c in backtrack_causes) + cached_names = self._backtrack_cause_name_collections.get( + backtrack_causes_collection_key + ) + if cached_names is None: + self._backtrack_cause_name_collections.clear() + cached_names = set() + for backtrack_cause in backtrack_causes: + cached_names.add(backtrack_cause.requirement.name) + parent = backtrack_cause.parent + if parent: + cached_names.add(parent.name) + self._backtrack_cause_name_collections[ + backtrack_causes_collection_key + ] = cached_names + return cached_names