From 30dde87de5a1077887347f9106a04c47d1e5b382 Mon Sep 17 00:00:00 2001 From: Matt Eding Date: Sun, 1 Dec 2019 16:25:47 -0800 Subject: [PATCH 1/5] refactored _iter to allow use inheritance to remove derived methods --- imblearn/pipeline.py | 277 +++---------------------------------------- 1 file changed, 15 insertions(+), 262 deletions(-) diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index 038d23c91..fb5b15212 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -12,6 +12,7 @@ # Christos Aridas # Guillaume Lemaitre # License: BSD +from itertools import filterfalse from sklearn import pipeline from sklearn.base import clone @@ -145,7 +146,8 @@ def _validate_steps(self): ): raise TypeError( "All intermediate steps of the chain should " - "be estimators that implement fit and transform or sample." + "be estimators that implement fit and transform or " + "fit_resample." " '%s' implements both)" % (t) ) @@ -167,6 +169,15 @@ def _validate_steps(self): % (estimator, type(estimator)) ) + def _iter( + self, with_final=True, filter_passthrough=True, with_resample=False + ): + it = super()._iter(with_final, filter_passthrough) + if with_resample: + return it + else: + return filterfalse(lambda x: hasattr(x[-1], "fit_resample"), it) + # Estimator interface def _fit(self, X, y=None, **fit_params): @@ -175,7 +186,7 @@ def _fit(self, X, y=None, **fit_params): # Setup the memory memory = check_memory(self.memory) - fit_transform_one_cached = memory.cache(_fit_transform_one) + fit_transform_one_cached = memory.cache(pipeline._fit_transform_one) fit_resample_one_cached = memory.cache(_fit_resample_one) fit_params_steps = { @@ -194,7 +205,8 @@ def _fit(self, X, y=None, **fit_params): for (step_idx, name, transformer) in self._iter(with_final=False, - filter_passthrough=False): + filter_passthrough=False, + with_resample=True): if (transformer is None or transformer == 'passthrough'): with _print_elapsed_time('Pipeline', self._log_message(step_idx)): @@ -354,38 
+366,6 @@ def fit_resample(self, X, y=None, **fit_params): elif hasattr(last_step, "fit_resample"): return last_step.fit_resample(Xt, yt, **fit_params) - @if_delegate_has_method(delegate="_final_estimator") - def predict(self, X, **predict_params): - """Apply transformers/samplers to the data, and predict with the final - estimator - - Parameters - ---------- - X : iterable - Data to predict on. Must fulfill input requirements of first step - of the pipeline. - - **predict_params : dict of string -> object - Parameters to the ``predict`` called at the end of all - transformations in the pipeline. Note that while this may be - used to return uncertainties from some models with return_std - or return_cov, uncertainties that are generated by the - transformations in the pipeline are not propagated to the - final estimator. - - Returns - ------- - y_pred : array-like - - """ - Xt = X - for _, _, transform in self._iter(with_final=False): - if hasattr(transform, "fit_resample"): - pass - else: - Xt = transform.transform(Xt) - return self.steps[-1][-1].predict(Xt, **predict_params) - @if_delegate_has_method(delegate="_final_estimator") def fit_predict(self, X, y=None, **fit_params): """Applies fit_predict of last step in pipeline after transforms. @@ -419,233 +399,6 @@ def fit_predict(self, X, y=None, **fit_params): y_pred = self.steps[-1][-1].fit_predict(Xt, yt, **fit_params) return y_pred - @if_delegate_has_method(delegate="_final_estimator") - def predict_proba(self, X): - """Apply transformers/samplers, and predict_proba of the final - estimator - - Parameters - ---------- - X : iterable - Data to predict on. Must fulfill input requirements of first step - of the pipeline. 
- - Returns - ------- - y_proba : array-like, shape = [n_samples, n_classes] - - """ - Xt = X - for _, _, transform in self._iter(with_final=False): - if hasattr(transform, "fit_resample"): - pass - else: - Xt = transform.transform(Xt) - return self.steps[-1][-1].predict_proba(Xt) - - @if_delegate_has_method(delegate="_final_estimator") - def score_samples(self, X): - """Apply transforms, and score_samples of the final estimator. - Parameters - ---------- - X : iterable - Data to predict on. Must fulfill input requirements of first step - of the pipeline. - Returns - ------- - y_score : ndarray, shape (n_samples,) - """ - Xt = X - for _, _, transformer in self._iter(with_final=False): - if hasattr(transformer, "fit_resample"): - pass - else: - Xt = transformer.transform(Xt) - return self.steps[-1][-1].score_samples(Xt) - - @if_delegate_has_method(delegate="_final_estimator") - def decision_function(self, X): - """Apply transformers/samplers, and decision_function of the final - estimator - - Parameters - ---------- - X : iterable - Data to predict on. Must fulfill input requirements of first step - of the pipeline. - - Returns - ------- - y_score : array-like, shape = [n_samples, n_classes] - - """ - Xt = X - for _, _, transform in self._iter(with_final=False): - if hasattr(transform, "fit_resample"): - pass - else: - Xt = transform.transform(Xt) - return self.steps[-1][-1].decision_function(Xt) - - @if_delegate_has_method(delegate="_final_estimator") - def predict_log_proba(self, X): - """Apply transformers/samplers, and predict_log_proba of the final - estimator - - Parameters - ---------- - X : iterable - Data to predict on. Must fulfill input requirements of first step - of the pipeline. 
- - Returns - ------- - y_score : array-like, shape = [n_samples, n_classes] - - """ - Xt = X - for _, _, transform in self._iter(with_final=False): - if hasattr(transform, "fit_resample"): - pass - else: - Xt = transform.transform(Xt) - return self.steps[-1][-1].predict_log_proba(Xt) - - @property - def transform(self): - """Apply transformers/samplers, and transform with the final estimator - - This also works where final estimator is ``None``: all prior - transformations are applied. - - Parameters - ---------- - X : iterable - Data to transform. Must fulfill input requirements of first step - of the pipeline. - - Returns - ------- - Xt : array-like, shape = [n_samples, n_transformed_features] - """ - # _final_estimator is None or has transform, otherwise attribute error - if self._final_estimator != "passthrough": - self._final_estimator.transform - return self._transform - - def _transform(self, X): - Xt = X - for _, _, transform in self._iter(): - if hasattr(transform, "fit_resample"): - pass - else: - Xt = transform.transform(Xt) - return Xt - - @property - def inverse_transform(self): - """Apply inverse transformations in reverse order - - All estimators in the pipeline must support ``inverse_transform``. - - Parameters - ---------- - Xt : array-like, shape = [n_samples, n_transformed_features] - Data samples, where ``n_samples`` is the number of samples and - ``n_features`` is the number of features. Must fulfill - input requirements of last step of pipeline's - ``inverse_transform`` method. 
- - Returns - ------- - Xt : array-like, shape = [n_samples, n_features] - """ - # raise AttributeError if necessary for hasattr behaviour - for _, _, transform in self._iter(): - transform.inverse_transform - return self._inverse_transform - - def _inverse_transform(self, X): - Xt = X - reverse_iter = reversed(list(self._iter())) - for _, _, transform in reverse_iter: - if hasattr(transform, "fit_resample"): - pass - else: - Xt = transform.inverse_transform(Xt) - return Xt - - @if_delegate_has_method(delegate="_final_estimator") - def score(self, X, y=None, sample_weight=None): - """Apply transformers/samplers, and score with the final estimator - - Parameters - ---------- - X : iterable - Data to predict on. Must fulfill input requirements of first step - of the pipeline. - - y : iterable, default=None - Targets used for scoring. Must fulfill label requirements for all - steps of the pipeline. - - sample_weight : array-like, default=None - If not None, this argument is passed as ``sample_weight`` keyword - argument to the ``score`` method of the final estimator. - - Returns - ------- - score : float - """ - Xt = X - for _, _, transform in self._iter(with_final=False): - if hasattr(transform, "fit_resample"): - pass - else: - Xt = transform.transform(Xt) - score_params = {} - if sample_weight is not None: - score_params["sample_weight"] = sample_weight - return self.steps[-1][-1].score(Xt, y, **score_params) - - @if_delegate_has_method(delegate='_final_estimator') - def score_samples(self, X): - """Apply transforms, and score_samples of the final estimator. - Parameters - ---------- - X : iterable - Data to predict on. Must fulfill input requirements of first step - of the pipeline. 
- Returns - ------- - y_score : ndarray, shape (n_samples,) - """ - Xt = X - for _, _, transformer in self._iter(with_final=False): - if hasattr(transformer, "fit_resample"): - pass - else: - Xt = transformer.transform(Xt) - return self.steps[-1][-1].score_samples(Xt) - - -def _fit_transform_one(transformer, - X, - y, - weight, - message_clsname='', - message=None, - **fit_params): - with _print_elapsed_time(message_clsname, message): - if hasattr(transformer, "fit_transform"): - res = transformer.fit_transform(X, y, **fit_params) - else: - res = transformer.fit(X, y, **fit_params).transform(X) - # if we have a weight for this transformer, multiply output - if weight is None: - return res, transformer - return res * weight, transformer - def _fit_resample_one(sampler, X, From d4e7aea3a7ff40c2160be2df78a2b01cb8f48899 Mon Sep 17 00:00:00 2001 From: Matt Eding Date: Sun, 1 Dec 2019 21:55:07 -0800 Subject: [PATCH 2/5] change _iter semantics; remove unused import --- imblearn/pipeline.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index fb5b15212..3769bc89f 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -12,11 +12,9 @@ # Christos Aridas # Guillaume Lemaitre # License: BSD -from itertools import filterfalse - from sklearn import pipeline from sklearn.base import clone -from sklearn.utils import Bunch, _print_elapsed_time +from sklearn.utils import _print_elapsed_time from sklearn.utils.metaestimators import if_delegate_has_method from sklearn.utils.validation import check_memory @@ -170,13 +168,13 @@ def _validate_steps(self): ) def _iter( - self, with_final=True, filter_passthrough=True, with_resample=False + self, with_final=True, filter_passthrough=True, filter_resample=True ): it = super()._iter(with_final, filter_passthrough) - if with_resample: - return it + if filter_resample: + return filter(lambda x: not hasattr(x[-1], "fit_resample"), it) else: - return 
filterfalse(lambda x: hasattr(x[-1], "fit_resample"), it) + return it # Estimator interface @@ -206,7 +204,7 @@ def _fit(self, X, y=None, **fit_params): name, transformer) in self._iter(with_final=False, filter_passthrough=False, - with_resample=True): + filter_resample=False): if (transformer is None or transformer == 'passthrough'): with _print_elapsed_time('Pipeline', self._log_message(step_idx)): @@ -220,7 +218,7 @@ def _fit(self, X, y=None, **fit_params): else: cloned_transformer = clone(transformer) elif hasattr(memory, "cachedir"): - # joblib < 0.11 + # joblib <= 0.11 if memory.cachedir is None: # we do not clone when caching is disabled to # preserve backward compatibility From ee7773bfcc0f315e9c7094af610fc44094a13fc8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 5 Dec 2019 13:56:10 +0100 Subject: [PATCH 3/5] Update pipeline.py --- imblearn/pipeline.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index 3769bc89f..ca57a2450 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -170,6 +170,12 @@ def _validate_steps(self): def _iter( self, with_final=True, filter_passthrough=True, filter_resample=True ): + """Generate (idx, (name, trans)) tuples from self.steps + + When `filter_passthrough` is `True`, 'passthrough' and None transformers + are filtered out. When `filter_resample` is `True`, estimator with a + method `fit_resample` are filtered out. 
+ """ it = super()._iter(with_final, filter_passthrough) if filter_resample: return filter(lambda x: not hasattr(x[-1], "fit_resample"), it) From 9fa8b57b6cf72a8742233f355e6388c4c6dfbec8 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 5 Dec 2019 13:58:55 +0100 Subject: [PATCH 4/5] Update pipeline.py --- imblearn/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index ca57a2450..5a55b1b4e 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -170,8 +170,8 @@ def _validate_steps(self): def _iter( self, with_final=True, filter_passthrough=True, filter_resample=True ): - """Generate (idx, (name, trans)) tuples from self.steps - + """Generate (idx, (name, trans)) tuples from self.steps. + When `filter_passthrough` is `True`, 'passthrough' and None transformers are filtered out. When `filter_resample` is `True`, estimator with a method `fit_resample` are filtered out. From a9d7909adecbd264120945510296de9076dbcf66 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Thu, 5 Dec 2019 14:07:41 +0100 Subject: [PATCH 5/5] PEP8 --- imblearn/pipeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index 5a55b1b4e..77d89e4c9 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -172,9 +172,9 @@ def _iter( ): """Generate (idx, (name, trans)) tuples from self.steps. - When `filter_passthrough` is `True`, 'passthrough' and None transformers - are filtered out. When `filter_resample` is `True`, estimator with a - method `fit_resample` are filtered out. + When `filter_passthrough` is `True`, 'passthrough' and None + transformers are filtered out. When `filter_resample` is `True`, + estimators with a method `fit_resample` are filtered out. """ it = super()._iter(with_final, filter_passthrough) if filter_resample: