From 30dde87de5a1077887347f9106a04c47d1e5b382 Mon Sep 17 00:00:00 2001
From: Matt Eding <matteding@gmail.com>
Date: Sun, 1 Dec 2019 16:25:47 -0800
Subject: [PATCH 1/5] refactored _iter to allow use of inheritance to remove
 derived methods

---
 imblearn/pipeline.py | 277 +++----------------------------------------
 1 file changed, 15 insertions(+), 262 deletions(-)

diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py
index 038d23c91..fb5b15212 100644
--- a/imblearn/pipeline.py
+++ b/imblearn/pipeline.py
@@ -12,6 +12,7 @@
 #         Christos Aridas
 #         Guillaume Lemaitre <g.lemaitre58@gmail.com>
 # License: BSD
+from itertools import filterfalse
 
 from sklearn import pipeline
 from sklearn.base import clone
@@ -145,7 +146,8 @@ def _validate_steps(self):
             ):
                 raise TypeError(
                     "All intermediate steps of the chain should "
-                    "be estimators that implement fit and transform or sample."
+                    "be estimators that implement fit and transform or "
+                    "fit_resample."
                     " '%s' implements both)" % (t)
                 )
 
@@ -167,6 +169,15 @@ def _validate_steps(self):
                 % (estimator, type(estimator))
             )
 
+    def _iter(
+        self, with_final=True, filter_passthrough=True, with_resample=False
+    ):
+        it = super()._iter(with_final, filter_passthrough)
+        if with_resample:
+            return it
+        else:
+            return filterfalse(lambda x: hasattr(x[-1], "fit_resample"), it)
+
     # Estimator interface
 
     def _fit(self, X, y=None, **fit_params):
@@ -175,7 +186,7 @@ def _fit(self, X, y=None, **fit_params):
         # Setup the memory
         memory = check_memory(self.memory)
 
-        fit_transform_one_cached = memory.cache(_fit_transform_one)
+        fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
         fit_resample_one_cached = memory.cache(_fit_resample_one)
 
         fit_params_steps = {
@@ -194,7 +205,8 @@ def _fit(self, X, y=None, **fit_params):
         for (step_idx,
              name,
              transformer) in self._iter(with_final=False,
-                                        filter_passthrough=False):
+                                        filter_passthrough=False,
+                                        with_resample=True):
             if (transformer is None or transformer == 'passthrough'):
                 with _print_elapsed_time('Pipeline',
                                          self._log_message(step_idx)):
@@ -354,38 +366,6 @@ def fit_resample(self, X, y=None, **fit_params):
             elif hasattr(last_step, "fit_resample"):
                 return last_step.fit_resample(Xt, yt, **fit_params)
 
-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict(self, X, **predict_params):
-        """Apply transformers/samplers to the data, and predict with the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        **predict_params : dict of string -> object
-            Parameters to the ``predict`` called at the end of all
-            transformations in the pipeline. Note that while this may be
-            used to return uncertainties from some models with return_std
-            or return_cov, uncertainties that are generated by the
-            transformations in the pipeline are not propagated to the
-            final estimator.
-
-        Returns
-        -------
-        y_pred : array-like
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict(Xt, **predict_params)
-
     @if_delegate_has_method(delegate="_final_estimator")
     def fit_predict(self, X, y=None, **fit_params):
         """Applies fit_predict of last step in pipeline after transforms.
@@ -419,233 +399,6 @@ def fit_predict(self, X, y=None, **fit_params):
             y_pred = self.steps[-1][-1].fit_predict(Xt, yt, **fit_params)
         return y_pred
 
-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict_proba(self, X):
-        """Apply transformers/samplers, and predict_proba of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_proba : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict_proba(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def score_samples(self, X):
-        """Apply transforms, and score_samples of the final estimator.
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-        Returns
-        -------
-        y_score : ndarray, shape (n_samples,)
-        """
-        Xt = X
-        for _, _, transformer in self._iter(with_final=False):
-            if hasattr(transformer, "fit_resample"):
-                pass
-            else:
-                Xt = transformer.transform(Xt)
-        return self.steps[-1][-1].score_samples(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def decision_function(self, X):
-        """Apply transformers/samplers, and decision_function of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_score : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].decision_function(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict_log_proba(self, X):
-        """Apply transformers/samplers, and predict_log_proba of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_score : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict_log_proba(Xt)
-
-    @property
-    def transform(self):
-        """Apply transformers/samplers, and transform with the final estimator
-
-        This also works where final estimator is ``None``: all prior
-        transformations are applied.
-
-        Parameters
-        ----------
-        X : iterable
-            Data to transform. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        Xt : array-like, shape = [n_samples, n_transformed_features]
-        """
-        # _final_estimator is None or has transform, otherwise attribute error
-        if self._final_estimator != "passthrough":
-            self._final_estimator.transform
-        return self._transform
-
-    def _transform(self, X):
-        Xt = X
-        for _, _, transform in self._iter():
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return Xt
-
-    @property
-    def inverse_transform(self):
-        """Apply inverse transformations in reverse order
-
-        All estimators in the pipeline must support ``inverse_transform``.
-
-        Parameters
-        ----------
-        Xt : array-like, shape = [n_samples, n_transformed_features]
-            Data samples, where ``n_samples`` is the number of samples and
-            ``n_features`` is the number of features. Must fulfill
-            input requirements of last step of pipeline's
-            ``inverse_transform`` method.
-
-        Returns
-        -------
-        Xt : array-like, shape = [n_samples, n_features]
-        """
-        # raise AttributeError if necessary for hasattr behaviour
-        for _, _, transform in self._iter():
-            transform.inverse_transform
-        return self._inverse_transform
-
-    def _inverse_transform(self, X):
-        Xt = X
-        reverse_iter = reversed(list(self._iter()))
-        for _, _, transform in reverse_iter:
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.inverse_transform(Xt)
-        return Xt
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def score(self, X, y=None, sample_weight=None):
-        """Apply transformers/samplers, and score with the final estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        y : iterable, default=None
-            Targets used for scoring. Must fulfill label requirements for all
-            steps of the pipeline.
-
-        sample_weight : array-like, default=None
-            If not None, this argument is passed as ``sample_weight`` keyword
-            argument to the ``score`` method of the final estimator.
-
-        Returns
-        -------
-        score : float
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        score_params = {}
-        if sample_weight is not None:
-            score_params["sample_weight"] = sample_weight
-        return self.steps[-1][-1].score(Xt, y, **score_params)
-
-    @if_delegate_has_method(delegate='_final_estimator')
-    def score_samples(self, X):
-        """Apply transforms, and score_samples of the final estimator.
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-        Returns
-        -------
-        y_score : ndarray, shape (n_samples,)
-        """
-        Xt = X
-        for _, _, transformer in self._iter(with_final=False):
-            if hasattr(transformer, "fit_resample"):
-                pass
-            else:
-                Xt = transformer.transform(Xt)
-        return self.steps[-1][-1].score_samples(Xt)
-
-
-def _fit_transform_one(transformer,
-                       X,
-                       y,
-                       weight,
-                       message_clsname='',
-                       message=None,
-                       **fit_params):
-    with _print_elapsed_time(message_clsname, message):
-        if hasattr(transformer, "fit_transform"):
-            res = transformer.fit_transform(X, y, **fit_params)
-        else:
-            res = transformer.fit(X, y, **fit_params).transform(X)
-    # if we have a weight for this transformer, multiply output
-    if weight is None:
-        return res, transformer
-    return res * weight, transformer
-
 
 def _fit_resample_one(sampler,
                       X,

From d4e7aea3a7ff40c2160be2df78a2b01cb8f48899 Mon Sep 17 00:00:00 2001
From: Matt Eding <matteding@gmail.com>
Date: Sun, 1 Dec 2019 21:55:07 -0800
Subject: [PATCH 2/5] change _iter semantics; remove unused import

---
 imblearn/pipeline.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py
index fb5b15212..3769bc89f 100644
--- a/imblearn/pipeline.py
+++ b/imblearn/pipeline.py
@@ -12,11 +12,9 @@
 #         Christos Aridas
 #         Guillaume Lemaitre <g.lemaitre58@gmail.com>
 # License: BSD
-from itertools import filterfalse
-
 from sklearn import pipeline
 from sklearn.base import clone
-from sklearn.utils import Bunch, _print_elapsed_time
+from sklearn.utils import _print_elapsed_time
 from sklearn.utils.metaestimators import if_delegate_has_method
 from sklearn.utils.validation import check_memory
 
@@ -170,13 +168,13 @@ def _validate_steps(self):
             )
 
     def _iter(
-        self, with_final=True, filter_passthrough=True, with_resample=False
+        self, with_final=True, filter_passthrough=True, filter_resample=True
     ):
         it = super()._iter(with_final, filter_passthrough)
-        if with_resample:
-            return it
+        if filter_resample:
+            return filter(lambda x: not hasattr(x[-1], "fit_resample"), it)
         else:
-            return filterfalse(lambda x: hasattr(x[-1], "fit_resample"), it)
+            return it
 
     # Estimator interface
 
@@ -206,7 +204,7 @@ def _fit(self, X, y=None, **fit_params):
              name,
              transformer) in self._iter(with_final=False,
                                         filter_passthrough=False,
-                                        with_resample=True):
+                                        filter_resample=False):
             if (transformer is None or transformer == 'passthrough'):
                 with _print_elapsed_time('Pipeline',
                                          self._log_message(step_idx)):
@@ -220,7 +218,7 @@ def _fit(self, X, y=None, **fit_params):
                 else:
                     cloned_transformer = clone(transformer)
             elif hasattr(memory, "cachedir"):
-                # joblib < 0.11
+                # joblib <= 0.11
                 if memory.cachedir is None:
                     # we do not clone when caching is disabled to
                     # preserve backward compatibility

From ee7773bfcc0f315e9c7094af610fc44094a13fc8 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 5 Dec 2019 13:56:10 +0100
Subject: [PATCH 3/5] Update pipeline.py

---
 imblearn/pipeline.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py
index 3769bc89f..ca57a2450 100644
--- a/imblearn/pipeline.py
+++ b/imblearn/pipeline.py
@@ -170,6 +170,12 @@ def _validate_steps(self):
     def _iter(
         self, with_final=True, filter_passthrough=True, filter_resample=True
     ):
+        """Generate (idx, (name, trans)) tuples from self.steps
+        
+        When `filter_passthrough` is `True`, 'passthrough' and None transformers
+        are filtered out. When `filter_resample` is `True`, estimator with a
+        method `fit_resample` are filtered out.
+        """
         it = super()._iter(with_final, filter_passthrough)
         if filter_resample:
             return filter(lambda x: not hasattr(x[-1], "fit_resample"), it)

From 9fa8b57b6cf72a8742233f355e6388c4c6dfbec8 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 5 Dec 2019 13:58:55 +0100
Subject: [PATCH 4/5] Update pipeline.py

---
 imblearn/pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py
index ca57a2450..5a55b1b4e 100644
--- a/imblearn/pipeline.py
+++ b/imblearn/pipeline.py
@@ -170,8 +170,8 @@ def _validate_steps(self):
     def _iter(
         self, with_final=True, filter_passthrough=True, filter_resample=True
     ):
-        """Generate (idx, (name, trans)) tuples from self.steps
-        
+        """Generate (idx, (name, trans)) tuples from self.steps.
+
         When `filter_passthrough` is `True`, 'passthrough' and None transformers
         are filtered out. When `filter_resample` is `True`, estimator with a
         method `fit_resample` are filtered out.

From a9d7909adecbd264120945510296de9076dbcf66 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 5 Dec 2019 14:07:41 +0100
Subject: [PATCH 5/5] PEP8

---
 imblearn/pipeline.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py
index 5a55b1b4e..77d89e4c9 100644
--- a/imblearn/pipeline.py
+++ b/imblearn/pipeline.py
@@ -172,9 +172,9 @@ def _iter(
     ):
         """Generate (idx, (name, trans)) tuples from self.steps.
 
-        When `filter_passthrough` is `True`, 'passthrough' and None transformers
-        are filtered out. When `filter_resample` is `True`, estimator with a
-        method `fit_resample` are filtered out.
+        When `filter_passthrough` is `True`, 'passthrough' and None
+        transformers are filtered out. When `filter_resample` is `True`,
+        estimators with a `fit_resample` method are filtered out.
         """
         it = super()._iter(with_final, filter_passthrough)
         if filter_resample: