From 1476dab125611dfcab4b3d11bb602855592779c4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 00:05:03 +0100 Subject: [PATCH 1/8] MAINT validate parameters for public functions --- imblearn/metrics/_classification.py | 125 +++++++++++++++++++++++----- 1 file changed, 105 insertions(+), 20 deletions(-) diff --git a/imblearn/metrics/_classification.py b/imblearn/metrics/_classification.py index 797fb56a8..3504a29a7 100644 --- a/imblearn/metrics/_classification.py +++ b/imblearn/metrics/_classification.py @@ -15,6 +15,7 @@ # License: MIT import functools +import numbers import warnings from inspect import signature @@ -26,7 +27,23 @@ from sklearn.utils.multiclass import unique_labels from sklearn.utils.validation import check_consistent_length, column_or_1d +from ..utils._param_validation import Interval, StrOptions, validate_params + +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "pos_label": [str, numbers.Integral, None], + "average": [ + None, + StrOptions({"binary", "micro", "macro", "weighted", "samples"}), + ], + "warn_for": ["array-like"], + "sample_weight": ["array-like", None], + } +) def sensitivity_specificity_support( y_true, y_pred, @@ -57,13 +74,13 @@ def sensitivity_specificity_support( Parameters ---------- - y_true : ndarray of shape (n_samples,) + y_true : array-like of shape (n_samples,) Ground truth (correct) target values. - y_pred : ndarray of shape (n_samples,) + y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a @@ -72,7 +89,7 @@ def sensitivity_specificity_support( labels are column indices. By default, all labels in ``y_true`` and ``y_pred`` are used in sorted order. - pos_label : str or int, default=1 + pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report @@ -105,7 +122,7 @@ def sensitivity_specificity_support( This determines which warnings will be made in the case that this function is being used to return only one of its metrics. - sample_weight : ndarray of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -274,6 +291,19 @@ def sensitivity_specificity_support( return sensitivity, specificity, true_sum +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "pos_label": [str, numbers.Integral, None], + "average": [ + None, + StrOptions({"binary", "micro", "macro", "weighted", "samples"}), + ], + "sample_weight": ["array-like", None], + } +) def sensitivity_score( y_true, y_pred, @@ -295,20 +325,20 @@ def sensitivity_score( Parameters ---------- - y_true : ndarray of shape (n_samples,) + y_true : array-like of shape (n_samples,) Ground truth (correct) target values. - y_pred : ndarray of shape (n_samples,) + y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. 
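A minimal usage sketch, not part of the diff, of what the decorators above buy at call time: an out-of-range value is now rejected with an informative message before any metric computation runs. The toy labels are made up for the example, and the message wording follows the pattern asserted by the common test added later in this series.

from imblearn.metrics import sensitivity_specificity_support

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]

try:
    # "mean" is not one of StrOptions({"binary", "micro", "macro", "weighted", "samples"})
    sensitivity_specificity_support(y_true, y_pred, average="mean")
except ValueError as exc:
    # e.g. "The 'average' parameter of sensitivity_specificity_support must be
    # ... Got 'mean' instead."
    print(exc)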
Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. - pos_label : str or int, default=1 + pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report @@ -337,7 +367,7 @@ def sensitivity_score( meaningful for multilabel classification where this differs from :func:`accuracy_score`). - sample_weight : ndarray of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -374,6 +404,19 @@ def sensitivity_score( return s +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "pos_label": [str, numbers.Integral, None], + "average": [ + None, + StrOptions({"binary", "micro", "macro", "weighted", "samples"}), + ], + "sample_weight": ["array-like", None], + } +) def specificity_score( y_true, y_pred, @@ -395,20 +438,20 @@ def specificity_score( Parameters ---------- - y_true : ndarray of shape (n_samples,) + y_true : array-like of shape (n_samples,) Ground truth (correct) target values. - y_pred : ndarray of shape (n_samples,) + y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. - pos_label : str or int, default=1 + pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report @@ -437,7 +480,7 @@ def specificity_score( meaningful for multilabel classification where this differs from :func:`accuracy_score`). - sample_weight : ndarray of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -474,6 +517,22 @@ def specificity_score( return s +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "pos_label": [str, numbers.Integral, None], + "average": [ + None, + StrOptions( + {"binary", "micro", "macro", "weighted", "samples", "multiclass"} + ), + ], + "sample_weight": ["array-like", None], + "correction": [Interval(numbers.Real, 0, None, closed="left")], + } +) def geometric_mean_score( y_true, y_pred, @@ -507,20 +566,20 @@ class is unrecognized by the classifier, G-mean resolves to zero. To Parameters ---------- - y_true : ndarray of shape (n_samples,) + y_true : array-like of shape (n_samples,) Ground truth (correct) target values. - y_pred : ndarray of shape (n_samples,) + y_pred : array-like of shape (n_samples,) Estimated targets as returned by a classifier. - labels : list, default=None + labels : array-like, default=None The set of labels to include when ``average != 'binary'``, and their order if ``average is None``. 
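For context on the ``correction`` constraint above (Interval(numbers.Real, 0, None, closed="left"), i.e. a non-negative real), a small sketch, not part of the diff, of the behaviour the docstring describes: when a class is never predicted its recall is 0, the multiclass G-mean collapses to 0, and a small positive ``correction`` is substituted for the zero term. The labels below are made up.

from imblearn.metrics import geometric_mean_score

# class 2 is present in y_true but never predicted, so its recall is 0
y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 0, 1, 1, 1, 0]

print(geometric_mean_score(y_true, y_pred, average="multiclass"))
# 0.0 -- one zero recall wipes out the geometric mean
print(geometric_mean_score(y_true, y_pred, average="multiclass", correction=0.001))
# ~0.1 -- gmean(1, 1, 0.001), small but informative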
Labels present in the data can be excluded, for example to calculate a multiclass average ignoring a majority negative class, while labels not present in the data will result in 0 components in a macro average. - pos_label : str or int, default=1 + pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report @@ -539,6 +598,8 @@ class is unrecognized by the classifier, G-mean resolves to zero. To ``'macro'``: Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. + ``'multiclass'``: + No average is taken. ``'weighted'``: Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This @@ -549,7 +610,7 @@ class is unrecognized by the classifier, G-mean resolves to zero. To meaningful for multilabel classification where this differs from :func:`accuracy_score`). - sample_weight : ndarray of shape (n_samples,), default=None + sample_weight : array-like of shape (n_samples,), default=None Sample weights. correction : float, default=0.0 @@ -658,6 +719,7 @@ class is unrecognized by the classifier, G-mean resolves to zero. To return gmean +@validate_params({"alpha": [numbers.Real], "squared": ["boolean"]}) def make_index_balanced_accuracy(*, alpha=0.1, squared=True): """Balance any scoring function using the index balanced accuracy. @@ -763,6 +825,22 @@ def compute_score(*args, **kwargs): return decorate +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "labels": ["array-like", None], + "target_names": ["array-like", None], + "sample_weight": ["array-like", None], + "digits": [Interval(numbers.Integral, 0, None, closed="left")], + "alpha": [numbers.Real], + "output_dict": ["boolean"], + "zero_division": [ + StrOptions({"warn"}), + Interval(numbers.Integral, 0, 1, closed="both"), + ], + } +) def classification_report_imbalanced( y_true, y_pred, @@ -970,6 +1048,13 @@ class 2 1.00 0.67 1.00 0.80 0.82 0.64\ return report +@validate_params( + { + "y_true": ["array-like"], + "y_pred": ["array-like"], + "sample_weight": ["array-like", None], + } +) def macro_averaged_mean_absolute_error(y_true, y_pred, *, sample_weight=None): """Compute Macro-Averaged MAE for imbalanced ordinal classification. From d5ed9af79a9bc7b86548b84734f7c89dece9220e Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 10:34:08 +0100 Subject: [PATCH 2/8] TST add common tests --- imblearn/metrics/_classification.py | 10 +++ imblearn/tests/test_public_functions.py | 98 +++++++++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 imblearn/tests/test_public_functions.py diff --git a/imblearn/metrics/_classification.py b/imblearn/metrics/_classification.py index 3504a29a7..b377db592 100644 --- a/imblearn/metrics/_classification.py +++ b/imblearn/metrics/_classification.py @@ -91,6 +91,9 @@ def sensitivity_specificity_support( pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. + If ``pos_label is None`` and in binary classification, this function + returns the average sensitivity and specificity if ``average`` + is one of ``'weighted'``. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. 
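A short sketch, not part of the diff, of how the ``make_index_balanced_accuracy`` factory decorated above is typically used: it wraps a scoring function such as ``geometric_mean_score`` and weights it by the index balanced accuracy. The labels are made up; passing ``squared='yes'`` instead of a boolean would now make the factory itself raise the same kind of validation error as the metrics.

from imblearn.metrics import geometric_mean_score, make_index_balanced_accuracy

y_true = [1, 0, 0, 1, 0, 1]
y_pred = [0, 0, 1, 1, 0, 1]

iba_gmean = make_index_balanced_accuracy(alpha=0.1, squared=True)(geometric_mean_score)
print(iba_gmean(y_true, y_pred))  # a float in [0, 1]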
@@ -340,6 +343,8 @@ def sensitivity_score( pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. + If ``pos_label is None`` and in binary classification, this function + returns the average sensitivity if ``average`` is one of ``'weighted'``. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. @@ -453,6 +458,8 @@ def specificity_score( pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. + If ``pos_label is None`` and in binary classification, this function + returns the average specificity if ``average`` is one of ``'weighted'``. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. @@ -581,6 +588,9 @@ class is unrecognized by the classifier, G-mean resolves to zero. To pos_label : str, int or None, default=1 The class to report if ``average='binary'`` and the data is binary. + If ``pos_label is None`` and in binary classification, this function + returns the average geometric mean if ``average`` is one of + ``'weighted'``. If the data are multiclass, this will be ignored; setting ``labels=[pos_label]`` and ``average != 'binary'`` will report scores for that label only. diff --git a/imblearn/tests/test_public_functions.py b/imblearn/tests/test_public_functions.py new file mode 100644 index 000000000..510ebe204 --- /dev/null +++ b/imblearn/tests/test_public_functions.py @@ -0,0 +1,98 @@ +"""This is a copy of sklearn/tests/test_public_functions.py. It can be +removed when we support scikit-learn >= 1.2. +""" +from importlib import import_module +from inspect import signature + +import pytest + +from imblearn.utils._param_validation import ( + generate_invalid_param_val, + generate_valid_param, + make_constraint, +) + +PARAM_VALIDATION_FUNCTION_LIST = [ + "imblearn.metrics.sensitivity_specificity_support", + "imblearn.metrics.sensitivity_score", + "imblearn.metrics.specificity_score", + "imblearn.metrics.geometric_mean_score", + "imblearn.metrics.make_index_balanced_accuracy", + "imblearn.metrics.classification_report_imbalanced", + "imblearn.metrics.macro_averaged_mean_absolute_error", +] + + +@pytest.mark.parametrize("func_module", PARAM_VALIDATION_FUNCTION_LIST) +def test_function_param_validation(func_module): + """Check that an informative error is raised when the value of a parameter does not + have an appropriate type or value. 
+ """ + module_name, func_name = func_module.rsplit(".", 1) + module = import_module(module_name) + func = getattr(module, func_name) + + func_sig = signature(func) + func_params = [ + p.name + for p in func_sig.parameters.values() + if p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD) + ] + parameter_constraints = getattr(func, "_skl_parameter_constraints") + + # generate valid values for the required parameters + required_params = [ + p.name for p in func_sig.parameters.values() if p.default is p.empty + ] + valid_required_params = {} + for param_name in required_params: + if parameter_constraints[param_name] == "no_validation": + valid_required_params[param_name] = 1 + else: + valid_required_params[param_name] = generate_valid_param( + make_constraint(parameter_constraints[param_name][0]) + ) + + # check that there is a constraint for each parameter + if func_params: + validation_params = parameter_constraints.keys() + unexpected_params = set(validation_params) - set(func_params) + missing_params = set(func_params) - set(validation_params) + err_msg = ( + "Mismatch between _parameter_constraints and the parameters of" + f" {func_name}.\nConsider the unexpected parameters {unexpected_params} and" + f" expected but missing parameters {missing_params}\n" + ) + assert set(validation_params) == set(func_params), err_msg + + # this object does not have a valid type for sure for all params + param_with_bad_type = type("BadType", (), {})() + + for param_name in func_params: + constraints = parameter_constraints[param_name] + + if constraints == "no_validation": + # This parameter is not validated + continue + + match = ( + rf"The '{param_name}' parameter of {func_name} must be .* Got .* instead." + ) + + # First, check that the error is raised if param doesn't match any valid type. + with pytest.raises(ValueError, match=match): + func(**{**valid_required_params, param_name: param_with_bad_type}) + + # Then, for constraints that are more than a type constraint, check that the + # error is raised if param does match a valid type but does not match any valid + # value for this type. + constraints = [make_constraint(constraint) for constraint in constraints] + + for constraint in constraints: + try: + bad_value = generate_invalid_param_val(constraint) + except NotImplementedError: + continue + + with pytest.raises(ValueError, match=match): + func(**{**valid_required_params, param_name: bad_value}) From 16b58e0ae4a4f1f0d7a684fdc06ab22b27ed29b4 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 10:39:43 +0100 Subject: [PATCH 3/8] iter --- imblearn/datasets/_imbalance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imblearn/datasets/_imbalance.py b/imblearn/datasets/_imbalance.py index ffa822037..e0820a03a 100644 --- a/imblearn/datasets/_imbalance.py +++ b/imblearn/datasets/_imbalance.py @@ -26,7 +26,7 @@ def make_imbalance( X : {array-like, dataframe} of shape (n_samples, n_features) Matrix containing the data to be imbalanced. - y : ndarray of shape (n_samples,) + y : array-like of shape (n_samples,) Corresponding label for each sample in X. 
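For readers unfamiliar with the helpers imported by the common test above, a rough sketch, not part of the diff, of what they do; the concrete return values are implementation details and only indicative.

from numbers import Integral

from imblearn.utils._param_validation import (
    Interval,
    generate_invalid_param_val,
    generate_valid_param,
    make_constraint,
)

# Declarations such as "array-like" or Interval(...) become constraint objects...
constraint = make_constraint(Interval(Integral, 0, None, closed="left"))
# ...from which the test derives one value satisfying the constraint...
good = generate_valid_param(constraint)  # e.g. 1
# ...and, when possible, one value of the right type that violates it.
bad = generate_invalid_param_val(constraint)  # e.g. -1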
sampling_strategy : dict or callable, From 7ab8e14c3741af2bb690bc21cc21a3661973617d Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 14:23:58 +0100 Subject: [PATCH 4/8] iter --- imblearn/datasets/_imbalance.py | 15 +++++++++++++-- imblearn/over_sampling/base.py | 3 ++- imblearn/tests/test_public_functions.py | 9 +++++++-- imblearn/under_sampling/base.py | 3 ++- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/imblearn/datasets/_imbalance.py b/imblearn/datasets/_imbalance.py index e0820a03a..e935f1b9b 100644 --- a/imblearn/datasets/_imbalance.py +++ b/imblearn/datasets/_imbalance.py @@ -6,11 +6,22 @@ # License: MIT from collections import Counter +from collections.abc import Mapping from ..under_sampling import RandomUnderSampler from ..utils import check_sampling_strategy - - +from ..utils._param_validation import validate_params + + +@validate_params( + { + "X": ["array-like", "dataframe"], + "y": ["array-like"], + "sampling_strategy": [Mapping, callable, None], + "random_state": ["random_state"], + "verbose": ["boolean"], + } +) def make_imbalance( X, y, *, sampling_strategy=None, random_state=None, verbose=False, **kwargs ): diff --git a/imblearn/over_sampling/base.py b/imblearn/over_sampling/base.py index 4bc08e91a..d4e4a4541 100644 --- a/imblearn/over_sampling/base.py +++ b/imblearn/over_sampling/base.py @@ -6,6 +6,7 @@ # License: MIT import numbers +from collections.abc import Mapping from ..base import BaseSampler from ..utils._param_validation import Interval, StrOptions @@ -61,7 +62,7 @@ class BaseOverSampler(BaseSampler): "sampling_strategy": [ Interval(numbers.Real, 0, 1, closed="right"), StrOptions({"auto", "majority", "not minority", "not majority", "all"}), - dict, + Mapping, callable, ], "random_state": ["random_state"], diff --git a/imblearn/tests/test_public_functions.py b/imblearn/tests/test_public_functions.py index 510ebe204..1b94b16df 100644 --- a/imblearn/tests/test_public_functions.py +++ b/imblearn/tests/test_public_functions.py @@ -13,6 +13,7 @@ ) PARAM_VALIDATION_FUNCTION_LIST = [ + "imblearn.datasets.make_imbalance", "imblearn.metrics.sensitivity_specificity_support", "imblearn.metrics.sensitivity_score", "imblearn.metrics.specificity_score", @@ -40,9 +41,13 @@ def test_function_param_validation(func_module): ] parameter_constraints = getattr(func, "_skl_parameter_constraints") - # generate valid values for the required parameters + # Generate valid values for the required parameters + # The parameters `*args` and `**kwargs` are ignored since we cannot generate + # constraints. 
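A small usage sketch, not part of the diff, for the newly decorated ``make_imbalance``; iris is used here only as a convenient balanced multiclass target to degrade.

from collections import Counter

from sklearn.datasets import load_iris

from imblearn.datasets import make_imbalance

X, y = load_iris(return_X_y=True)
X_res, y_res = make_imbalance(
    X, y, sampling_strategy={0: 20, 1: 30, 2: 40}, random_state=42
)
print(Counter(y_res))  # e.g. Counter({2: 40, 1: 30, 0: 20})
# per the constraints above, sampling_strategy may be any Mapping, a callable, or None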
required_params = [ - p.name for p in func_sig.parameters.values() if p.default is p.empty + p.name + for p in func_sig.parameters.values() + if p.default is p.empty and p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD) ] valid_required_params = {} for param_name in required_params: diff --git a/imblearn/under_sampling/base.py b/imblearn/under_sampling/base.py index e36d8c31f..92da45723 100644 --- a/imblearn/under_sampling/base.py +++ b/imblearn/under_sampling/base.py @@ -5,6 +5,7 @@ # License: MIT import numbers +from collections.abc import Mapping from ..base import BaseSampler from ..utils._param_validation import Interval, StrOptions @@ -61,7 +62,7 @@ class BaseUnderSampler(BaseSampler): "sampling_strategy": [ Interval(numbers.Real, 0, 1, closed="right"), StrOptions({"auto", "majority", "not minority", "not majority", "all"}), - dict, + Mapping, callable, ], } From 3f0741064504567840fc89c239ac176215dce636 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 15:06:50 +0100 Subject: [PATCH 5/8] iter --- imblearn/datasets/_zenodo.py | 12 ++++++++++++ imblearn/datasets/tests/test_imbalance.py | 1 - imblearn/pipeline.py | 23 ++++++++++++++++++++--- imblearn/tests/test_common.py | 1 - imblearn/tests/test_pipeline.py | 8 ++++++++ imblearn/tests/test_public_functions.py | 10 ++++++---- 6 files changed, 46 insertions(+), 9 deletions(-) diff --git a/imblearn/datasets/_zenodo.py b/imblearn/datasets/_zenodo.py index 72bafe7a6..3a2c679a0 100644 --- a/imblearn/datasets/_zenodo.py +++ b/imblearn/datasets/_zenodo.py @@ -54,6 +54,8 @@ from sklearn.datasets import get_data_home from sklearn.utils import Bunch, check_random_state +from ..utils._param_validation import validate_params + URL = "https://zenodo.org/record/61452/files/benchmark-imbalanced-learn.tar.gz" PRE_FILENAME = "x" POST_FILENAME = "data.npz" @@ -95,6 +97,16 @@ MAP_ID_NAME[v + 1] = k +@validate_params( + { + "data_home": [None, str], + "filter_data": [None, tuple], + "download_if_missing": ["boolean"], + "random_state": ["random_state"], + "shuffle": ["boolean"], + "verbose": ["boolean"], + } +) def fetch_datasets( *, data_home=None, diff --git a/imblearn/datasets/tests/test_imbalance.py b/imblearn/datasets/tests/test_imbalance.py index 1b98d3aae..2d8e278fa 100644 --- a/imblearn/datasets/tests/test_imbalance.py +++ b/imblearn/datasets/tests/test_imbalance.py @@ -22,7 +22,6 @@ def iris(): [ ({0: -100, 1: 50, 2: 50}, "in a class cannot be negative"), ({0: 10, 1: 70}, "should be less or equal to the original"), - ("random-string", "has to be a dictionary or a function"), ], ) def test_make_imbalance_error(iris, sampling_strategy, err_msg): diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py index f6b5d5d24..738f89b49 100644 --- a/imblearn/pipeline.py +++ b/imblearn/pipeline.py @@ -12,16 +12,19 @@ # Christos Aridas # Guillaume Lemaitre # License: BSD +import joblib from sklearn import pipeline from sklearn.base import clone from sklearn.utils import _print_elapsed_time from sklearn.utils.metaestimators import available_if -from sklearn.utils.validation import check_memory + +from .base import _ParamsValidationMixin +from .utils._param_validation import HasMethods, validate_params __all__ = ["Pipeline", "make_pipeline"] -class Pipeline(pipeline.Pipeline): +class Pipeline(pipeline.Pipeline, _ParamsValidationMixin): """Pipeline of transforms and resamples with a final estimator. Sequentially apply a list of transforms, sampling, and a final estimator. 
@@ -128,6 +131,12 @@ class Pipeline(pipeline.Pipeline): """ + _parameter_constraints: dict = { + "steps": "no_validation", # validated in `_validate_steps` + "memory": [None, str, HasMethods(["cache"])], + "verbose": ["boolean"], + } + # BaseEstimator interface def _validate_steps(self): @@ -201,7 +210,10 @@ def _fit(self, X, y=None, **fit_params_steps): self.steps = list(self.steps) self._validate_steps() # Setup the memory - memory = check_memory(self.memory) + if self.memory is None or isinstance(self.memory, str): + memory = joblib.Memory(location=self.memory, verbose=0) + else: + memory = self.memory fit_transform_one_cached = memory.cache(pipeline._fit_transform_one) fit_resample_one_cached = memory.cache(_fit_resample_one) @@ -276,6 +288,7 @@ def fit(self, X, y=None, **fit_params): self : Pipeline This estimator. """ + self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt, yt = self._fit(X, y, **fit_params_steps) with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)): @@ -311,6 +324,7 @@ def fit_transform(self, X, y=None, **fit_params): Xt : array-like of shape (n_samples, n_transformed_features) Transformed samples. """ + self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt, yt = self._fit(X, y, **fit_params_steps) @@ -354,6 +368,7 @@ def fit_resample(self, X, y=None, **fit_params): yt : array-like of shape (n_samples, n_transformed_features) Transformed target. """ + self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt, yt = self._fit(X, y, **fit_params_steps) last_step = self._final_estimator @@ -392,6 +407,7 @@ def fit_predict(self, X, y=None, **fit_params): y_pred : ndarray of shape (n_samples,) The predicted target. """ + self._validate_params() fit_params_steps = self._check_fit_params(**fit_params) Xt, yt = self._fit(X, y, **fit_params_steps) @@ -408,6 +424,7 @@ def _fit_resample_one(sampler, X, y, message_clsname="", message=None, **fit_par return X_res, y_res, sampler +@validate_params({"memory": [None, str, HasMethods(["cache"])], "verbose": ["boolean"]}) def make_pipeline(*steps, memory=None, verbose=False): """Construct a Pipeline from the given estimators. 
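A sketch, not part of the diff, of the behaviour the ``Pipeline`` changes above aim for: ``memory`` may be None, a path-like string, or any object exposing ``cache`` (hence ``HasMethods(["cache"])``), and the constraints are enforced by ``self._validate_params()`` at fit time instead of by ``check_memory``. The data are made up via ``make_classification``.

from tempfile import mkdtemp

import joblib
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler

X, y = make_classification(weights=[0.9, 0.1], random_state=0)

pipe = Pipeline(
    steps=[
        ("sampler", RandomUnderSampler(random_state=0)),
        ("classifier", LogisticRegression()),
    ],
    # a str path or None are also accepted; an int such as memory=1 is now
    # rejected by the parameter validation, which is presumably why the
    # dedicated test_pipeline_wrong_memory test is dropped later in the series
    memory=joblib.Memory(location=mkdtemp(), verbose=0),
)
pipe.fit(X, y)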
diff --git a/imblearn/tests/test_common.py b/imblearn/tests/test_common.py index 9ec5764d3..036d84476 100644 --- a/imblearn/tests/test_common.py +++ b/imblearn/tests/test_common.py @@ -70,6 +70,5 @@ def test_estimators_imblearn(estimator, check, request): ) def test_check_param_validation(estimator): name = estimator.__class__.__name__ - print(name) _set_checking_parameters(estimator) check_param_validation(name, estimator) diff --git a/imblearn/tests/test_pipeline.py b/imblearn/tests/test_pipeline.py index d2f0b8f5c..8b512659b 100644 --- a/imblearn/tests/test_pipeline.py +++ b/imblearn/tests/test_pipeline.py @@ -35,6 +35,7 @@ from imblearn.pipeline import Pipeline, make_pipeline from imblearn.under_sampling import EditedNearestNeighbours as ENN from imblearn.under_sampling import RandomUnderSampler +from imblearn.utils.estimator_checks import check_param_validation JUNK_FOOD_DOCS = ( "the pizza pizza beer copyright", @@ -1341,3 +1342,10 @@ def test_pipeline_score_samples_pca_lof_multiclass(): # Check the values lof.fit(pca.fit_transform(X)) assert_allclose(pipe.score_samples(X), lof.score_samples(pca.transform(X))) + + +def test_pipeline_param_validation(): + model = Pipeline( + [("sampler", RandomUnderSampler()), ("classifier", LogisticRegression())] + ) + check_param_validation("Pipeline", model) diff --git a/imblearn/tests/test_public_functions.py b/imblearn/tests/test_public_functions.py index 1b94b16df..d84732007 100644 --- a/imblearn/tests/test_public_functions.py +++ b/imblearn/tests/test_public_functions.py @@ -13,14 +13,16 @@ ) PARAM_VALIDATION_FUNCTION_LIST = [ + "imblearn.datasets.fetch_datasets", "imblearn.datasets.make_imbalance", + "imblearn.metrics.classification_report_imbalanced", + "imblearn.metrics.geometric_mean_score", + "imblearn.metrics.macro_averaged_mean_absolute_error", + "imblearn.metrics.make_index_balanced_accuracy", "imblearn.metrics.sensitivity_specificity_support", "imblearn.metrics.sensitivity_score", "imblearn.metrics.specificity_score", - "imblearn.metrics.geometric_mean_score", - "imblearn.metrics.make_index_balanced_accuracy", - "imblearn.metrics.classification_report_imbalanced", - "imblearn.metrics.macro_averaged_mean_absolute_error", + "imblearn.pipeline.make_pipeline", ] From 83b30788fd745fcef4fa03f8b6c6c3532475f2db Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 15:07:57 +0100 Subject: [PATCH 6/8] TST remove redundant test --- imblearn/tests/test_pipeline.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/imblearn/tests/test_pipeline.py b/imblearn/tests/test_pipeline.py index 8b512659b..8355f0228 100644 --- a/imblearn/tests/test_pipeline.py +++ b/imblearn/tests/test_pipeline.py @@ -643,22 +643,6 @@ def test_classes_property(): assert_array_equal(clf.classes_, np.unique(y)) -def test_pipeline_wrong_memory(): - # Test that an error is raised when memory is not a string or a Memory - # instance - iris = load_iris() - X = iris.data - y = iris.target - # Define memory as an integer - memory = 1 - cached_pipe = Pipeline( - [("transf", DummyTransf()), ("svc", SVC(gamma="scale"))], memory=memory - ) - error_regex = "string or have the same interface as" - with raises(ValueError, match=error_regex): - cached_pipe.fit(X, y) - - def test_pipeline_memory_transformer(): iris = load_iris() X = iris.data From 517f5c63f52414d76f6d9d089aa0a82eb632ca3b Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 16:03:36 +0100 Subject: [PATCH 7/8] revert dataframe --- imblearn/datasets/_imbalance.py | 10 ++---- 
imblearn/utils/_param_validation.py | 21 +---------- imblearn/utils/tests/test_param_validation.py | 36 ------------------- 3 files changed, 3 insertions(+), 64 deletions(-) diff --git a/imblearn/datasets/_imbalance.py b/imblearn/datasets/_imbalance.py index e935f1b9b..8c1c15aec 100644 --- a/imblearn/datasets/_imbalance.py +++ b/imblearn/datasets/_imbalance.py @@ -15,7 +15,7 @@ @validate_params( { - "X": ["array-like", "dataframe"], + "X": ["array-like"], "y": ["array-like"], "sampling_strategy": [Mapping, callable, None], "random_state": ["random_state"], @@ -97,16 +97,10 @@ def make_imbalance( """ target_stats = Counter(y) # restrict ratio to be a dict or a callable - if isinstance(sampling_strategy, dict) or callable(sampling_strategy): + if isinstance(sampling_strategy, Mapping) or callable(sampling_strategy): sampling_strategy_ = check_sampling_strategy( sampling_strategy, y, "under-sampling", **kwargs ) - else: - raise ValueError( - f"'sampling_strategy' has to be a dictionary or a " - f"function returning a dictionary. Got {type(sampling_strategy)} " - f"instead." - ) if verbose: print(f"The original target distribution in the dataset is: {target_stats}") diff --git a/imblearn/utils/_param_validation.py b/imblearn/utils/_param_validation.py index a45292c63..005595276 100644 --- a/imblearn/utils/_param_validation.py +++ b/imblearn/utils/_param_validation.py @@ -23,7 +23,7 @@ # if sklearn_version < parse_version("1.2"): if True: # TODO: remove `if True` when we have clear support for: - # - dataframe + # - ignoring `*args` and `**kwargs` in the signature def validate_parameter_constraints(parameter_constraints, params, caller_name): """Validate types and values of given parameters. @@ -38,7 +38,6 @@ def validate_parameter_constraints(parameter_constraints, params, caller_name): Constraints can be: - an Interval object, representing a continuous or discrete range of numbers - the string "array-like" - - the string "dataframe" - the string "sparse matrix" - the string "random_state" - callable @@ -119,8 +118,6 @@ def make_constraint(constraint): return _ArrayLikes() if isinstance(constraint, str) and constraint == "sparse matrix": return _SparseMatrices() - if isinstance(constraint, str) and constraint == "dataframe": - return _DataFrames() if isinstance(constraint, str) and constraint == "random_state": return _RandomStates() if constraint is callable: @@ -472,17 +469,6 @@ def is_satisfied_by(self, val): def __str__(self): return "a sparse matrix" - class _DataFrames(_Constraint): - """Constraint representing a DataFrame""" - - def is_satisfied_by(self, val): - # Let's first try the dataframe protocol and then duck-typing for the older - # pandas versions. 
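One point worth noting about the revert above (a sketch, not part of the diff, assuming pandas is installed): a DataFrame already satisfies the generic "array-like" constraint, so dropping the dedicated "dataframe" constraint does not, in practice, narrow what ``make_imbalance`` accepts.

import pandas as pd

from imblearn.utils._param_validation import make_constraint

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
print(make_constraint("array-like").is_satisfied_by(df))  # True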
- return hasattr(val, "__dataframe__") or hasattr(val, "iloc") - - def __str__(self): - return "a DataFrame" - class _Callables(_Constraint): """Constraint representing callables.""" @@ -862,11 +848,6 @@ def generate_valid_param(constraint): if isinstance(constraint, _SparseMatrices): return csr_matrix([[0, 1], [1, 0]]) - if isinstance(constraint, _DataFrames): - import pandas as pd - - return pd.DataFrame({"a": [1, 2, 3]}) - if isinstance(constraint, _RandomStates): return np.random.RandomState(42) diff --git a/imblearn/utils/tests/test_param_validation.py b/imblearn/utils/tests/test_param_validation.py index ec3a37e13..dae58a790 100644 --- a/imblearn/utils/tests/test_param_validation.py +++ b/imblearn/utils/tests/test_param_validation.py @@ -21,7 +21,6 @@ _Booleans, _Callables, _CVObjects, - _DataFrames, _InstancesOf, _IterablesNotString, _MissingValues, @@ -37,15 +36,6 @@ ) -def has_pandas(): - try: - import pandas as pd - - return True, pd.DataFrame({"a": [1, 2, 3]}) - except ImportError: - return False, None - - # Some helpers for the tests @validate_params({"a": [Real], "b": [Real], "c": [Real], "d": [Real]}) def _func(a, b=0, *args, c, d=0, **kwargs): @@ -327,12 +317,6 @@ def test_generate_invalid_param_val_2_intervals(integer_interval, real_interval) "constraints", [ [_ArrayLikes()], - pytest.param( - [_DataFrames()], - marks=pytest.mark.skipif( - not has_pandas()[0], reason="Pandas not installed" - ), - ), [_InstancesOf(list)], [_Callables()], [_NoneConstraint()], @@ -358,12 +342,6 @@ def test_generate_invalid_param_val_all_valid(constraints): "constraint", [ _ArrayLikes(), - pytest.param( - _DataFrames(), - marks=pytest.mark.skipif( - not has_pandas()[0], reason="Pandas not installed" - ), - ), _Callables(), _InstancesOf(list), _NoneConstraint(), @@ -403,13 +381,6 @@ def test_generate_valid_param(constraint): (None, None), ("array-like", [[1, 2], [3, 4]]), ("array-like", np.array([[1, 2], [3, 4]])), - pytest.param( - "dataframe", - has_pandas()[1], - marks=pytest.mark.skipif( - not has_pandas()[0], reason="Pandas not installed" - ), - ), ("sparse matrix", csr_matrix([[1, 2], [3, 4]])), ("random_state", 0), ("random_state", np.random.RandomState(0)), @@ -443,13 +414,6 @@ def test_is_satisfied_by(constraint_declaration, value): (Options(Real, {0.42, 1.23}), Options), ("array-like", _ArrayLikes), ("sparse matrix", _SparseMatrices), - pytest.param( - "dataframe", - _DataFrames, - marks=pytest.mark.skipif( - not has_pandas()[0], reason="Pandas not installed" - ), - ), ("random_state", _RandomStates), (None, _NoneConstraint), (callable, _Callables), From 2506fc8f0c6271dfdc2a6e40c5513b52ee205018 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Mon, 5 Dec 2022 16:41:05 +0100 Subject: [PATCH 8/8] iter --- imblearn/utils/_param_validation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/imblearn/utils/_param_validation.py b/imblearn/utils/_param_validation.py index 005595276..ae3855945 100644 --- a/imblearn/utils/_param_validation.py +++ b/imblearn/utils/_param_validation.py @@ -20,8 +20,7 @@ sklearn_version = parse_version(sklearn.__version__) -# if sklearn_version < parse_version("1.2"): -if True: +if sklearn_version < parse_version("1.2"): # TODO: remove `if True` when we have clear support for: # - ignoring `*args` and `**kwargs` in the signature
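As a closing illustration of the net effect of the series (a sketch, not part of any patch; the message wording is approximate but follows the pattern the common test asserts): every function in PARAM_VALIDATION_FUNCTION_LIST now fails fast, with a consistent message, when handed an out-of-range argument. The labels are made up.

from imblearn.metrics import classification_report_imbalanced

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]

try:
    # digits is constrained to Interval(numbers.Integral, 0, None, closed="left")
    classification_report_imbalanced(y_true, y_pred, digits=-1)
except ValueError as exc:
    # e.g. "The 'digits' parameter of classification_report_imbalanced must be
    # an int in the range [0, inf). Got -1 instead."
    print(exc)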