From f3d167fc35f5733ab203a7457b9588a35d4271b5 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 5 Dec 2022 18:22:05 +0100
Subject: [PATCH 1/6] MAINT add support for feature_names_in_ and get_feature_names_out

---
 imblearn/base.py                              |  10 +-
 imblearn/combine/_smote_enn.py                |   6 +
 imblearn/combine/_smote_tomek.py              |   6 +
 imblearn/metrics/pairwise.py                  |  11 +
 imblearn/over_sampling/_adasyn.py             |   6 +
 .../over_sampling/_random_over_sampler.py     |   6 +
 imblearn/over_sampling/_smote/base.py         |  18 ++
 imblearn/over_sampling/_smote/cluster.py      |   6 +
 imblearn/over_sampling/_smote/filter.py       |  12 +
 imblearn/tests/test_common.py                 |  16 ++
 .../_cluster_centroids.py                     |   6 +
 .../_condensed_nearest_neighbour.py           |   6 +
 .../_edited_nearest_neighbours.py             |  18 ++
 .../_instance_hardness_threshold.py           |   6 +
 .../_prototype_selection/_nearmiss.py         |   6 +
 .../_neighbourhood_cleaning_rule.py           |   6 +
 .../_one_sided_selection.py                   |   6 +
 .../_random_under_sampler.py                  |   6 +
 .../_prototype_selection/_tomek_links.py      |   6 +
 imblearn/utils/estimator_checks.py            | 249 +++++++++++++++++-
 20 files changed, 408 insertions(+), 4 deletions(-)

diff --git a/imblearn/base.py b/imblearn/base.py
index 4241d0db3..d02aea9a5 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -7,7 +7,7 @@
 from abc import ABCMeta, abstractmethod
 
 import numpy as np
-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, OneToOneFeatureMixin
 from sklearn.preprocessing import label_binarize
 from sklearn.utils.multiclass import check_classification_targets
 
@@ -133,7 +133,7 @@ class attribute, which is a dictionary `param_name: list of constraints`. See
             )
 
 
-class BaseSampler(SamplerMixin, _ParamsValidationMixin):
+class BaseSampler(SamplerMixin, OneToOneFeatureMixin, _ParamsValidationMixin):
     """Base class for sampling algorithms.
 
     Warning: This class should not be used directly. Use the derive classes
@@ -260,6 +260,12 @@ class FunctionSampler(BaseSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     sklearn.preprocessing.FunctionTransfomer : Stateless transformer.
diff --git a/imblearn/combine/_smote_enn.py b/imblearn/combine/_smote_enn.py
index 241fc0f70..1b0ffe0b8 100644
--- a/imblearn/combine/_smote_enn.py
+++ b/imblearn/combine/_smote_enn.py
@@ -67,6 +67,12 @@ class SMOTEENN(BaseSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTETomek : Over-sample using SMOTE followed by under-sampling removing
diff --git a/imblearn/combine/_smote_tomek.py b/imblearn/combine/_smote_tomek.py
index 9a4bc13e6..94d7c4d01 100644
--- a/imblearn/combine/_smote_tomek.py
+++ b/imblearn/combine/_smote_tomek.py
@@ -66,6 +66,12 @@ class SMOTETomek(BaseSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTEENN : Over-sample using SMOTE followed by under-sampling using Edited
diff --git a/imblearn/metrics/pairwise.py b/imblearn/metrics/pairwise.py
index ceec92802..4aa7977ef 100644
--- a/imblearn/metrics/pairwise.py
+++ b/imblearn/metrics/pairwise.py
@@ -71,6 +71,17 @@ class ValueDifferenceMetric(BaseEstimator, _ParamsValidationMixin):
         List of length `n_features` containing the conditional probabilities
         for each category given a class.
 
+    n_features_in_ : int
+        Number of features in the input dataset.
+
+        .. versionadded:: 0.10
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     sklearn.neighbors.DistanceMetric : Interface for fast metric computation.
diff --git a/imblearn/over_sampling/_adasyn.py b/imblearn/over_sampling/_adasyn.py
index 6f4b81fd5..54e88b79f 100644
--- a/imblearn/over_sampling/_adasyn.py
+++ b/imblearn/over_sampling/_adasyn.py
@@ -73,6 +73,12 @@ class ADASYN(BaseOverSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTE : Over-sample using SMOTE.
diff --git a/imblearn/over_sampling/_random_over_sampler.py b/imblearn/over_sampling/_random_over_sampler.py
index 1f12619dd..7175855ea 100644
--- a/imblearn/over_sampling/_random_over_sampler.py
+++ b/imblearn/over_sampling/_random_over_sampler.py
@@ -76,6 +76,12 @@ class RandomOverSampler(BaseOverSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     BorderlineSMOTE : Over-sample using the borderline-SMOTE variant.
diff --git a/imblearn/over_sampling/_smote/base.py b/imblearn/over_sampling/_smote/base.py
index 15b4664c8..266349ee5 100644
--- a/imblearn/over_sampling/_smote/base.py
+++ b/imblearn/over_sampling/_smote/base.py
@@ -264,6 +264,12 @@ class SMOTE(BaseSMOTE):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTENC : Over-sample using SMOTE for continuous and categorical features.
@@ -442,6 +448,12 @@ class SMOTENC(SMOTE):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTE : Over-sample using SMOTE.
@@ -759,6 +771,12 @@ class SMOTEN(SMOTE):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTE : Over-sample using SMOTE.
diff --git a/imblearn/over_sampling/_smote/cluster.py b/imblearn/over_sampling/_smote/cluster.py
index ccfe07a7e..4ca87e9a0 100644
--- a/imblearn/over_sampling/_smote/cluster.py
+++ b/imblearn/over_sampling/_smote/cluster.py
@@ -93,6 +93,12 @@ class KMeansSMOTE(BaseSMOTE):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTE : Over-sample using SMOTE.
diff --git a/imblearn/over_sampling/_smote/filter.py b/imblearn/over_sampling/_smote/filter.py
index cf014b9ea..4966b211b 100644
--- a/imblearn/over_sampling/_smote/filter.py
+++ b/imblearn/over_sampling/_smote/filter.py
@@ -100,6 +100,12 @@ class BorderlineSMOTE(BaseSMOTE):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTE : Over-sample using SMOTE.
@@ -352,6 +358,12 @@ class SVMSMOTE(BaseSMOTE):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     SMOTE : Over-sample using SMOTE.
diff --git a/imblearn/tests/test_common.py b/imblearn/tests/test_common.py
index 0c2e2f301..d78cafd83 100644
--- a/imblearn/tests/test_common.py
+++ b/imblearn/tests/test_common.py
@@ -3,6 +3,7 @@
 #          Christos Aridas
 # License: MIT
 
+import warnings
 from collections import OrderedDict
 
 import numpy as np
@@ -19,6 +20,7 @@
 from imblearn.under_sampling import NearMiss, RandomUnderSampler
 from imblearn.utils.estimator_checks import (
     _set_checking_parameters,
+    check_dataframe_column_names_consistency,
     check_param_validation,
     parametrize_with_checks,
 )
@@ -92,3 +94,17 @@ def test_strategy_as_ordered_dict(Sampler):
     X_res, y_res = sampler.fit_resample(X, y)
     assert X_res.shape[0] == sum(strategy.values())
     assert y_res.shape[0] == sum(strategy.values())
+
+
+@pytest.mark.parametrize(
+    "estimator", _tested_estimators(), ids=_get_check_estimator_ids
+)
+def test_pandas_column_name_consistency(estimator):
+    _set_checking_parameters(estimator)
+    with ignore_warnings(category=(FutureWarning)):
+        with warnings.catch_warnings(record=True) as record:
+            check_dataframe_column_names_consistency(
+                estimator.__class__.__name__, estimator
+            )
+        for warning in record:
+            assert "was fitted without feature names" not in str(warning.message)
diff --git a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py
index 5be949ed5..5e2ca3a82 100644
--- a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py
+++ b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py
@@ -78,6 +78,12 @@ class ClusterCentroids(BaseUnderSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     EditedNearestNeighbours : Under-sampling by editing samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py
index b0d9109cf..2f03ca8a8 100644
--- a/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py
+++ b/imblearn/under_sampling/_prototype_selection/_condensed_nearest_neighbour.py
@@ -69,6 +69,12 @@ class CondensedNearestNeighbour(BaseCleaningSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     EditedNearestNeighbours : Undersample by editing samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py
index 84694e746..64419ccdf 100644
--- a/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py
+++ b/imblearn/under_sampling/_prototype_selection/_edited_nearest_neighbours.py
@@ -76,6 +76,12 @@ class EditedNearestNeighbours(BaseCleaningSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     CondensedNearestNeighbour : Undersample by condensing samples.
@@ -251,6 +257,12 @@ class RepeatedEditedNearestNeighbours(BaseCleaningSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     CondensedNearestNeighbour : Undersample by condensing samples.
@@ -454,6 +466,12 @@ class without early stopping.
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     CondensedNearestNeighbour: Under-sampling by condensing samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py
index b1a6e1150..2d8bfce6c 100644
--- a/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py
+++ b/imblearn/under_sampling/_prototype_selection/_instance_hardness_threshold.py
@@ -67,6 +67,12 @@ class InstanceHardnessThreshold(BaseUnderSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     NearMiss : Undersample based on near-miss search.
diff --git a/imblearn/under_sampling/_prototype_selection/_nearmiss.py b/imblearn/under_sampling/_prototype_selection/_nearmiss.py
index 83f94d890..70f647fa5 100644
--- a/imblearn/under_sampling/_prototype_selection/_nearmiss.py
+++ b/imblearn/under_sampling/_prototype_selection/_nearmiss.py
@@ -72,6 +72,12 @@ class NearMiss(BaseUnderSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     RandomUnderSampler : Random undersample the dataset.
diff --git a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py
index 00be9ca71..f9c08ea56 100644
--- a/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py
+++ b/imblearn/under_sampling/_prototype_selection/_neighbourhood_cleaning_rule.py
@@ -83,6 +83,12 @@ class NeighbourhoodCleaningRule(BaseCleaningSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     EditedNearestNeighbours : Undersample by editing noisy samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py
index 0a1866075..42e9a6edd 100644
--- a/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py
+++ b/imblearn/under_sampling/_prototype_selection/_one_sided_selection.py
@@ -68,6 +68,12 @@ class OneSidedSelection(BaseCleaningSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     EditedNearestNeighbours : Undersample by editing noisy samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
index a7c735fa6..ed47fe586 100644
--- a/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
+++ b/imblearn/under_sampling/_prototype_selection/_random_under_sampler.py
@@ -50,6 +50,12 @@ class RandomUnderSampler(BaseUnderSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     NearMiss : Undersample using near-miss samples.
diff --git a/imblearn/under_sampling/_prototype_selection/_tomek_links.py b/imblearn/under_sampling/_prototype_selection/_tomek_links.py
index 31d62675b..b0f954959 100644
--- a/imblearn/under_sampling/_prototype_selection/_tomek_links.py
+++ b/imblearn/under_sampling/_prototype_selection/_tomek_links.py
@@ -48,6 +48,12 @@ class TomekLinks(BaseCleaningSampler):
 
         .. versionadded:: 0.9
 
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during `fit`. Defined only when `X` has feature
+        names that are all strings.
+
+        .. versionadded:: 0.10
+
     See Also
     --------
     EditedNearestNeighbours : Undersample by samples edition.
diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index 4c4c72741..254b4e236 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -4,6 +4,7 @@
 # Authors: Guillaume Lemaitre <g.lemaitre58@gmail.com>
 # License: MIT
 
+import re
 import sys
 import traceback
 import warnings
@@ -14,27 +15,39 @@
 import pytest
 import sklearn
 from scipy import sparse
-from sklearn.base import clone
+from sklearn.base import clone, is_classifier, is_regressor
 from sklearn.cluster import KMeans
 from sklearn.datasets import (  # noqa
     load_iris,
+    make_blobs,
     make_classification,
     make_multilabel_classification,
 )
 from sklearn.exceptions import SkipTestWarning
-from sklearn.preprocessing import label_binarize
+from sklearn.preprocessing import StandardScaler, label_binarize
 from sklearn.utils._tags import _safe_tags
 from sklearn.utils._testing import (
+    SkipTest,
     assert_allclose,
     assert_array_equal,
     assert_raises_regex,
     raises,
+    set_random_state,
 )
 from sklearn.utils.estimator_checks import (
     _enforce_estimator_tags_y,
     _get_check_estimator_ids,
     _maybe_mark_xfail,
 )
+
+try:
+    from sklearn.utils.estimator_checks import _enforce_estimator_tags_x
+except ImportError:
+    # scikit-learn >= 1.2
+    from sklearn.utils.estimator_checks import (
+        _enforce_estimator_tags_X as _enforce_estimator_tags_x,
+    )
+
 from sklearn.utils.fixes import parse_version
 from sklearn.utils.multiclass import type_of_target
 
@@ -87,6 +100,8 @@ def _yield_sampler_checks(sampler):
     # stipulated
     yield check_samplers_sample_indices
     yield check_samplers_2d_target
+    yield check_sampler_get_feature_names_out
+    yield check_sampler_get_feature_names_out_pandas
 
 
 def _yield_classifier_checks(classifier):
@@ -567,3 +582,233 @@ def check_param_validation(name, estimator_orig):
                         getattr(estimator, method)(y)  # pragma: no cover
                     else:
                         getattr(estimator, method)(X, y)
+
+
+def check_dataframe_column_names_consistency(name, estimator_orig):
+    try:
+        import pandas as pd
+    except ImportError:
+        raise SkipTest(
+            "pandas is not installed: not checking column name consistency for pandas"
+        )
+
+    tags = _safe_tags(estimator_orig)
+    is_supported_X_types = (
+        "2darray" in tags["X_types"] or "categorical" in tags["X_types"]
+    )
+
+    if not is_supported_X_types or tags["no_validation"]:
+        return
+
+    rng = np.random.RandomState(0)
+
+    estimator = clone(estimator_orig)
+    set_random_state(estimator)
+
+    X_orig = rng.normal(size=(150, 8))
+
+    X_orig = _enforce_estimator_tags_x(estimator, X_orig)
+    n_samples, n_features = X_orig.shape
+
+    names = np.array([f"col_{i}" for i in range(n_features)])
+    X = pd.DataFrame(X_orig, columns=names)
+
+    if is_regressor(estimator):
+        y = rng.normal(size=n_samples)
+    else:
+        y = rng.randint(low=0, high=2, size=n_samples)
+    y = _enforce_estimator_tags_y(estimator, y)
+
+    # Check that calling `fit` does not raise any warnings about feature names.
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "error",
+            message="X does not have valid feature names",
+            category=UserWarning,
+            module="imblearn",
+        )
+        estimator.fit(X, y)
+
+    if not hasattr(estimator, "feature_names_in_"):
+        raise ValueError(
+            "Estimator does not have a feature_names_in_ "
+            "attribute after fitting with a dataframe"
+        )
+    assert isinstance(estimator.feature_names_in_, np.ndarray)
+    assert estimator.feature_names_in_.dtype == object
+    assert_array_equal(estimator.feature_names_in_, names)
+
+    # Only check imblearn estimators for feature_names_in_ in docstring
+    module_name = estimator_orig.__module__
+    if (
+        module_name.startswith("imblearn.")
+        and not ("test_" in module_name or module_name.endswith("_testing"))
+        and ("feature_names_in_" not in (estimator_orig.__doc__))
+    ):
+        raise ValueError(
+            f"Estimator {name} does not document its feature_names_in_ attribute"
+        )
+
+    check_methods = []
+    for method in (
+        "predict",
+        "transform",
+        "decision_function",
+        "predict_proba",
+        "score",
+        "score_samples",
+        "predict_log_proba",
+    ):
+        if not hasattr(estimator, method):
+            continue
+
+        callable_method = getattr(estimator, method)
+        if method == "score":
+            callable_method = partial(callable_method, y=y)
+        check_methods.append((method, callable_method))
+
+    for _, method in check_methods:
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "error",
+                message="X does not have valid feature names",
+                category=UserWarning,
+                module="sklearn",
+            )
+            method(X)  # works without UserWarning for valid features
+
+    invalid_names = [
+        (names[::-1], "Feature names must be in the same order as they were in fit."),
+        (
+            [f"another_prefix_{i}" for i in range(n_features)],
+            "Feature names unseen at fit time:\n- another_prefix_0\n-"
+            " another_prefix_1\n",
+        ),
+        (
+            names[:3],
+            f"Feature names seen at fit time, yet now missing:\n- {min(names[3:])}\n",
+        ),
+    ]
+    params = {
+        key: value
+        for key, value in estimator.get_params().items()
+        if "early_stopping" in key
+    }
+    early_stopping_enabled = any(value is True for value in params.values())
+
+    for invalid_name, additional_message in invalid_names:
+        X_bad = pd.DataFrame(X, columns=invalid_name)
+
+        expected_msg = re.escape(
+            "The feature names should match those that were passed during fit.\n"
+            f"{additional_message}"
+        )
+        for name, method in check_methods:
+            with raises(
+                ValueError, match=expected_msg, err_msg=f"{name} did not raise"
+            ):
+                method(X_bad)
+
+        # partial_fit checks on second call
+        # Do not call partial fit if early_stopping is on
+        if not hasattr(estimator, "partial_fit") or early_stopping_enabled:
+            continue
+
+        estimator = clone(estimator_orig)
+        if is_classifier(estimator):
+            classes = np.unique(y)
+            estimator.partial_fit(X, y, classes=classes)
+        else:
+            estimator.partial_fit(X, y)
+
+        with raises(ValueError, match=expected_msg):
+            estimator.partial_fit(X_bad, y)
+
+
+def check_sampler_get_feature_names_out(name, sampler_orig):
+    tags = sampler_orig._get_tags()
+    if "2darray" not in tags["X_types"] or tags["no_validation"]:
+        return
+
+    X, y = make_blobs(
+        n_samples=30,
+        centers=[[0, 0, 0], [1, 1, 1]],
+        random_state=0,
+        n_features=2,
+        cluster_std=0.1,
+    )
+    X = StandardScaler().fit_transform(X)
+
+    sampler = clone(sampler_orig)
+    X = _enforce_estimator_tags_x(sampler, X)
+
+    n_features = X.shape[1]
+    set_random_state(sampler)
+
+    y_ = y
+    X_res, y_res = sampler.fit_resample(X, y=y_)
+    input_features = [f"feature{i}" for i in range(n_features)]
+
+    # `input_features` does not have the same length as `n_features_in_`
+    with raises(ValueError, match="input_features should have length equal"):
+        sampler.get_feature_names_out(input_features[::2])
+
+    feature_names_out = sampler.get_feature_names_out(input_features)
+    assert feature_names_out is not None
+    assert isinstance(feature_names_out, np.ndarray)
+    assert feature_names_out.dtype == object
+    assert all(isinstance(name, str) for name in feature_names_out)
+
+    n_features_out = X_res.shape[1]
+
+    assert (
+        len(feature_names_out) == n_features_out
+    ), f"Expected {n_features_out} feature names, got {len(feature_names_out)}"
+
+
+def check_sampler_get_feature_names_out_pandas(name, sampler_orig):
+    try:
+        import pandas as pd
+    except ImportError:
+        raise SkipTest(
+            "pandas is not installed: not checking column name consistency for pandas"
+        )
+
+    tags = sampler_orig._get_tags()
+    if "2darray" not in tags["X_types"] or tags["no_validation"]:
+        return
+
+    X, y = make_blobs(
+        n_samples=30,
+        centers=[[0, 0, 0], [1, 1, 1]],
+        random_state=0,
+        n_features=2,
+        cluster_std=0.1,
+    )
+    X = StandardScaler().fit_transform(X)
+
+    sampler = clone(sampler_orig)
+    X = _enforce_estimator_tags_x(sampler, X)
+
+    n_features = X.shape[1]
+    set_random_state(sampler)
+
+    y_ = y
+    feature_names_in = [f"col{i}" for i in range(n_features)]
+    df = pd.DataFrame(X, columns=feature_names_in)
+    X_res, y_res = sampler.fit_resample(df, y=y_)
+
+    # an error is raised when `input_features` does not match `feature_names_in_`
+    invalid_feature_names = [f"bad{i}" for i in range(n_features)]
+    with raises(ValueError, match="input_features is not equal to feature_names_in_"):
+        sampler.get_feature_names_out(invalid_feature_names)
+
+    feature_names_out_default = sampler.get_feature_names_out()
+    feature_names_in_explicit_names = sampler.get_feature_names_out(feature_names_in)
+    assert_array_equal(feature_names_out_default, feature_names_in_explicit_names)
+
+    n_features_out = X_res.shape[1]
+
+    assert (
+        len(feature_names_out_default) == n_features_out
+    ), f"Expected {n_features_out} feature names, got {len(feature_names_out_default)}"

From e7687b7198089f29b08320112cb4dbfe07e1621e Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 5 Dec 2022 18:23:04 +0100
Subject: [PATCH 2/6] add changelog

---
 doc/whats_new/v0.10.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/whats_new/v0.10.rst b/doc/whats_new/v0.10.rst
index 00809a1d4..2a1fd714e 100644
--- a/doc/whats_new/v0.10.rst
+++ b/doc/whats_new/v0.10.rst
@@ -22,6 +22,10 @@ Compatibility
 - Add support for automatic parameters validation as in scikit-learn >= 1.2.
   :pr:`955` by :user:`Guillaume Lemaitre <glemaitre>`.
 
+- Add support for `feature_names_in_` as well as `get_feature_names_out` for
+  all samplers.
+  :pr:`xxx` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Deprecation
 ...........
 

From 6f07a1244af950eccdf5407bb852409e83d16def Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 5 Dec 2022 18:24:15 +0100
Subject: [PATCH 3/6] update PR number

---
 doc/whats_new/v0.10.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.10.rst b/doc/whats_new/v0.10.rst
index 2a1fd714e..7ce0604b0 100644
--- a/doc/whats_new/v0.10.rst
+++ b/doc/whats_new/v0.10.rst
@@ -24,7 +24,7 @@ Compatibility
 
 - Add support for `feature_names_in_` as well as `get_feature_names_out` for
   all samplers.
-  :pr:`xxx` by :user:`Guillaume Lemaitre <glemaitre>`.
+  :pr:`959` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 Deprecation
 ...........

From f459738c73a78831ff4b776ea0d4bb91a82d02bf Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 5 Dec 2022 18:37:21 +0100
Subject: [PATCH 4/6] MAINT fall back to private _OneToOneFeatureMixin import

---
 imblearn/base.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/imblearn/base.py b/imblearn/base.py
index d02aea9a5..012e06634 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -7,7 +7,13 @@
 from abc import ABCMeta, abstractmethod
 
 import numpy as np
-from sklearn.base import BaseEstimator, OneToOneFeatureMixin
+from sklearn.base import BaseEstimator
+
+try:
+    # scikit-learn >= 1.2
+    from sklearn.base import OneToOneFeatureMixin
+except ImportError:
+    from sklearn.base import _OneToOneFeatureMixin  # noqa
 from sklearn.preprocessing import label_binarize
 from sklearn.utils.multiclass import check_classification_targets
 

From 1a17906ef43d1ffae1dc76b6a7f2fb8960afc038 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 5 Dec 2022 18:37:31 +0100
Subject: [PATCH 5/6] MAINT alias fallback _OneToOneFeatureMixin as OneToOneFeatureMixin

---
 imblearn/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/imblearn/base.py b/imblearn/base.py
index 012e06634..dd4e1b3a8 100644
--- a/imblearn/base.py
+++ b/imblearn/base.py
@@ -13,7 +13,7 @@
     # scikit-learn >= 1.2
     from sklearn.base import OneToOneFeatureMixin
 except ImportError:
-    from sklearn.base import _OneToOneFeatureMixin  # noqa
+    from sklearn.base import _OneToOneFeatureMixin as OneToOneFeatureMixin
 from sklearn.preprocessing import label_binarize
 from sklearn.utils.multiclass import check_classification_targets
 

From 62e4334c602b73ef9d8a74906625c95d93be21e1 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 5 Dec 2022 19:02:24 +0100
Subject: [PATCH 6/6] TST update warns and raises

---
 imblearn/utils/estimator_checks.py | 35 +++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/imblearn/utils/estimator_checks.py b/imblearn/utils/estimator_checks.py
index 254b4e236..e5f50a668 100644
--- a/imblearn/utils/estimator_checks.py
+++ b/imblearn/utils/estimator_checks.py
@@ -699,15 +699,34 @@ def check_dataframe_column_names_consistency(name, estimator_orig):
     for invalid_name, additional_message in invalid_names:
         X_bad = pd.DataFrame(X, columns=invalid_name)
 
-        expected_msg = re.escape(
-            "The feature names should match those that were passed during fit.\n"
-            f"{additional_message}"
-        )
         for name, method in check_methods:
-            with raises(
-                ValueError, match=expected_msg, err_msg=f"{name} did not raise"
-            ):
-                method(X_bad)
+            if sklearn_version >= parse_version("1.2"):
+                expected_msg = re.escape(
+                    "The feature names should match those that were passed during fit."
+                    f"\n{additional_message}"
+                )
+                with raises(
+                    ValueError, match=expected_msg, err_msg=f"{name} did not raise"
+                ):
+                    method(X_bad)
+            else:
+                expected_msg = re.escape(
+                    "The feature names should match those that were passed "
+                    "during fit. Starting version 1.2, an error will be raised.\n"
+                    f"{additional_message}"
+                )
+                with warnings.catch_warnings():
+                    warnings.filterwarnings(
+                        "error",
+                        category=FutureWarning,
+                        module="sklearn",
+                    )
+                    with raises(
+                        FutureWarning,
+                        match=expected_msg,
+                        err_msg=f"{name} did not raise",
+                    ):
+                        method(X_bad)
 
         # partial_fit checks on second call
         # Do not call partial fit if early_stopping is on