4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -20,3 +20,7 @@ repos:
       - id: mypy
         files: sklearn/
         additional_dependencies: [pytest==6.2.4]
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
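Once contributors run `pre-commit install`, the new hook sorts imports on every commit; it can also be invoked on demand with `pre-commit run isort --all-files`. Since isort's defaults can disagree with black over import formatting, projects running both linters typically enable isort's black-compatible profile. A minimal sketch of how that could be attached to the hook (the `args` line is an assumption; the profile may equally live in setup.cfg or pyproject.toml):

  - repo: https://github.com/PyCQA/isort
    rev: 5.10.1
    hooks:
      - id: isort
        # Hypothetical: make isort produce black-compatible import formatting.
        args: ["--profile", "black"]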
5 changes: 4 additions & 1 deletion azure-pipelines.yml
@@ -45,11 +45,14 @@ jobs:
         versionSpec: '3.9'
     - bash: |
         # Include pytest compatibility with mypy
-        pip install pytest flake8 mypy==0.782 black==22.3
+        pip install pytest flake8 mypy==0.782 black==22.3 isort
       displayName: Install linters
     - bash: |
         black --check --diff .
       displayName: Run black
+    - bash: |
+        isort --check --diff .
+      displayName: Run isort
     - bash: |
         ./build_tools/azure/linting.sh
       displayName: Run linting
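The new `Run isort` step mirrors the pre-commit hook: `isort --check --diff .` only reports. `--check` makes isort exit with a non-zero status when any file would be re-sorted, and `--diff` prints what would change without touching the files, so unsorted imports fail this lint job just as unformatted code fails the black step.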
1 change: 1 addition & 0 deletions conftest.py
@@ -6,6 +6,7 @@
 # rather than the one from site-packages.

 import os
+
 import pytest


2 changes: 1 addition & 1 deletion doc/conf.py
@@ -15,8 +15,8 @@
 import os
 import sys
 from datetime import datetime
-from pathlib import Path
 from io import StringIO
+from pathlib import Path

 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
4 changes: 2 additions & 2 deletions doc/sphinxext/github_link.py
@@ -1,9 +1,9 @@
-from operator import attrgetter
 import inspect
-import subprocess
 import os
+import subprocess
 import sys
 from functools import partial
+from operator import attrgetter

 REVISION_CMD = "git rev-parse --short HEAD"

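The same pattern repeats in the example scripts below. isort rewrites each import block into sections: standard library first, then third-party packages, then first-party (`imblearn`) imports, with one blank line between sections; within a section, plain `import x` statements precede `from x import y` statements, and each run is alphabetized. A short illustrative sketch of the resulting layout (module names chosen for the example):

# Standard-library section: plain imports first, then "from" imports.
import os
import sys
from collections import Counter

# Third-party section, separated from the previous group by a blank line.
import numpy as np
from sklearn.datasets import make_blobs

# First-party section: the project's own package sorts last.
from imblearn.datasets import make_imbalance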
1 change: 1 addition & 0 deletions examples/api/plot_sampling_strategy_usage.py
@@ -26,6 +26,7 @@

 # %%
 from sklearn.datasets import load_iris
+
 from imblearn.datasets import make_imbalance

 iris = load_iris(as_frame=True)
14 changes: 8 additions & 6 deletions examples/applications/plot_impact_imbalanced_classes.py
@@ -60,9 +60,10 @@
 # As a baseline, we could use a classifier which will always predict the
 # majority class independently of the features provided.

+from sklearn.dummy import DummyClassifier
+
 # %%
 from sklearn.model_selection import cross_validate
-from sklearn.dummy import DummyClassifier

 dummy_clf = DummyClassifier(strategy="most_frequent")
 scoring = ["accuracy", "balanced_accuracy"]
@@ -121,9 +122,8 @@

 # %%
 from sklearn.impute import SimpleImputer
-from sklearn.preprocessing import StandardScaler
-from sklearn.preprocessing import OneHotEncoder
 from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler

 num_pipe = make_pipeline(
     StandardScaler(), SimpleImputer(strategy="mean", add_indicator=True)
@@ -139,8 +139,8 @@
 # numerical pipeline

 # %%
-from sklearn.compose import make_column_transformer
 from sklearn.compose import make_column_selector as selector
+from sklearn.compose import make_column_transformer

 preprocessor_linear = make_column_transformer(
     (num_pipe, selector(dtype_include="number")),
@@ -176,9 +176,10 @@
 # classifier, we will not need to scale the numerical data, and we will only
 # need to ordinal encode the categorical data.

+from sklearn.ensemble import RandomForestClassifier
+
 # %%
 from sklearn.preprocessing import OrdinalEncoder
-from sklearn.ensemble import RandomForestClassifier

 num_pipe = SimpleImputer(strategy="mean", add_indicator=True)
 cat_pipe = make_pipeline(
@@ -336,8 +337,9 @@
 # applying a single random under-sampling. We will use a gradient-boosting
 # classifier within a :class:`~imblearn.ensemble.BalancedBaggingClassifier`.

-from sklearn.experimental import enable_hist_gradient_boosting  # noqa
 from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.experimental import enable_hist_gradient_boosting  # noqa
+
 from imblearn.ensemble import BalancedBaggingClassifier

 bag_clf = make_pipeline(
4 changes: 2 additions & 2 deletions examples/applications/plot_multi_class_under_sampling.py
@@ -20,9 +20,9 @@
 from sklearn.preprocessing import StandardScaler

 from imblearn.datasets import make_imbalance
-from imblearn.under_sampling import NearMiss
-from imblearn.pipeline import make_pipeline
 from imblearn.metrics import classification_report_imbalanced
+from imblearn.pipeline import make_pipeline
+from imblearn.under_sampling import NearMiss

 print(__doc__)

5 changes: 2 additions & 3 deletions examples/applications/plot_outlier_rejections.py
@@ -13,10 +13,9 @@
 # Authors: Guillaume Lemaitre <[email protected]>
 # License: MIT

-import numpy as np
 import matplotlib.pyplot as plt
-
-from sklearn.datasets import make_moons, make_blobs
+import numpy as np
+from sklearn.datasets import make_blobs, make_moons
 from sklearn.ensemble import IsolationForest
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import classification_report
7 changes: 4 additions & 3 deletions examples/applications/plot_over_sampling_benchmark_lfw.py
@@ -72,11 +72,12 @@
 #
 # We will create different pipelines and evaluate them.

+from sklearn.neighbors import KNeighborsClassifier
+
 # %%
 from imblearn import FunctionSampler
-from imblearn.over_sampling import ADASYN, RandomOverSampler, SMOTE
+from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
 from imblearn.pipeline import make_pipeline
-from sklearn.neighbors import KNeighborsClassifier

 classifier = KNeighborsClassifier(n_neighbors=3)

@@ -98,7 +99,7 @@
 # cross-validation.

 # %%
-from sklearn.metrics import RocCurveDisplay, roc_curve, auc
+from sklearn.metrics import RocCurveDisplay, auc, roc_curve

 disp = []
 for model in pipeline:
3 changes: 2 additions & 1 deletion examples/applications/plot_topic_classication.py
@@ -87,9 +87,10 @@
 # :class:`~imblearn.pipeline.make_pipeline` function implemented in
 # imbalanced-learn to properly handle the samplers.

+from imblearn.pipeline import make_pipeline as make_pipeline_imb
+
 # %%
 from imblearn.under_sampling import RandomUnderSampler
-from imblearn.pipeline import make_pipeline as make_pipeline_imb

 model = make_pipeline_imb(TfidfVectorizer(), RandomUnderSampler(), MultinomialNB())

19 changes: 7 additions & 12 deletions examples/applications/porto_seguro_keras_under_sampling.py
@@ -25,8 +25,9 @@
 ###############################################################################

 from collections import Counter
-import pandas as pd
+
 import numpy as np
+import pandas as pd

 ###############################################################################
 # First, you should download the Porto Seguro data set from Kaggle. See the
@@ -49,11 +50,9 @@
 ###############################################################################

 from sklearn.compose import ColumnTransformer
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.preprocessing import StandardScaler
-from sklearn.preprocessing import FunctionTransformer
 from sklearn.impute import SimpleImputer
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, StandardScaler


 def convert_float64(X):
@@ -95,16 +94,12 @@ def convert_float64(X):

 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

+from tensorflow.keras.layers import Activation, BatchNormalization, Dense, Dropout
+
 ###############################################################################
 # Create a neural-network
 ###############################################################################
 from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import (
-    Activation,
-    Dense,
-    Dropout,
-    BatchNormalization,
-)


 def make_model(n_features):
@@ -235,8 +230,8 @@ def fit_predict_balanced_model(X_train, y_train, X_test, y_test):
 )
 df_time = df_time.unstack().reset_index()

-import seaborn as sns
 import matplotlib.pyplot as plt
+import seaborn as sns

 plt.figure()
 sns.boxplot(y="level_0", x=0, data=df_time)
6 changes: 4 additions & 2 deletions examples/combine/plot_comparison_combine.py
@@ -102,11 +102,13 @@ def plot_decision_function(X, y, clf, ax):
 # :class:`~imblearn.combine.SMOTEENN` cleans more noisy data than
 # :class:`~imblearn.combine.SMOTETomek`.

+from sklearn.svm import LinearSVC
+
+from imblearn.combine import SMOTEENN, SMOTETomek
+
 # %%
 from imblearn.over_sampling import SMOTE
-from imblearn.combine import SMOTEENN, SMOTETomek
 from imblearn.pipeline import make_pipeline
-from sklearn.svm import LinearSVC

 samplers = [SMOTE(random_state=0), SMOTEENN(random_state=0), SMOTETomek(random_state=0)]

5 changes: 4 additions & 1 deletion examples/ensemble/plot_bagging_classifier.py
@@ -46,9 +46,10 @@
 # :class:`~imblearn.ensemble.BalancedBaggingClassifier` by passing different
 # sampler.

+from sklearn.ensemble import BaggingClassifier
+
 # %%
 from sklearn.model_selection import cross_validate
-from sklearn.ensemble import BaggingClassifier

 ebb = BaggingClassifier()
 cv_results = cross_validate(ebb, X, y, scoring="balanced_accuracy")
@@ -119,7 +120,9 @@

 # %%
 from collections import Counter
+
 import numpy as np
+
 from imblearn import FunctionSampler


7 changes: 6 additions & 1 deletion examples/ensemble/plot_comparison_ensemble_classifier.py
@@ -27,9 +27,10 @@
 # (number of majority sample for a minority sample). The data are then split
 # into training and testing.

+from sklearn.model_selection import train_test_split
+
 # %%
 from imblearn.datasets import fetch_datasets
-from sklearn.model_selection import train_test_split

 satimage = fetch_datasets()["satimage"]
 X, y = satimage.data, satimage.target
@@ -55,6 +56,7 @@

 # %%
 from sklearn.metrics import balanced_accuracy_score
+
 from imblearn.metrics import geometric_mean_score

 print("Decision tree classifier performance:")
@@ -83,6 +85,7 @@

 # %%
 from sklearn.ensemble import BaggingClassifier
+
 from imblearn.ensemble import BalancedBaggingClassifier

 bagging = BaggingClassifier(n_estimators=50, random_state=0)
@@ -132,6 +135,7 @@

 # %%
 from sklearn.ensemble import RandomForestClassifier
+
 from imblearn.ensemble import BalancedRandomForestClassifier

 rf = RandomForestClassifier(n_estimators=50, random_state=0)
@@ -180,6 +184,7 @@

 # %%
 from sklearn.ensemble import AdaBoostClassifier
+
 from imblearn.ensemble import EasyEnsembleClassifier, RUSBoostClassifier

 base_estimator = AdaBoostClassifier(n_estimators=10)
2 changes: 1 addition & 1 deletion examples/evaluation/plot_classification_report.py
@@ -14,9 +14,9 @@


 from sklearn import datasets
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import LinearSVC
-from sklearn.model_selection import train_test_split

 from imblearn import over_sampling as os
 from imblearn import pipeline as pl
8 changes: 5 additions & 3 deletions examples/evaluation/plot_metrics.py
@@ -50,12 +50,14 @@
 # We will create a pipeline made of a :class:`~imblearn.over_sampling.SMOTE`
 # over-sampler followed by a :class:`~sklearn.svm.LinearSVC` classifier.

-# %%
-from imblearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
-from imblearn.over_sampling import SMOTE
 from sklearn.svm import LinearSVC
+
+from imblearn.over_sampling import SMOTE
+
+# %%
+from imblearn.pipeline import make_pipeline

 model = make_pipeline(
     StandardScaler(),
     SMOTE(random_state=RANDOM_STATE),
3 changes: 2 additions & 1 deletion examples/model_selection/plot_validation_curve.py
@@ -48,10 +48,11 @@
 # search which `k_neighbors` parameter is the most adequate with the dataset
 # that we generated.

+from sklearn.tree import DecisionTreeClassifier
+
 # %%
 from imblearn.over_sampling import SMOTE
 from imblearn.pipeline import make_pipeline
-from sklearn.tree import DecisionTreeClassifier

 model = make_pipeline(
     SMOTE(random_state=RANDOM_STATE), DecisionTreeClassifier(random_state=RANDOM_STATE)
8 changes: 5 additions & 3 deletions examples/over-sampling/plot_comparison_over_sampling.py
@@ -134,9 +134,10 @@ def plot_decision_function(X, y, clf, ax, title=None):
 # class. The class :class:`~imblearn.over_sampling.RandomOverSampler`
 # implements such of a strategy.

+from imblearn.over_sampling import RandomOverSampler
+
 # %%
 from imblearn.pipeline import make_pipeline
-from imblearn.over_sampling import RandomOverSampler

 X, y = create_dataset(n_samples=100, weights=(0.05, 0.25, 0.7))

@@ -185,7 +186,7 @@ def plot_decision_function(X, y, clf, ax, title=None):

 # %%
 from imblearn import FunctionSampler  # to use a idendity sampler
-from imblearn.over_sampling import SMOTE, ADASYN
+from imblearn.over_sampling import ADASYN, SMOTE

 X, y = create_dataset(n_samples=150, weights=(0.1, 0.2, 0.7))

@@ -261,7 +262,7 @@ def plot_decision_function(X, y, clf, ax, title=None):
 # density.

 # %%
-from imblearn.over_sampling import BorderlineSMOTE, KMeansSMOTE, SVMSMOTE
+from imblearn.over_sampling import SVMSMOTE, BorderlineSMOTE, KMeansSMOTE

 X, y = create_dataset(n_samples=5000, weights=(0.01, 0.05, 0.94), class_sep=0.8)

@@ -292,6 +293,7 @@ def plot_decision_function(X, y, clf, ax, title=None):

 # %%
 from collections import Counter
+
 from imblearn.over_sampling import SMOTENC

 rng = np.random.RandomState(42)