4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -20,3 +20,7 @@ repos:
       - id: mypy
         files: sklearn/
         additional_dependencies: [pytest==6.2.4]
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.10.1
+    hooks:
+      - id: isort
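Once contributors run `pre-commit install`, the new hook sorts imports on every commit; it can also be invoked on demand with `pre-commit run isort --all-files`. Since isort's defaults can disagree with black over import formatting, projects running both linters typically enable isort's black-compatible profile. A minimal sketch of how that could be attached to the hook (the `args` line is an assumption; the profile may equally live in setup.cfg or pyproject.toml):

  - repo: https://github.com/PyCQA/isort
    rev: 5.10.1
    hooks:
      - id: isort
        # Hypothetical: make isort produce black-compatible import formatting.
        args: ["--profile", "black"]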
5 changes: 4 additions & 1 deletion azure-pipelines.yml
@@ -45,11 +45,14 @@ jobs:
         versionSpec: '3.9'
     - bash: |
         # Include pytest compatibility with mypy
-        pip install pytest flake8 mypy==0.782 black==22.3
+        pip install pytest flake8 mypy==0.782 black==22.3 isort
       displayName: Install linters
     - bash: |
         black --check --diff .
       displayName: Run black
+    - bash: |
+        isort --check --diff .
+      displayName: Run isort
     - bash: |
         ./build_tools/azure/linting.sh
       displayName: Run linting
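The new `Run isort` step mirrors the pre-commit hook: `isort --check --diff .` only reports. `--check` makes isort exit with a non-zero status when any file would be re-sorted, and `--diff` prints what would change without touching the files, so unsorted imports fail this lint job just as unformatted code fails the black step.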
1 change: 1 addition & 0 deletions conftest.py
@@ -6,6 +6,7 @@
 # rather than the one from site-packages.

 import os
+
 import pytest


2 changes: 1 addition & 1 deletion doc/conf.py
@@ -15,8 +15,8 @@
 import os
 import sys
 from datetime import datetime
-from pathlib import Path
 from io import StringIO
+from pathlib import Path

 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
4 changes: 2 additions & 2 deletions doc/sphinxext/github_link.py
@@ -1,9 +1,9 @@
-from operator import attrgetter
 import inspect
-import subprocess
 import os
+import subprocess
 import sys
 from functools import partial
+from operator import attrgetter

 REVISION_CMD = "git rev-parse --short HEAD"

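The same pattern repeats in the example scripts below. isort rewrites each import block into sections: standard library first, then third-party packages, then first-party (`imblearn`) imports, with one blank line between sections; within a section, plain `import x` statements precede `from x import y` statements, and each run is alphabetized. A short illustrative sketch of the resulting layout (module names chosen for the example):

# Standard-library section: plain imports first, then "from" imports.
import os
import sys
from collections import Counter

# Third-party section, separated from the previous group by a blank line.
import numpy as np
from sklearn.datasets import make_blobs

# First-party section: the project's own package sorts last.
from imblearn.datasets import make_imbalance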
1 change: 1 addition & 0 deletions examples/api/plot_sampling_strategy_usage.py
@@ -26,6 +26,7 @@

 # %%
 from sklearn.datasets import load_iris
+
 from imblearn.datasets import make_imbalance

 iris = load_iris(as_frame=True)
14 changes: 8 additions & 6 deletions examples/applications/plot_impact_imbalanced_classes.py
@@ -60,9 +60,10 @@
 # As a baseline, we could use a classifier which will always predict the
 # majority class independently of the features provided.

+from sklearn.dummy import DummyClassifier
+
 # %%
 from sklearn.model_selection import cross_validate
-from sklearn.dummy import DummyClassifier

 dummy_clf = DummyClassifier(strategy="most_frequent")
 scoring = ["accuracy", "balanced_accuracy"]
@@ -121,9 +122,8 @@

 # %%
 from sklearn.impute import SimpleImputer
-from sklearn.preprocessing import StandardScaler
-from sklearn.preprocessing import OneHotEncoder
 from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler

 num_pipe = make_pipeline(
     StandardScaler(), SimpleImputer(strategy="mean", add_indicator=True)
@@ -139,8 +139,8 @@
 # numerical pipeline

 # %%
-from sklearn.compose import make_column_transformer
 from sklearn.compose import make_column_selector as selector
+from sklearn.compose import make_column_transformer

 preprocessor_linear = make_column_transformer(
     (num_pipe, selector(dtype_include="number")),
@@ -176,9 +176,10 @@
 # classifier, we will not need to scale the numerical data, and we will only
 # need to ordinal encode the categorical data.

+from sklearn.ensemble import RandomForestClassifier
+
 # %%
 from sklearn.preprocessing import OrdinalEncoder
-from sklearn.ensemble import RandomForestClassifier

 num_pipe = SimpleImputer(strategy="mean", add_indicator=True)
 cat_pipe = make_pipeline(
@@ -336,8 +337,9 @@
 # applying a single random under-sampling. We will use a gradient-boosting
 # classifier within a :class:`~imblearn.ensemble.BalancedBaggingClassifier`.

-from sklearn.experimental import enable_hist_gradient_boosting  # noqa
 from sklearn.ensemble import HistGradientBoostingClassifier
+from sklearn.experimental import enable_hist_gradient_boosting  # noqa
+
 from imblearn.ensemble import BalancedBaggingClassifier

 bag_clf = make_pipeline(
4 changes: 2 additions & 2 deletions examples/applications/plot_multi_class_under_sampling.py
@@ -20,9 +20,9 @@
 from sklearn.preprocessing import StandardScaler

 from imblearn.datasets import make_imbalance
-from imblearn.under_sampling import NearMiss
-from imblearn.pipeline import make_pipeline
 from imblearn.metrics import classification_report_imbalanced
+from imblearn.pipeline import make_pipeline
+from imblearn.under_sampling import NearMiss

 print(__doc__)

5 changes: 2 additions & 3 deletions examples/applications/plot_outlier_rejections.py
@@ -13,10 +13,9 @@
 # Authors: Guillaume Lemaitre <[email protected]>
 # License: MIT

-import numpy as np
 import matplotlib.pyplot as plt
-
-from sklearn.datasets import make_moons, make_blobs
+import numpy as np
+from sklearn.datasets import make_blobs, make_moons
 from sklearn.ensemble import IsolationForest
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import classification_report
7 changes: 4 additions & 3 deletions examples/applications/plot_over_sampling_benchmark_lfw.py
@@ -72,11 +72,12 @@
 #
 # We will create different pipelines and evaluate them.

+from sklearn.neighbors import KNeighborsClassifier
+
 # %%
 from imblearn import FunctionSampler
-from imblearn.over_sampling import ADASYN, RandomOverSampler, SMOTE
+from imblearn.over_sampling import ADASYN, SMOTE, RandomOverSampler
 from imblearn.pipeline import make_pipeline
-from sklearn.neighbors import KNeighborsClassifier

 classifier = KNeighborsClassifier(n_neighbors=3)

@@ -98,7 +99,7 @@
 # cross-validation.

 # %%
-from sklearn.metrics import RocCurveDisplay, roc_curve, auc
+from sklearn.metrics import RocCurveDisplay, auc, roc_curve

 disp = []
 for model in pipeline:
3 changes: 2 additions & 1 deletion examples/applications/plot_topic_classication.py
@@ -87,9 +87,10 @@
 # :class:`~imblearn.pipeline.make_pipeline` function implemented in
 # imbalanced-learn to properly handle the samplers.

+from imblearn.pipeline import make_pipeline as make_pipeline_imb
+
 # %%
 from imblearn.under_sampling import RandomUnderSampler
-from imblearn.pipeline import make_pipeline as make_pipeline_imb

 model = make_pipeline_imb(TfidfVectorizer(), RandomUnderSampler(), MultinomialNB())

19 changes: 7 additions & 12 deletions examples/applications/porto_seguro_keras_under_sampling.py
@@ -25,8 +25,9 @@
 ###############################################################################

 from collections import Counter
-import pandas as pd
+
 import numpy as np
+import pandas as pd

 ###############################################################################
 # First, you should download the Porto Seguro data set from Kaggle. See the
@@ -49,11 +50,9 @@
 ###############################################################################

 from sklearn.compose import ColumnTransformer
-from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.preprocessing import StandardScaler
-from sklearn.preprocessing import FunctionTransformer
 from sklearn.impute import SimpleImputer
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, StandardScaler


 def convert_float64(X):
@@ -95,16 +94,12 @@ def convert_float64(X):

 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

+from tensorflow.keras.layers import Activation, BatchNormalization, Dense, Dropout
+
 ###############################################################################
 # Create a neural-network
 ###############################################################################
 from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import (
-    Activation,
-    Dense,
-    Dropout,
-    BatchNormalization,
-)


 def make_model(n_features):
@@ -235,8 +230,8 @@ def fit_predict_balanced_model(X_train, y_train, X_test, y_test):
 )
 df_time = df_time.unstack().reset_index()

-import seaborn as sns
 import matplotlib.pyplot as plt
+import seaborn as sns

 plt.figure()
 sns.boxplot(y="level_0", x=0, data=df_time)
6 changes: 4 additions & 2 deletions examples/combine/plot_comparison_combine.py
@@ -102,11 +102,13 @@ def plot_decision_function(X, y, clf, ax):
 # :class:`~imblearn.combine.SMOTEENN` cleans more noisy data than
 # :class:`~imblearn.combine.SMOTETomek`.

+from sklearn.svm import LinearSVC
+
+from imblearn.combine import SMOTEENN, SMOTETomek
+
 # %%
 from imblearn.over_sampling import SMOTE
-from imblearn.combine import SMOTEENN, SMOTETomek
 from imblearn.pipeline import make_pipeline
-from sklearn.svm import LinearSVC

 samplers = [SMOTE(random_state=0), SMOTEENN(random_state=0), SMOTETomek(random_state=0)]

5 changes: 4 additions & 1 deletion examples/ensemble/plot_bagging_classifier.py
@@ -46,9 +46,10 @@
 # :class:`~imblearn.ensemble.BalancedBaggingClassifier` by passing different
 # sampler.

+from sklearn.ensemble import BaggingClassifier
+
 # %%
 from sklearn.model_selection import cross_validate
-from sklearn.ensemble import BaggingClassifier

 ebb = BaggingClassifier()
 cv_results = cross_validate(ebb, X, y, scoring="balanced_accuracy")
@@ -119,7 +120,9 @@

 # %%
 from collections import Counter
+
 import numpy as np
+
 from imblearn import FunctionSampler


7 changes: 6 additions & 1 deletion examples/ensemble/plot_comparison_ensemble_classifier.py
@@ -27,9 +27,10 @@
 # (number of majority sample for a minority sample). The data are then split
 # into training and testing.

+from sklearn.model_selection import train_test_split
+
 # %%
 from imblearn.datasets import fetch_datasets
-from sklearn.model_selection import train_test_split

 satimage = fetch_datasets()["satimage"]
 X, y = satimage.data, satimage.target
@@ -55,6 +56,7 @@

 # %%
 from sklearn.metrics import balanced_accuracy_score
+
 from imblearn.metrics import geometric_mean_score

 print("Decision tree classifier performance:")
@@ -83,6 +85,7 @@

 # %%
 from sklearn.ensemble import BaggingClassifier
+
 from imblearn.ensemble import BalancedBaggingClassifier

 bagging = BaggingClassifier(n_estimators=50, random_state=0)
@@ -132,6 +135,7 @@

 # %%
 from sklearn.ensemble import RandomForestClassifier
+
 from imblearn.ensemble import BalancedRandomForestClassifier

 rf = RandomForestClassifier(n_estimators=50, random_state=0)
@@ -180,6 +184,7 @@

 # %%
 from sklearn.ensemble import AdaBoostClassifier
+
 from imblearn.ensemble import EasyEnsembleClassifier, RUSBoostClassifier

 base_estimator = AdaBoostClassifier(n_estimators=10)
2 changes: 1 addition & 1 deletion examples/evaluation/plot_classification_report.py
@@ -14,9 +14,9 @@


 from sklearn import datasets
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler
 from sklearn.svm import LinearSVC
-from sklearn.model_selection import train_test_split

 from imblearn import over_sampling as os
 from imblearn import pipeline as pl
8 changes: 5 additions & 3 deletions examples/evaluation/plot_metrics.py
@@ -50,12 +50,14 @@
 # We will create a pipeline made of a :class:`~imblearn.over_sampling.SMOTE`
 # over-sampler followed by a :class:`~sklearn.svm.LinearSVC` classifier.

-# %%
-from imblearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
-from imblearn.over_sampling import SMOTE
 from sklearn.svm import LinearSVC
+
+from imblearn.over_sampling import SMOTE
+
+# %%
+from imblearn.pipeline import make_pipeline

 model = make_pipeline(
     StandardScaler(),
     SMOTE(random_state=RANDOM_STATE),
3 changes: 2 additions & 1 deletion examples/model_selection/plot_validation_curve.py
@@ -48,10 +48,11 @@
 # search which `k_neighbors` parameter is the most adequate with the dataset
 # that we generated.

+from sklearn.tree import DecisionTreeClassifier
+
 # %%
 from imblearn.over_sampling import SMOTE
 from imblearn.pipeline import make_pipeline
-from sklearn.tree import DecisionTreeClassifier

 model = make_pipeline(
     SMOTE(random_state=RANDOM_STATE), DecisionTreeClassifier(random_state=RANDOM_STATE)
8 changes: 5 additions & 3 deletions examples/over-sampling/plot_comparison_over_sampling.py
@@ -134,9 +134,10 @@ def plot_decision_function(X, y, clf, ax, title=None):
 # class. The class :class:`~imblearn.over_sampling.RandomOverSampler`
 # implements such of a strategy.

+from imblearn.over_sampling import RandomOverSampler
+
 # %%
 from imblearn.pipeline import make_pipeline
-from imblearn.over_sampling import RandomOverSampler

 X, y = create_dataset(n_samples=100, weights=(0.05, 0.25, 0.7))

@@ -185,7 +186,7 @@ def plot_decision_function(X, y, clf, ax, title=None):

 # %%
 from imblearn import FunctionSampler  # to use a idendity sampler
-from imblearn.over_sampling import SMOTE, ADASYN
+from imblearn.over_sampling import ADASYN, SMOTE

 X, y = create_dataset(n_samples=150, weights=(0.1, 0.2, 0.7))

@@ -261,7 +262,7 @@ def plot_decision_function(X, y, clf, ax, title=None):
 # density.

 # %%
-from imblearn.over_sampling import BorderlineSMOTE, KMeansSMOTE, SVMSMOTE
+from imblearn.over_sampling import SVMSMOTE, BorderlineSMOTE, KMeansSMOTE

 X, y = create_dataset(n_samples=5000, weights=(0.01, 0.05, 0.94), class_sep=0.8)

@@ -292,6 +293,7 @@ def plot_decision_function(X, y, clf, ax, title=None):

 # %%
 from collections import Counter
+
 from imblearn.over_sampling import SMOTENC

 rng = np.random.RandomState(42)