Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions .pep8speaks.yml

This file was deleted.

13 changes: 5 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
repos:
- repo: https://github.com/python/black
rev: stable
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 21.6b0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.8
rev: 3.9.2
hooks:
- id: flake8
types: [file, python]
# only check for unused imports for now, as long as
# the code is not fully PEP8 compatible
args: [--select=F401]
31 changes: 19 additions & 12 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,26 @@ jobs:
- job: linting
displayName: Linting
pool:
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
steps:
- bash: echo "##vso[task.prependpath]$CONDA/bin"
displayName: Add conda to PATH
- bash: sudo chown -R $USER $CONDA
displayName: Take ownership of conda installation
- bash: conda create --name flake8_env --yes flake8
displayName: Install flake8
- task: UsePythonVersion@0
inputs:
versionSpec: '3.9'
- bash: |
# Include pytest compatibility with mypy
pip install pytest flake8 mypy==0.782 black==21.6b0
displayName: Install linters
- bash: |
black --check .
displayName: Run black
- bash: |
source activate flake8_env
./build_tools/circle/linting.sh
displayName: Run linting

- template: build_tools/azure/posix.yml
parameters:
name: Linux_Runs
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
matrix:
pylatest_pip_openblas_pandas:
DISTRIB: 'conda-pip-latest'
Expand All @@ -33,15 +36,14 @@ jobs:
- template: build_tools/azure/posix.yml
parameters:
name: Linux
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
dependsOn: [linting]
matrix:
# Linux environment to test that scikit-learn can be built against
# versions of numpy, scipy with ATLAS that come with the Ubuntu image.
# NOTE(review): originally written for Ubuntu Bionic 18.04,
# i.e. numpy 1.13.3 and scipy 0.19 -- confirm the versions for ubuntu-20.04.
py36_ubuntu_atlas:
DISTRIB: 'ubuntu'
PYTHON_VERSION: '3.6'
JOBLIB_VERSION: '*'
pylatest_conda_pandas_keras:
DISTRIB: 'conda'
Expand All @@ -61,11 +63,16 @@ jobs:
TENSORFLOW_VERSION: '*'
COVERAGE: 'true'
TEST_DOCSTRINGS: 'true'
sklearn_0_24_conda:
DISTRIB: 'conda'
PYTHON_VERSION: '3.7'
SKLEARN_VERSION: '0.24.2'
INSTALL_MKL: 'true'

- template: build_tools/azure/posix-32.yml
parameters:
name: Linux32
vmImage: ubuntu-18.04
vmImage: ubuntu-20.04
dependsOn: [linting]
matrix:
py36_ubuntu_atlas_32bit:
Expand Down
7 changes: 6 additions & 1 deletion build_tools/azure/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,15 @@ if [[ "$DISTRIB" == "conda" ]]; then
fi

make_conda $TO_INSTALL
python -m pip install scikit-learn

TO_INSTALL=""

if [[ -n "$SKLEARN_VERSION" ]]; then
TO_INSTALL="$TO_INSTALL scikit-learn=$SKLEARN_VERSION"
else
TO_INSTALL="$TO_INSTALL scikit-learn"
fi

if [[ -n "$PANDAS_VERSION" ]]; then
TO_INSTALL="$TO_INSTALL pandas=$PANDAS_VERSION"
fi
Expand Down
2 changes: 1 addition & 1 deletion doc/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ The imbalanced-learn package requires the following dependencies:
* python (>=3.6)
* numpy (>=1.13.3)
* scipy (>=0.19.1)
* scikit-learn (>=0.23)
* scikit-learn (>=0.24)
* keras 2 (optional)
* tensorflow (optional)

Expand Down
10 changes: 8 additions & 2 deletions doc/sphinxext/sphinx_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ class IssueRole(object):
EXTERNAL_REPO_REGEX = re.compile(r"^(\w+)/(.+)([#@])([\w]+)$")

def __init__(
self, uri_config_option, format_kwarg, github_uri_template, format_text=None,
self,
uri_config_option,
format_kwarg,
github_uri_template,
format_text=None,
):
self.uri_config_option = uri_config_option
self.format_kwarg = format_kwarg
Expand All @@ -103,7 +107,9 @@ def make_node(self, name, issue_no, config, options=None):
)
path = name_map.get(name)
ref = "https://github.com/{issues_github_path}/{path}/{n}".format(
issues_github_path="{}/{}".format(username, repo), path=path, n=issue,
issues_github_path="{}/{}".format(username, repo),
path=path,
n=issue,
)
formatted_issue = self.format_text(issue).lstrip("#")
text = "{username}/{repo}{symbol}{formatted_issue}".format(**locals())
Expand Down
13 changes: 13 additions & 0 deletions doc/whats_new/v0.8.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
.. _changes_0_8:

Version 0.8.1
=============

**In development**

Changelog

Maintenance
...........

- Make `imbalanced-learn` compatible with `scikit-learn` 1.0.
:pr:`864` by :user:`Guillaume Lemaitre <glemaitre>`.

Version 0.8.0
=============

Expand Down
10 changes: 8 additions & 2 deletions imblearn/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ def fit_resample(self, X, y):

output = self._fit_resample(X, y)

y_ = label_binarize(output[1], np.unique(y)) if binarize_y else output[1]
y_ = (
label_binarize(output[1], classes=np.unique(y)) if binarize_y else output[1]
)

X_, y_ = arrays_transformer.transform(output[0], y_)
return (X_, y_) if len(output) == 2 else (X_, y_, output[2])
Expand Down Expand Up @@ -284,7 +286,11 @@ def fit_resample(self, X, y):

if self.validate:

y_ = label_binarize(output[1], np.unique(y)) if binarize_y else output[1]
y_ = (
label_binarize(output[1], classes=np.unique(y))
if binarize_y
else output[1]
)
X_, y_ = arrays_transformer.transform(output[0], y_)
return (X_, y_) if len(output) == 2 else (X_, y_, output[2])

Expand Down
14 changes: 10 additions & 4 deletions imblearn/ensemble/_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,15 +422,13 @@ def fit(self, X, y, sample_weight=None):
)
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
self._n_features = X.shape[1]

if issparse(X):
# Pre-sort indices to avoid that each individual tree of the
# ensemble sorts the indices.
X.sort_indices()

# Remap output
_, self.n_features_ = X.shape

y = np.atleast_1d(y)
if y.ndim == 2 and y.shape[1] == 1:
warn(
Expand Down Expand Up @@ -627,5 +625,13 @@ def _set_oob_score(self, X, y):

self.oob_score_ = oob_score / self.n_outputs_

@property
def n_features_(self):
    """Number of features seen when fitting the estimator.

    Returns
    -------
    int
        ``n_features_in_`` when the estimator exposes it, otherwise the
        private ``_n_features`` value recorded from ``X.shape[1]`` in
        ``fit``.

    Raises
    ------
    AttributeError
        If neither attribute is available (estimator not fitted).
    """
    # The attribute must be looked up on ``self`` by *name*; the previous
    # ``getattr(self.n_features_in_, self._n_features)`` passed an integer
    # as the attribute name and raised ``TypeError`` on every access.
    return getattr(self, "n_features_in_", self._n_features)

def _more_tags(self):
return {"multioutput": False}
return {
"multioutput": False,
"multilabel": False,
}
38 changes: 30 additions & 8 deletions imblearn/ensemble/tests/test_easy_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@
def test_easy_ensemble_classifier(n_estimators, base_estimator):
# Check classification for various parameter settings.
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -72,7 +75,10 @@ def test_easy_ensemble_classifier(n_estimators, base_estimator):
def test_base_estimator():
# Check base_estimator and its default values.
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -91,7 +97,10 @@ def test_base_estimator():

def test_bagging_with_pipeline():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
estimator = EasyEnsembleClassifier(
n_estimators=2,
Expand All @@ -109,7 +118,9 @@ def test_warm_start(random_state=42):
for n_estimators in [5, 10]:
if clf_ws is None:
clf_ws = EasyEnsembleClassifier(
n_estimators=n_estimators, random_state=random_state, warm_start=True,
n_estimators=n_estimators,
random_state=random_state,
warm_start=True,
)
else:
clf_ws.set_params(n_estimators=n_estimators)
Expand Down Expand Up @@ -182,7 +193,10 @@ def test_warm_start_equivalence():
)
def test_easy_ensemble_classifier_error(n_estimators, msg_error):
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
with pytest.raises(ValueError, match=msg_error):
eec = EasyEnsembleClassifier(n_estimators=n_estimators)
Expand All @@ -191,7 +205,10 @@ def test_easy_ensemble_classifier_error(n_estimators, msg_error):

def test_easy_ensemble_classifier_single_estimator():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

Expand All @@ -205,14 +222,19 @@ def test_easy_ensemble_classifier_single_estimator():

def test_easy_ensemble_classifier_grid_search():
X, y = make_imbalance(
iris.data, iris.target, sampling_strategy={0: 20, 1: 25, 2: 50}, random_state=0,
iris.data,
iris.target,
sampling_strategy={0: 20, 1: 25, 2: 50},
random_state=0,
)

parameters = {
"n_estimators": [1, 2],
"base_estimator__n_estimators": [3, 4],
}
grid_search = GridSearchCV(
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()), parameters, cv=5,
EasyEnsembleClassifier(base_estimator=AdaBoostClassifier()),
parameters,
cv=5,
)
grid_search.fit(X, y)
18 changes: 14 additions & 4 deletions imblearn/ensemble/tests/test_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ def imbalanced_dataset():
[
({"n_estimators": "whatever"}, "n_estimators must be an integer"),
({"n_estimators": -100}, "n_estimators must be greater than zero"),
({"bootstrap": False, "oob_score": True}, "Out of bag estimation only",),
(
{"bootstrap": False, "oob_score": True},
"Out of bag estimation only",
),
],
)
def test_balanced_random_forest_error(imbalanced_dataset, forest_params, err_msg):
Expand Down Expand Up @@ -105,7 +108,10 @@ def test_balanced_random_forest_oob(imbalanced_dataset):
X, y, random_state=42, stratify=y
)
est = BalancedRandomForestClassifier(
oob_score=True, random_state=0, n_estimators=1000, min_samples_leaf=2,
oob_score=True,
random_state=0,
n_estimators=1000,
min_samples_leaf=2,
)

est.fit(X_train, y_train)
Expand Down Expand Up @@ -135,12 +141,16 @@ def test_little_tree_with_small_max_samples():

# First fit with no restriction on max samples
est1 = BalancedRandomForestClassifier(
n_estimators=1, random_state=rng, max_samples=None,
n_estimators=1,
random_state=rng,
max_samples=None,
)

# Second fit with max samples restricted to just 2
est2 = BalancedRandomForestClassifier(
n_estimators=1, random_state=rng, max_samples=2,
n_estimators=1,
random_state=rng,
max_samples=2,
)

est1.fit(X, y)
Expand Down
Loading