Skip to content

Commit 99d5206

Browse files
committed
update documentation
1 parent 964d082 commit 99d5206

File tree

5 files changed

+100
-44
lines changed

5 files changed

+100
-44
lines changed

imblearn/over_sampling/_smote/base.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,17 @@ class SMOTE(BaseSMOTE):
224224
{random_state}
225225
226226
k_neighbors : int or object, default=5
227-
If ``int``, number of nearest neighbours to used to construct synthetic
228-
samples. If object, an estimator that inherits from
229-
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
230-
find the k_neighbors.
227+
The nearest neighbors used to define the neighborhood of samples to use
228+
to generate the synthetic samples. You can pass:
229+
230+
- an `int` corresponding to the number of neighbors to use. A
231+
:class:`~sklearn.neighbors.NearestNeighbors` instance will be fitted in this
232+
case.
233+
- an instance of a compatible nearest neighbors algorithm that should
234+
implement both methods `kneighbors` and `kneighbors_graph`. For
235+
instance, it could correspond to a
236+
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
237+
any compatible class.
231238
232239
{n_jobs}
233240
@@ -367,10 +374,17 @@ class SMOTENC(SMOTE):
367374
{random_state}
368375
369376
k_neighbors : int or object, default=5
370-
If ``int``, number of nearest neighbours to used to construct synthetic
371-
samples. If object, an estimator that inherits from
372-
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
373-
find the k_neighbors.
377+
The nearest neighbors used to define the neighborhood of samples to use
378+
to generate the synthetic samples. You can pass:
379+
380+
- an `int` corresponding to the number of neighbors to use. A
381+
:class:`~sklearn.neighbors.NearestNeighbors` instance will be fitted in this
382+
case.
383+
- an instance of a compatible nearest neighbors algorithm that should
384+
implement both methods `kneighbors` and `kneighbors_graph`. For
385+
instance, it could correspond to a
386+
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
387+
any compatible class.
374388
375389
{n_jobs}
376390
@@ -636,10 +650,17 @@ class SMOTEN(SMOTE):
636650
{random_state}
637651
638652
k_neighbors : int or object, default=5
639-
If ``int``, number of nearest neighbours to used to construct synthetic
640-
samples. If object, an estimator that inherits from
641-
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
642-
find the k_neighbors.
653+
The nearest neighbors used to define the neighborhood of samples to use
654+
to generate the synthetic samples. You can pass:
655+
656+
- an `int` corresponding to the number of neighbors to use. A
657+
:class:`~sklearn.neighbors.NearestNeighbors` instance will be fitted in this
658+
case.
659+
- an instance of a compatible nearest neighbors algorithm that should
660+
implement both methods `kneighbors` and `kneighbors_graph`. For
661+
instance, it could correspond to a
662+
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
663+
any compatible class.
643664
644665
{n_jobs}
645666

imblearn/over_sampling/_smote/cluster.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,17 @@ class KMeansSMOTE(BaseSMOTE):
4545
{random_state}
4646
4747
k_neighbors : int or object, default=2
48-
If ``int``, number of nearest neighbours to used to construct synthetic
49-
samples. If object, an estimator that inherits from
50-
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
51-
find the k_neighbors.
48+
The nearest neighbors used to define the neighborhood of samples to use
49+
to generate the synthetic samples. You can pass:
50+
51+
- an `int` corresponding to the number of neighbors to use. A
52+
:class:`~sklearn.neighbors.NearestNeighbors` instance will be fitted in this
53+
case.
54+
- an instance of a compatible nearest neighbors algorithm that should
55+
implement both methods `kneighbors` and `kneighbors_graph`. For
56+
instance, it could correspond to a
57+
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
58+
any compatible class.
5259
5360
{n_jobs}
5461

imblearn/over_sampling/_smote/filter.py

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -47,18 +47,32 @@ class BorderlineSMOTE(BaseSMOTE):
4747
{random_state}
4848
4949
k_neighbors : int or object, default=5
50-
If ``int``, number of nearest neighbours to used to construct synthetic
51-
samples. If object, an estimator that inherits from
52-
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
53-
find the k_neighbors.
50+
The nearest neighbors used to define the neighborhood of samples to use
51+
to generate the synthetic samples. You can pass:
52+
53+
- an `int` corresponding to the number of neighbors to use. A
54+
:class:`~sklearn.neighbors.NearestNeighbors` instance will be fitted in this
55+
case.
56+
- an instance of a compatible nearest neighbors algorithm that should
57+
implement both methods `kneighbors` and `kneighbors_graph`. For
58+
instance, it could correspond to a
59+
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
60+
any compatible class.
5461
5562
{n_jobs}
5663
5764
m_neighbors : int or object, default=10
58-
If int, number of nearest neighbours to use to determine if a minority
59-
sample is in danger. If object, an estimator that inherits
60-
from :class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used
61-
to find the m_neighbors.
65+
The nearest neighbors used to determine if a minority sample is in
66+
"danger". You can pass:
67+
68+
- an `int` corresponding to the number of neighbors to use. A
69+
:class:`~sklearn.neighbors.NearestNeighbors` instance will be fitted in this
70+
case.
71+
- an instance of a compatible nearest neighbors algorithm that should
72+
implement both methods `kneighbors` and `kneighbors_graph`. For
73+
instance, it could correspond to a
74+
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
75+
any compatible class.
6276
6377
kind : {{"borderline-1", "borderline-2"}}, default='borderline-1'
6478
The type of SMOTE algorithm to use one of the following options:
@@ -261,18 +275,32 @@ class SVMSMOTE(BaseSMOTE):
261275
{random_state}
262276
263277
k_neighbors : int or object, default=5
264-
If ``int``, number of nearest neighbours to used to construct synthetic
265-
samples. If object, an estimator that inherits from
266-
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
267-
find the k_neighbors.
278+
The nearest neighbors used to define the neighborhood of samples to use
279+
to generate the synthetic samples. You can pass:
280+
281+
- an `int` corresponding to the number of neighbors to use. A
282+
:class:`~sklearn.neighbors.NearestNeighbors` instance will be fitted in this
283+
case.
284+
- an instance of a compatible nearest neighbors algorithm that should
285+
implement both methods `kneighbors` and `kneighbors_graph`. For
286+
instance, it could correspond to a
287+
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
288+
any compatible class.
268289
269290
{n_jobs}
270291
271292
m_neighbors : int or object, default=10
272-
If int, number of nearest neighbours to use to determine if a minority
273-
sample is in danger. If object, an estimator that inherits from
274-
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
275-
find the m_neighbors.
293+
The nearest neighbors used to determine if a minority sample is in
294+
"danger". You can pass:
295+
296+
- an `int` corresponding to the number of neighbors to use. A
297+
:class:`~sklearn.neighbors.NearestNeighbors` instance will be fitted in this
298+
case.
299+
- an instance of a compatible nearest neighbors algorithm that should
300+
implement both methods `kneighbors` and `kneighbors_graph`. For
301+
instance, it could correspond to a
302+
:class:`~sklearn.neighbors.NearestNeighbors` but could be extended to
303+
any compatible class.
276304
277305
svm_estimator : estimator object, default=SVC()
278306
A parametrized :class:`~sklearn.svm.SVC` classifier can be passed.

imblearn/under_sampling/_prototype_generation/tests/test_cluster_centroids.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
import numpy as np
66
from scipy import sparse
77

8-
from sklearn.base import BaseEstimator
98
from sklearn.linear_model import LogisticRegression
109
from sklearn.cluster import KMeans
1110
from sklearn.datasets import make_classification
1211

1312
from imblearn.under_sampling import ClusterCentroids
13+
from imblearn.utils.testing import CustomClusterer
1414

1515
RND_SEED = 0
1616
X = np.array(
@@ -155,16 +155,6 @@ def test_cluster_centroids_hard_target_class():
155155
assert sum(sample_from_minority_in_majority) == 0
156156

157157

158-
class FakeCluster(BaseEstimator):
159-
"""Class that mimics a cluster that does not expose `cluster_centers_`."""
160-
161-
def __init__(self, n_clusters=1):
162-
self.n_clusters = n_clusters
163-
164-
def fit(self, X, y=None):
165-
return self
166-
167-
168158
def test_cluster_centroids_error_estimator():
169159
"""Check that an error is raised when estimator does not have a cluster API."""
170160

@@ -180,4 +170,4 @@ def test_cluster_centroids_error_estimator():
180170
"`cluster_centers_`."
181171
)
182172
with pytest.raises(RuntimeError, match=err_msg):
183-
ClusterCentroids(estimator=FakeCluster()).fit_resample(X, Y)
173+
ClusterCentroids(estimator=CustomClusterer()).fit_resample(X, Y)

imblearn/utils/testing.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,3 +195,13 @@ def kneighbors_graph(X=None, n_neighbors=None, mode="connectivity"):
195195
"""This method is not used within imblearn but it is required for
196196
duck-typing."""
197197
pass
198+
199+
200+
class CustomClusterer(BaseEstimator):
201+
"""Class that mimics a clusterer that does not expose `cluster_centers_`."""
202+
203+
def __init__(self, n_clusters=1):
204+
self.n_clusters = n_clusters
205+
206+
def fit(self, X, y=None):
207+
return self

0 commit comments

Comments
 (0)