Skip to content

Commit fbac276

Browse files
committed
add doc
1 parent 049dde9 commit fbac276

File tree

1 file changed

+74
-3
lines changed

1 file changed

+74
-3
lines changed

imblearn/over_sampling/_smote.py

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,7 +1297,75 @@ def _fit_resample(self, X, y):
12971297
return X_resampled, y_resampled
12981298

12991299

1300+
@Substitution(
1301+
sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
1302+
n_jobs=_n_jobs_docstring,
1303+
random_state=_random_state_docstring,
1304+
)
13001305
class SMOTEN(SMOTE):
1306+
"""Perform SMOTE over-sampling for nominal categorical features only.
1307+
1308+
This method is referred to as SMOTEN in [1]_.
1309+
1310+
Read more in the :ref:`User Guide <smote_adasyn>`.
1311+
1312+
Parameters
1313+
----------
1314+
{sampling_strategy}
1315+
1316+
{random_state}
1317+
1318+
k_neighbors : int or object, default=5
1319+
If ``int``, number of nearest neighbours to use to construct synthetic
1320+
samples. If object, an estimator that inherits from
1321+
:class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
1322+
find the k_neighbors.
1323+
1324+
{n_jobs}
1325+
1326+
See Also
1327+
--------
1328+
SMOTE : Over-sample using SMOTE.
1329+
1330+
SMOTENC : Over-sample using SMOTE for continuous and categorical features.
1331+
1332+
BorderlineSMOTE : Over-sample using the borderline-SMOTE variant.
1333+
1334+
SVMSMOTE : Over-sample using the SVM-SMOTE variant.
1335+
1336+
ADASYN : Over-sample using ADASYN.
1337+
1338+
KMeansSMOTE : Over-sample by applying clustering before over-sampling using
1339+
SMOTE.
1340+
1341+
Notes
1342+
-----
1343+
See the original paper [1]_ for more details.
1344+
1345+
Supports multi-class resampling. A one-vs.-rest scheme is used as
1346+
originally proposed in [1]_.
1347+
1348+
References
1349+
----------
1350+
.. [1] N. V. Chawla, K. W. Bowyer, L. O. Hall, W. P. Kegelmeyer, "SMOTE:
1351+
synthetic minority over-sampling technique," Journal of artificial
1352+
intelligence research, 321-357, 2002.
1353+
1354+
Examples
1355+
--------
1356+
>>> import numpy as np
1357+
>>> X = np.array(["A"] * 10 + ["B"] * 20 + ["C"] * 30, dtype=object).reshape(-1, 1)
1358+
>>> y = np.array([0] * 20 + [1] * 40, dtype=np.int32)
1359+
>>> from collections import Counter
1360+
>>> print(f"Original class counts: {{Counter(y)}}")
1361+
Original class counts: Counter({{1: 40, 0: 20}})
1362+
>>> from imblearn.over_sampling import SMOTEN
1363+
>>> sampler = SMOTEN(random_state=0)
1364+
>>> X_res, y_res = sampler.fit_resample(X, y)
1365+
>>> print(f"Class counts after resampling {{Counter(y_res)}}")
1366+
Class counts after resampling Counter({{0: 40, 1: 40}})
1367+
"""
1368+
13011369
def _check_X_y(self, X, y):
13021370
y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
13031371
X, y = self._validate_data(
@@ -1315,9 +1383,12 @@ def _make_samples(self, X_class, klass, y_dtype, nn_indices, n_samples):
13151383
samples_indices = random_state.choice(
13161384
np.arange(X_class.shape[0]), size=n_samples, replace=True
13171385
)
1318-
X_new = np.empty(shape=(n_samples, X_class.shape[1]), dtype=X_class.dtype)
1319-
for idx, sample_idx in enumerate(samples_indices):
1320-
X_new[idx, :] = stats.mode(X_class[nn_indices[sample_idx]], axis=0).mode
1386+
# for each drawn sample, select its k-neighbors and generate a sample
1387+
# where for each feature individually, each category generated is the
1388+
# most common category
1389+
X_new = np.squeeze(
1390+
stats.mode(X_class[nn_indices[samples_indices]], axis=1).mode, axis=1
1391+
)
13211392
y_new = np.full(n_samples, fill_value=klass, dtype=y_dtype)
13221393
return X_new, y_new
13231394

0 commit comments

Comments
 (0)