|
5 | 5 |
|
6 | 6 | The following example attempts to make a qualitative comparison between the |
7 | 7 | different over-sampling algorithms available in the imbalanced-learn package. |
8 | | -
|
9 | 8 | """ |
10 | 9 |
|
11 | 10 | # Authors: Guillaume Lemaitre <[email protected]> |
|
19 | 18 | from sklearn.datasets import make_classification |
20 | 19 | from sklearn.svm import LinearSVC |
21 | 20 |
|
| 21 | +from imblearn import FunctionSampler |
22 | 22 | from imblearn.pipeline import make_pipeline |
23 | 23 | from imblearn.over_sampling import ADASYN |
24 | 24 | from imblearn.over_sampling import ( |
|
29 | 29 | KMeansSMOTE, |
30 | 30 | ) |
31 | 31 | from imblearn.over_sampling import RandomOverSampler |
32 | | -from imblearn.base import BaseSampler |
33 | 32 |
|
34 | 33 | print(__doc__) |
35 | 34 |
|
| 35 | +import seaborn as sns |
| 36 | + |
| 37 | +sns.set_context("poster") |
| 38 | + |
36 | 39 |
|
37 | 40 | ############################################################################### |
38 | 41 | # The following function will be used to create a toy dataset. It uses the |
@@ -68,13 +71,7 @@ def create_dataset( |
68 | 71 | def plot_resampling(X, y, sampling, ax): |
69 | 72 | X_res, y_res = sampling.fit_resample(X, y) |
70 | 73 | ax.scatter(X_res[:, 0], X_res[:, 1], c=y_res, alpha=0.8, edgecolor="k") |
71 | | - # make nice plotting |
72 | | - ax.spines["top"].set_visible(False) |
73 | | - ax.spines["right"].set_visible(False) |
74 | | - ax.get_xaxis().tick_bottom() |
75 | | - ax.get_yaxis().tick_left() |
76 | | - ax.spines["left"].set_position(("outward", 10)) |
77 | | - ax.spines["bottom"].set_position(("outward", 10)) |
| 74 | + sns.despine(ax=ax, offset=10) |
78 | 75 | return Counter(y_res) |
79 | 76 |
|
80 | 77 |
|
@@ -170,19 +167,9 @@ def plot_decision_function(X, y, clf, ax): |
170 | 167 | # Instead of repeating the same samples when over-sampling, we can use some |
171 | 168 | # specific heuristic instead. ADASYN and SMOTE can be used in this case. |
172 | 169 |
|
173 | | - |
174 | | -# Make an identity sampler |
175 | | -class FakeSampler(BaseSampler): |
176 | | - |
177 | | - _sampling_type = "bypass" |
178 | | - |
179 | | - def _fit_resample(self, X, y): |
180 | | - return X, y |
181 | | - |
182 | | - |
183 | 170 | fig, axs = plt.subplots(2, 2, figsize=(15, 15)) |
184 | 171 | X, y = create_dataset(n_samples=10000, weights=(0.01, 0.05, 0.94)) |
185 | | -sampler = FakeSampler() |
| 172 | +sampler = FunctionSampler() |
186 | 173 | clf = make_pipeline(sampler, LinearSVC()) |
187 | 174 | plot_resampling(X, y, sampler, axs[0, 0]) |
188 | 175 | axs[0, 0].set_title(f"Original data - y={Counter(y)}") |
|
0 commit comments