Skip to content

Commit 77560c2

Browse files
committed
iter
1 parent 77a9605 commit 77560c2

File tree

3 files changed

+59
-40
lines changed

3 files changed

+59
-40
lines changed

examples/over-sampling/plot_comparison_over_sampling.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525

2626

2727
# %%
28+
from sklearn.datasets import make_classification
29+
30+
2831
def create_dataset(
2932
n_samples=1000,
3033
weights=(0.01, 0.01, 0.98),

examples/under-sampling/plot_illustration_nearmiss.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
88
"""
99

10+
# Authors: Guillaume Lemaitre <[email protected]>
11+
# License: MIT
12+
1013
import matplotlib.pyplot as plt
1114
import numpy as np
1215

examples/under-sampling/plot_illustration_tomek_links.py

Lines changed: 53 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,61 +4,72 @@
44
==============================================
55
66
This example illustrates what a Tomek link is.
7-
87
"""
98

10-
import matplotlib.pyplot as plt
11-
import numpy as np
12-
13-
from imblearn.under_sampling import TomekLinks
9+
# Authors: Guillaume Lemaitre <[email protected]>
10+
# License: MIT
1411

12+
# %%
1513
print(__doc__)
1614

17-
rng = np.random.RandomState(18)
15+
import matplotlib.pyplot as plt
16+
import seaborn as sns
1817

19-
###############################################################################
18+
sns.set_context("poster")
19+
20+
# %% [markdown]
2021
# This function makes the plots look nicer.
2122

23+
# %%
24+
2225

2326
def make_plot_despine(ax):
24-
ax.spines["top"].set_visible(False)
25-
ax.spines["right"].set_visible(False)
26-
ax.get_xaxis().tick_bottom()
27-
ax.get_yaxis().tick_left()
28-
ax.spines["left"].set_position(("outward", 10))
29-
ax.spines["bottom"].set_position(("outward", 10))
30-
ax.set_xlim([0.0, 3.5])
31-
ax.set_ylim([0.0, 3.5])
27+
sns.despine(ax=ax, offset=10)
28+
# ax.axis("square")
29+
ax.set_xlim([0, 3])
30+
ax.set_ylim([0, 3])
3231
ax.set_xlabel(r"$X_1$")
3332
ax.set_ylabel(r"$X_2$")
34-
ax.legend()
33+
ax.legend(loc="lower right")
3534

3635

37-
###############################################################################
38-
# Generate some data with one Tomek link
36+
# %% [markdown]
37+
# We will generate some toy data that illustrates how
38+
# :class:`~imblearn.under_sampling.TomekLinks` is used to clean a dataset.
39+
40+
# %%
41+
import numpy as np
42+
43+
rng = np.random.RandomState(18)
3944

40-
# minority class
4145
X_minority = np.transpose(
4246
[[1.1, 1.3, 1.15, 0.8, 0.55, 2.1], [1.0, 1.5, 1.7, 2.5, 0.55, 1.9]]
4347
)
44-
# majority class
4548
X_majority = np.transpose(
4649
[
4750
[2.1, 2.12, 2.13, 2.14, 2.2, 2.3, 2.5, 2.45],
4851
[1.5, 2.1, 2.7, 0.9, 1.0, 1.4, 2.4, 2.9],
4952
]
5053
)
5154

52-
###############################################################################
55+
# %% [markdown]
5356
# In the figure below, the samples highlighted in green form a Tomek link since
54-
# they are of different classes and are nearest neighbours of each other.
57+
# they are of different classes and are nearest neighbors of each other.
5558

56-
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
59+
fig, ax = plt.subplots(figsize=(8, 8))
5760
ax.scatter(
58-
X_minority[:, 0], X_minority[:, 1], label="Minority class", s=200, marker="_",
61+
X_minority[:, 0],
62+
X_minority[:, 1],
63+
label="Minority class",
64+
s=200,
65+
marker="_",
5966
)
6067
ax.scatter(
61-
X_majority[:, 0], X_majority[:, 1], label="Majority class", s=200, marker="+",
68+
X_majority[:, 0],
69+
X_majority[:, 1],
70+
label="Majority class",
71+
s=200,
72+
marker="+",
6273
)
6374

6475
# highlight the samples of interest
@@ -69,27 +80,27 @@ def make_plot_despine(ax):
6980
s=200,
7081
alpha=0.3,
7182
)
72-
ax.set_title("Illustration of a Tomek link")
7383
make_plot_despine(ax)
84+
fig.suptitle("Illustration of a Tomek link")
7485
fig.tight_layout()
7586

76-
###############################################################################
77-
# We can run the ``TomekLinks`` sampling to remove the corresponding
78-
# samples. If ``sampling_strategy='auto'`` only the sample from the majority
79-
# class will be removed. If ``sampling_strategy='all'`` both samples will be
80-
# removed.
87+
# %% [markdown]
88+
# We can run the :class:`~imblearn.under_sampling.TomekLinks` sampling to
89+
# remove the corresponding samples. If `sampling_strategy='auto'`, only the
90+
# sample from the majority class will be removed. If `sampling_strategy='all'`,
91+
# both samples will be removed.
92+
93+
# %%
94+
from imblearn.under_sampling import TomekLinks
8195

82-
sampler = TomekLinks()
96+
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))
8397

84-
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
98+
samplers = {
99+
"Removing only majority samples": TomekLinks(sampling_strategy="auto"),
100+
"Removing all samples": TomekLinks(sampling_strategy="all"),
101+
}
85102

86-
ax_arr = (ax1, ax2)
87-
title_arr = ("Removing only majority samples", "Removing all samples")
88-
for ax, title, sampler in zip(
89-
ax_arr,
90-
title_arr,
91-
[TomekLinks(sampling_strategy="auto"), TomekLinks(sampling_strategy="all")],
92-
):
103+
for ax, (title, sampler) in zip(axs, samplers.items()):
93104
X_res, y_res = sampler.fit_resample(
94105
np.vstack((X_minority, X_majority)),
95106
np.array([0] * X_minority.shape[0] + [1] * X_majority.shape[0]),
@@ -123,3 +134,5 @@ def make_plot_despine(ax):
123134
fig.tight_layout()
124135

125136
plt.show()
137+
138+
# %%

0 commit comments

Comments
 (0)