44==============================================
55
66This example illustrates what a Tomek link is.
7-
87"""
98
10- import matplotlib .pyplot as plt
11- import numpy as np
12-
13- from imblearn .under_sampling import TomekLinks
9+ # Authors: Guillaume Lemaitre <[email protected]>
10+ # License: MIT
1411
12+ # %%
1513print (__doc__ )
1614
17- rng = np .random .RandomState (18 )
15+ import matplotlib .pyplot as plt
16+ import seaborn as sns
1817
19- ###############################################################################
18+ sns .set_context ("poster" )
19+
20+ # %% [markdown]
2021# This helper function gives every plot the same clean styling.
2122
23+ # %%
24+
2225
2326def make_plot_despine (ax ):
24- ax .spines ["top" ].set_visible (False )
25- ax .spines ["right" ].set_visible (False )
26- ax .get_xaxis ().tick_bottom ()
27- ax .get_yaxis ().tick_left ()
28- ax .spines ["left" ].set_position (("outward" , 10 ))
29- ax .spines ["bottom" ].set_position (("outward" , 10 ))
30- ax .set_xlim ([0.0 , 3.5 ])
31- ax .set_ylim ([0.0 , 3.5 ])
27+ sns .despine (ax = ax , offset = 10 )
28+ # ax.axis("square")
29+ ax .set_xlim ([0 , 3 ])
30+ ax .set_ylim ([0 , 3 ])
3231 ax .set_xlabel (r"$X_1$" )
3332 ax .set_ylabel (r"$X_2$" )
34- ax .legend ()
33+ ax .legend (loc = "lower right" )
3534
3635
37- ###############################################################################
38- # Generate some data with one Tomek link
36+ # %% [markdown]
37+ # We will generate some toy data that illustrates how
38+ # :class:`~imblearn.under_sampling.TomekLinks` is used to clean a dataset.
39+
40+ # %%
41+ import numpy as np
42+
43+ rng = np .random .RandomState (18 )
3944
40- # minority class
4145X_minority = np .transpose (
4246 [[1.1 , 1.3 , 1.15 , 0.8 , 0.55 , 2.1 ], [1.0 , 1.5 , 1.7 , 2.5 , 0.55 , 1.9 ]]
4347)
44- # majority class
4548X_majority = np .transpose (
4649 [
4750 [2.1 , 2.12 , 2.13 , 2.14 , 2.2 , 2.3 , 2.5 , 2.45 ],
4851 [1.5 , 2.1 , 2.7 , 0.9 , 1.0 , 1.4 , 2.4 , 2.9 ],
4952 ]
5053)
5154
52- ###############################################################################
55+ # %% [markdown]
5356# In the figure below, the samples highlighted in green form a Tomek link since
54- # they are of different classes and are nearest neighbours of each other.
57+ # they are of different classes and are nearest neighbors of each other.
5558
56- fig , ax = plt .subplots (1 , 1 , figsize = (6 , 6 ))
59+ fig , ax = plt .subplots (figsize = (8 , 8 ))
5760ax .scatter (
58- X_minority [:, 0 ], X_minority [:, 1 ], label = "Minority class" , s = 200 , marker = "_" ,
61+ X_minority [:, 0 ],
62+ X_minority [:, 1 ],
63+ label = "Minority class" ,
64+ s = 200 ,
65+ marker = "_" ,
5966)
6067ax .scatter (
61- X_majority [:, 0 ], X_majority [:, 1 ], label = "Majority class" , s = 200 , marker = "+" ,
68+ X_majority [:, 0 ],
69+ X_majority [:, 1 ],
70+ label = "Majority class" ,
71+ s = 200 ,
72+ marker = "+" ,
6273)
6374
6475# highlight the samples of interest
@@ -69,27 +80,27 @@ def make_plot_despine(ax):
6980 s = 200 ,
7081 alpha = 0.3 ,
7182)
72- ax .set_title ("Illustration of a Tomek link" )
7383make_plot_despine (ax )
84+ fig .suptitle ("Illustration of a Tomek link" )
7485fig .tight_layout ()
7586
76- ###############################################################################
77- # We can run the ``TomekLinks`` sampling to remove the corresponding
78- # samples. If ``sampling_strategy='auto'`` only the sample from the majority
79- # class will be removed. If ``sampling_strategy='all'`` both samples will be
80- # removed.
87+ # %% [markdown]
88+ # We can run the :class:`~imblearn.under_sampling.TomekLinks` sampling to
89+ # remove the corresponding samples. If `sampling_strategy='auto'` only the
90+ # sample from the majority class will be removed. If `sampling_strategy='all'`
91+ # both samples will be removed.
92+
93+ # %%
94+ from imblearn .under_sampling import TomekLinks
8195
82- sampler = TomekLinks ( )
96+ fig , axs = plt . subplots ( nrows = 1 , ncols = 2 , figsize = ( 16 , 8 ) )
8397
84- fig , (ax1 , ax2 ) = plt .subplots (1 , 2 , figsize = (12 , 6 ))
98+ samplers = {
99+ "Removing only majority samples" : TomekLinks (sampling_strategy = "auto" ),
100+ "Removing all samples" : TomekLinks (sampling_strategy = "all" ),
101+ }
85102
86- ax_arr = (ax1 , ax2 )
87- title_arr = ("Removing only majority samples" , "Removing all samples" )
88- for ax , title , sampler in zip (
89- ax_arr ,
90- title_arr ,
91- [TomekLinks (sampling_strategy = "auto" ), TomekLinks (sampling_strategy = "all" )],
92- ):
103+ for ax , (title , sampler ) in zip (axs , samplers .items ()):
93104 X_res , y_res = sampler .fit_resample (
94105 np .vstack ((X_minority , X_majority )),
95106 np .array ([0 ] * X_minority .shape [0 ] + [1 ] * X_majority .shape [0 ]),
@@ -123,3 +134,5 @@ def make_plot_despine(ax):
123134fig .tight_layout ()
124135
125136plt .show ()
137+
138+ # %%
0 commit comments