Skip to content

Commit c23988e

Browse files
authored
Merge pull request #110 from sdpython/tsne
Fixes perplexity issue with PredictableTSNE
2 parents a726345 + 0f3430a commit c23988e

File tree

4 files changed

+23
-17
lines changed

4 files changed

+23
-17
lines changed

_unittests/ut_mlmodel/test_tsne_predictable.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,20 @@
66
import numpy
77
from numpy.random import RandomState
88
from sklearn import datasets
9+
from sklearn.exceptions import ConvergenceWarning
910
from sklearn.preprocessing import StandardScaler
1011
from sklearn.neighbors import KNeighborsRegressor
1112
from sklearn.neural_network import MLPRegressor
1213
from sklearn.manifold import TSNE
13-
from pyquickhelper.pycode import ExtTestCase, skipif_circleci
14+
from pyquickhelper.pycode import (
15+
ExtTestCase, skipif_circleci, ignore_warnings)
1416
from mlinsights.mlmodel import PredictableTSNE
1517
from mlinsights.mlmodel import test_sklearn_pickle, test_sklearn_clone
1618

1719

1820
class TestPredictableTSNE(ExtTestCase):
1921

22+
@ignore_warnings(ConvergenceWarning)
2023
def test_predictable_tsne(self):
2124
iris = datasets.load_iris()
2225
X, y = iris.data[:20], iris.target[:20]
@@ -28,6 +31,7 @@ def test_predictable_tsne(self):
2831
self.assertNotEmpty(pred)
2932

3033
@skipif_circleci('stuck')
34+
@ignore_warnings(ConvergenceWarning)
3135
def test_predictable_tsne_knn(self):
3236
iris = datasets.load_iris()
3337
X, y = iris.data[:20], iris.target[:20]
@@ -39,6 +43,7 @@ def test_predictable_tsne_knn(self):
3943
self.assertIsInstance(clr.estimator_, KNeighborsRegressor)
4044
self.assertEqual(pred.shape, (X.shape[0], 2))
4145

46+
@ignore_warnings(ConvergenceWarning)
4247
def test_predictable_tsne_intercept_weights(self):
4348
iris = datasets.load_iris()
4449
X, y = iris.data[:20], iris.target[:20]
@@ -48,15 +53,18 @@ def test_predictable_tsne_intercept_weights(self):
4853
self.assertGreater(clr.loss_, 0)
4954
self.assertEqual(acc.shape, (X.shape[0], 2))
5055

56+
@ignore_warnings(ConvergenceWarning)
5157
def test_predictable_tsne_pickle(self):
5258
iris = datasets.load_iris()
5359
X, y = iris.data[:20], iris.target[:20]
5460
test_sklearn_pickle(lambda: PredictableTSNE(), X, y)
5561

62+
@ignore_warnings(ConvergenceWarning)
5663
def test_predictable_tsne_clone(self):
5764
self.maxDiff = None
5865
test_sklearn_clone(lambda: PredictableTSNE())
5966

67+
@ignore_warnings(ConvergenceWarning)
6068
def test_predictable_tsne_relevance(self):
6169
state = RandomState(seed=0)
6270
Xs = []

mlinsights/mlbatch/pipeline_cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
@file
33
@brief Caches training.
44
"""
5-
from distutils.version import StrictVersion
5+
from distutils.version import StrictVersion # pylint: disable=W0402
66
from sklearn import __version__ as skl_version
77
from sklearn.base import clone
88
from sklearn.pipeline import Pipeline, _fit_transform_one

mlinsights/mlmodel/predictable_tsne.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,19 @@ class PredictableTSNE(BaseEstimator, TransformerMixin):
2020
which approximates the outputs of a :epkg:`TSNE` transformer.
2121
Notebooks :ref:`predictabletsnerst` gives an example on how to
2222
use this class.
23+
24+
:param normalizer: None by default
25+
:param transformer: :epkg:`sklearn:manifold:TSNE` by default
26+
:param estimator: :epkg:`sklearn:neural_network:MLPRegressor` by default
27+
:param normalize: normalizes the outputs, centers and normalizes
28+
the output of the *t-SNE* and applies that same
29+
normalization to the prediction of the estimator
30+
:param keep_tsne_outputs: if True, the raw outputs of
31+
:epkg:`TSNE` are stored in member `tsne_outputs_`
2332
"""
2433

2534
def __init__(self, normalizer=None, transformer=None, estimator=None,
2635
normalize=True, keep_tsne_outputs=False):
27-
"""
28-
@param normalizer None by default
29-
@param transformer :epkg:`sklearn:manifold:TSNE`
30-
by default
31-
@param estimator :epkg:`sklearn:neural_network:MLPRegressor`
32-
by default
33-
@param normalize normalizes the outputs, centers and normalizes
34-
the output of the *t-SNE* and applies that same
35-
normalization to he prediction of the estimator
36-
@param keep_tsne_output if True, keep raw outputs of
37-
:epkg:`TSNE` is stored in member
38-
*tsne_outputs_*
39-
"""
4036
TransformerMixin.__init__(self)
4137
BaseEstimator.__init__(self)
4238
if estimator is None:
@@ -98,6 +94,9 @@ def fit(self, X, y, sample_weight=None):
9894
self.normalizer_ = None
9995

10096
self.transformer_ = clone(self.transformer)
97+
if (hasattr(self.transformer_, 'perplexity') and
98+
self.transformer_.perplexity >= X.shape[0]):
99+
self.transformer_.perplexity = X.shape[0] - 1
101100

102101
sig = inspect.signature(self.transformer.fit_transform)
103102
pars = {}

mlinsights/plotting/visualize.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -219,8 +219,7 @@ def pipeline2dot(pipe, data, **params):
219219
elif isinstance(raw_data, numpy.ndarray):
220220
if len(raw_data.shape) != 2:
221221
raise NotImplementedError( # pragma: no cover
222-
f"Unexpected training data dimension: {data.shape}."
223-
f"") # pylint: disable=E1101
222+
f"Unexpected training data dimension {raw_data.shape}.")
224223
for i in range(raw_data.shape[1]):
225224
data['X%d' % i] = 'sch0:f%d' % i
226225
elif not isinstance(raw_data, list):

0 commit comments

Comments
 (0)