|
58 | 58 | "import matplotlib.pyplot as plot\n", |
59 | 59 | "import typing as typ\n", |
60 | 60 | "import numpy as np\n", |
61 | | - "from sklearn.manifold import TSNE\n", |
| 61 | + "from openTSNE.sklearn import TSNE\n", |
62 | 62 | "from neo4j import GraphDatabase" |
63 | 63 | ] |
64 | 64 | }, |
|
69 | 69 | "metadata": {}, |
70 | 70 | "outputs": [], |
71 | 71 | "source": [ |
72 | | - "import sklearn\n", |
73 | | - "print('The scikit-learn version is {}.'.format(sklearn.__version__))\n", |
74 | | - "print('The pandas version is {}.'.format(pd.__version__))\n" |
| 72 | + "from openTSNE import __version__ as openTSNE_version\n", |
| 73 | + "print('The openTSNE version is: {}'.format(openTSNE_version))\n", |
| 74 | + "print('The pandas version is: {}'.format(pd.__version__))\n" |
75 | 75 | ] |
76 | 76 | }, |
77 | 77 | { |
|
231 | 231 | "\n", |
232 | 232 | "> It converts similarities between data points to joint probabilities and tries to minimize the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data.\n", |
233 | 233 | "\n", |
234 | | - "(see https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE)" |
| 234 | + "(see https://opentsne.readthedocs.io)" |
235 | 235 | ] |
236 | 236 | }, |
237 | 237 | { |
|
245 | 245 | " \"\"\"\n", |
246 | 246 | " Reduces the dimensionality of the node embeddings (e.g. 64 floating point numbers in an array)\n", |
247 | 247 | " to two dimensions for 2D visualization.\n", |
248 | | - " see https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE\n", |
| 248 | + " see https://opentsne.readthedocs.io\n", |
249 | 249 | " \"\"\"\n", |
250 | 250 | "\n", |
251 | 251 | " if embeddings.empty: \n", |
|
258 | 258 | " # See https://bobbyhadz.com/blog/python-attributeerror-list-object-has-no-attribute-shape\n", |
259 | 259 | " embeddings_as_numpy_array = np.array(embeddings.embedding.to_list())\n", |
260 | 260 | "\n", |
261 | | - " # The parameter \"perplexity\" needs to be smaller than the sample size\n", |
262 | | - " # See https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html\n", |
263 | | - " number_of_nodes=embeddings.shape[0]\n", |
264 | | - " perplexity = min(number_of_nodes - 1.0, 30.0)\n", |
265 | | - " print(\"t-SNE: Sample size (Number of nodes)={size}\".format(size = number_of_nodes))\n", |
266 | | - " print(\"t-SNE: perplexity={perplexity}\".format(perplexity=perplexity))\n", |
267 | | - "\n", |
268 | 261 | " # Use t-distributed stochastic neighbor embedding (t-SNE) to reduce the dimensionality \n", |
269 | 262 | " # of the previously calculated node embeddings to 2 dimensions for visualization\n", |
270 | | - " t_distributed_stochastic_neighbor_embedding = TSNE(n_components=2, perplexity=perplexity, verbose=1, random_state=50)\n", |
| 263 | + " t_distributed_stochastic_neighbor_embedding = TSNE(n_components=2, verbose=1, random_state=47)\n", |
271 | 264 | " two_dimension_node_embeddings = t_distributed_stochastic_neighbor_embedding.fit_transform(embeddings_as_numpy_array)\n", |
272 | 265 | " display(two_dimension_node_embeddings.shape) # Display the shape of the t-SNE result\n", |
273 | 266 | "\n", |
|
365 | 358 | "source": [ |
366 | 359 | "### 1.1 Generate Node Embeddings using Fast Random Projection (Fast RP) for Java Packages\n", |
367 | 360 | "\n", |
368 | | - "[Fast Random Projection](https://neo4j.com/docs/graph-data-science/current/machine-learning/node-embeddings/fastrp) is used to reduce the dimensionality of the node feature space while preserving most of the distance information. Nodes with similar neighborhood result in node embedding with similar vectors." |
| 361 | + "[Fast Random Projection](https://neo4j.com/docs/graph-data-science/current/machine-learning/node-embeddings/fastrp) is used to reduce the dimensionality of the node feature space while preserving most of the distance information. Nodes with similar neighborhoods result in node embeddings with similar vectors.\n", |
| 362 | + "\n", |
| 363 | + "**👉Hint:** To skip existing node embeddings and always calculate them based on the parameters below, edit `Node_Embeddings_0a_Query_Calculated` so that it won't return any results." |
369 | 364 | ] |
370 | 365 | }, |
371 | 366 | { |
|
511 | 506 | "name": "python", |
512 | 507 | "nbconvert_exporter": "python", |
513 | 508 | "pygments_lexer": "ipython3", |
514 | | - "version": "3.11.4" |
| 509 | + "version": "3.11.9" |
515 | 510 | }, |
516 | 511 | "title": "Object Oriented Design Quality Metrics for Java with Neo4j" |
517 | 512 | }, |
|
0 commit comments