Skip to content

Commit ccf2bd6

Browse files
committed
Visualize HashGNN and node2vec node embeddings
1 parent 468e210 commit ccf2bd6

File tree

3 files changed

+148
-23
lines changed

3 files changed

+148
-23
lines changed

cypher/Node_Embeddings/Node_Embeddings_2c_Hash_GNN_Mutate.cypher

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22

33
CALL gds.beta.hashgnn.mutate(
44
$dependencies_projection + '-cleaned', {
5-
,embeddingDensity: toInteger($dependencies_projection_embedding_dimension) * 2
6-
,iterations: 3
5+
,embeddingDensity: toInteger($dependencies_projection_embedding_dimension) * 16
6+
,iterations: 4
77
,generateFeatures: {
8-
dimension: toInteger($dependencies_projection_embedding_dimension) * 4
9-
,densityLevel: 1
8+
dimension: toInteger($dependencies_projection_embedding_dimension) * 8
9+
,densityLevel: 2
1010
}
1111
,outputDimension: toInteger($dependencies_projection_embedding_dimension)
12+
,neighborInfluence: 1.0
13+
,randomSeed: 42
1214
,mutateProperty: $dependencies_projection_write_property
1315
}
1416
)

cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22

33
CALL gds.beta.hashgnn.stream(
44
$dependencies_projection + '-cleaned', {
5-
,embeddingDensity: toInteger($dependencies_projection_embedding_dimension) * 2
6-
,iterations: 3
5+
,embeddingDensity: toInteger($dependencies_projection_embedding_dimension) * 16
6+
,iterations: 4
77
,generateFeatures: {
8-
dimension: toInteger($dependencies_projection_embedding_dimension) * 4
9-
,densityLevel: 1
8+
dimension: toInteger($dependencies_projection_embedding_dimension) * 8
9+
,densityLevel: 2
1010
}
1111
,outputDimension: toInteger($dependencies_projection_embedding_dimension)
12+
,neighborInfluence: 1.0
13+
,randomSeed: 42
1214
}
1315
)
1416
YIELD nodeId, embedding

jupyter/NodeEmbeddings.ipynb

Lines changed: 136 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,12 @@
213213
"outputs": [],
214214
"source": [
215215
"def prepare_node_embeddings_for_2d_visualization(embeddings: pd.DataFrame) -> pd.DataFrame:\n",
216+
" \"\"\"\n",
217+
" Reduces the dimensionality of the node embeddings (e.g. 64 floating point numbers in an array)\n",
218+
" to two dimensions for 2D visualization.\n",
219+
" see https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE\n",
220+
" \"\"\"\n",
221+
"\n",
216222
" if embeddings.empty: \n",
217223
" print(\"No projected data for node embeddings dimensionality reduction available\")\n",
218224
" return embeddings\n",
@@ -350,7 +356,7 @@
350356
"id": "76d8bca1",
351357
"metadata": {},
352358
"source": [
353-
"### 1.2 Dimensionality reduction with t-distributed stochastic neighbor embedding (t-SNE) for Java\n",
359+
"### 1.2 Dimensionality reduction with t-distributed stochastic neighbor embedding (t-SNE)\n",
354360
"\n",
355361
"This step takes the original node embeddings with a higher dimensionality (e.g. list of 32 floats) and\n",
356362
"reduces them to a 2 dimensional array for visualization. \n",
@@ -375,7 +381,7 @@
375381
"id": "f908c47f",
376382
"metadata": {},
377383
"source": [
378-
"### 1.3 Plot the node embeddings reduced to two dimensions for Java"
384+
"### 1.3 Visualization of the node embeddings reduced to two dimensions"
379385
]
380386
},
381387
{
@@ -387,7 +393,67 @@
387393
"source": [
388394
"plot_2d_node_embeddings(\n",
389395
" node_embeddings_for_visualization, \n",
390-
" \"Java Package nodes positioned by their dependency relationships using t-SNE\"\n",
396+
" \"Java Package positioned by their dependency relationships (FastRP node embeddings + t-SNE)\"\n",
397+
")"
398+
]
399+
},
400+
{
401+
"cell_type": "markdown",
402+
"id": "b690b9a7",
403+
"metadata": {},
404+
"source": [
405+
"### 1.4 Node Embeddings for Java Packages using HashGNN"
406+
]
407+
},
408+
{
409+
"cell_type": "code",
410+
"execution_count": null,
411+
"id": "3d3cfb7a",
412+
"metadata": {},
413+
"outputs": [],
414+
"source": [
415+
"java_package_embeddings_parameters={\n",
416+
" \"dependencies_projection\": \"java-package-embeddings-notebook\",\n",
417+
" \"dependencies_projection_node\": \"Package\",\n",
418+
" \"dependencies_projection_weight_property\": \"weight25PercentInterfaces\",\n",
419+
" \"dependencies_projection_write_property\": \"embeddingsHashGNN\",\n",
420+
" \"dependencies_projection_embedding_dimension\":\"64\"\n",
421+
"}\n",
422+
"embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher\", java_package_embeddings_parameters)\n",
423+
"node_embeddings_for_visualization = prepare_node_embeddings_for_2d_visualization(embeddings)\n",
424+
"plot_2d_node_embeddings(\n",
425+
" node_embeddings_for_visualization, \n",
426+
" \"Java Package positioned by their dependency relationships (HashGNN node embeddings + t-SNE)\"\n",
427+
")"
428+
]
429+
},
430+
{
431+
"cell_type": "markdown",
432+
"id": "248d88b4",
433+
"metadata": {},
434+
"source": [
435+
"### 1.5 Node Embeddings for Java Packages using node2vec"
436+
]
437+
},
438+
{
439+
"cell_type": "code",
440+
"execution_count": null,
441+
"id": "62c40c45",
442+
"metadata": {},
443+
"outputs": [],
444+
"source": [
445+
"java_package_embeddings_parameters={\n",
446+
" \"dependencies_projection\": \"java-package-embeddings-notebook\",\n",
447+
" \"dependencies_projection_node\": \"Package\",\n",
448+
" \"dependencies_projection_weight_property\": \"weight25PercentInterfaces\",\n",
449+
" \"dependencies_projection_write_property\": \"embeddingsNode2Vec\",\n",
450+
" \"dependencies_projection_embedding_dimension\":\"64\"\n",
451+
"}\n",
452+
"embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher\", java_package_embeddings_parameters)\n",
453+
"node_embeddings_for_visualization = prepare_node_embeddings_for_2d_visualization(embeddings)\n",
454+
"plot_2d_node_embeddings(\n",
455+
" node_embeddings_for_visualization, \n",
456+
" \"Java Package positioned by their dependency relationships (node2vec node embeddings + t-SNE)\"\n",
391457
")"
392458
]
393459
},
@@ -396,17 +462,17 @@
396462
"id": "0b42163d",
397463
"metadata": {},
398464
"source": [
399-
"## 1. Typescript Modules"
465+
"## 2. Typescript Modules"
400466
]
401467
},
402468
{
403469
"cell_type": "markdown",
404470
"id": "3b468bae",
405471
"metadata": {},
406472
"source": [
407-
"### 1.1 Generate Node Embeddings using Fast Random Projection (Fast RP) for Typescript Modules\n",
473+
"### 2.1 Generate Node Embeddings for Typescript Modules using Fast Random Projection (Fast RP)\n",
408474
"\n",
409-
"See section 1.2 for some background about node embeddings."
475+
"See section 1.1 for some background about node embeddings."
410476
]
411477
},
412478
{
@@ -431,14 +497,9 @@
431497
"id": "ad17607c",
432498
"metadata": {},
433499
"source": [
434-
"### 1.2 Dimensionality reduction with t-distributed stochastic neighbor embedding (t-SNE) for Typescript\n",
500+
"### 2.2 Dimensionality reduction with t-distributed stochastic neighbor embedding (t-SNE)\n",
435501
"\n",
436-
"This step takes the original node embeddings with a higher dimensionality (e.g. list of 32 floats) and\n",
437-
"reduces them to a 2 dimensional array for visualization. \n",
438-
"\n",
439-
"> It converts similarities between data points to joint probabilities and tries to minimize the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data.\n",
440-
"\n",
441-
"(see https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE)"
502+
"See section 1.2 for some background about t-SNE."
442503
]
443504
},
444505
{
@@ -456,7 +517,7 @@
456517
"id": "20084589",
457518
"metadata": {},
458519
"source": [
459-
"### 1.3 Plot the node embeddings reduced to two dimensions for Typescript"
520+
"### 2.3 Visualization of the node embeddings reduced to two dimensions"
460521
]
461522
},
462523
{
@@ -468,7 +529,67 @@
468529
"source": [
469530
"plot_2d_node_embeddings(\n",
470531
" node_embeddings_for_visualization, \n",
471-
" \"Typescript Module nodes positioned by their dependency relationships using t-SNE\"\n",
532+
" \"Typescript Modules positioned by their dependency relationships (FastRP node embeddings + t-SNE)\"\n",
533+
")"
534+
]
535+
},
536+
{
537+
"cell_type": "markdown",
538+
"id": "6cac9be7",
539+
"metadata": {},
540+
"source": [
541+
"### 2.4 Node Embeddings for Typescript Modules using HashGNN"
542+
]
543+
},
544+
{
545+
"cell_type": "code",
546+
"execution_count": null,
547+
"id": "8fe68eca",
548+
"metadata": {},
549+
"outputs": [],
550+
"source": [
551+
"typescript_module_embeddings_parameters={\n",
552+
" \"dependencies_projection\": \"typescript-module-embeddings-notebook\",\n",
553+
" \"dependencies_projection_node\": \"Module\",\n",
554+
" \"dependencies_projection_weight_property\": \"lowCouplingElement25PercentWeight\",\n",
555+
" \"dependencies_projection_write_property\": \"embeddingsHashGNN\",\n",
556+
" \"dependencies_projection_embedding_dimension\":\"64\"\n",
557+
"}\n",
558+
"embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher\", typescript_module_embeddings_parameters)\n",
559+
"node_embeddings_for_visualization = prepare_node_embeddings_for_2d_visualization(embeddings)\n",
560+
"plot_2d_node_embeddings(\n",
561+
" node_embeddings_for_visualization, \n",
562+
" \"Typescript Modules positioned by their dependency relationships (HashGNN node embeddings + t-SNE)\"\n",
563+
")"
564+
]
565+
},
566+
{
567+
"cell_type": "markdown",
568+
"id": "0a7d66f5",
569+
"metadata": {},
570+
"source": [
571+
"### 2.5 Node Embeddings for Typescript Modules using node2vec"
572+
]
573+
},
574+
{
575+
"cell_type": "code",
576+
"execution_count": null,
577+
"id": "ea6c52ca",
578+
"metadata": {},
579+
"outputs": [],
580+
"source": [
581+
"typescript_module_embeddings_parameters={\n",
582+
" \"dependencies_projection\": \"typescript-module-embeddings-notebook\",\n",
583+
" \"dependencies_projection_node\": \"Module\",\n",
584+
" \"dependencies_projection_weight_property\": \"lowCouplingElement25PercentWeight\",\n",
585+
" \"dependencies_projection_write_property\": \"embeddingsNode2Vec\",\n",
586+
" \"dependencies_projection_embedding_dimension\":\"64\"\n",
587+
"}\n",
588+
"embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher\", typescript_module_embeddings_parameters)\n",
589+
"node_embeddings_for_visualization = prepare_node_embeddings_for_2d_visualization(embeddings)\n",
590+
"plot_2d_node_embeddings(\n",
591+
" node_embeddings_for_visualization, \n",
592+
" \"Typescript Modules positioned by their dependency relationships (node2vec node embeddings + t-SNE)\"\n",
472593
")"
473594
]
474595
}

0 commit comments

Comments
 (0)