|
213 | 213 | "outputs": [], |
214 | 214 | "source": [ |
215 | 215 | "def prepare_node_embeddings_for_2d_visualization(embeddings: pd.DataFrame) -> pd.DataFrame:\n", |
| 216 | + " \"\"\"\n", |
| 217 | + " Reduces the dimensionality of the node embeddings (e.g. 64 floating point numbers in an array)\n", |
| 218 | + " to two dimensions for 2D visualization.\n", |
| 219 | + " see https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE\n", |
| 220 | + " \"\"\"\n", |
| 221 | + "\n", |
216 | 222 | " if embeddings.empty: \n", |
217 | 223 | " print(\"No projected data for node embeddings dimensionality reduction available\")\n", |
218 | 224 | " return embeddings\n", |
|
350 | 356 | "id": "76d8bca1", |
351 | 357 | "metadata": {}, |
352 | 358 | "source": [ |
353 | | - "### 1.2 Dimensionality reduction with t-distributed stochastic neighbor embedding (t-SNE) for Java\n", |
| 359 | + "### 1.2 Dimensionality reduction with t-distributed stochastic neighbor embedding (t-SNE)\n", |
354 | 360 | "\n", |
355 | 361 | "This step takes the original node embeddings with a higher dimensionality (e.g. a list of 32 floats) and\n", |
356 | 362 | "reduces them to a two-dimensional array for visualization. \n", |
|
375 | 381 | "id": "f908c47f", |
376 | 382 | "metadata": {}, |
377 | 383 | "source": [ |
378 | | - "### 1.3 Plot the node embeddings reduced to two dimensions for Java" |
| 384 | + "### 1.3 Visualization of the node embeddings reduced to two dimensions" |
379 | 385 | ] |
380 | 386 | }, |
381 | 387 | { |
|
387 | 393 | "source": [ |
388 | 394 | "plot_2d_node_embeddings(\n", |
389 | 395 | " node_embeddings_for_visualization, \n", |
390 | | - " \"Java Package nodes positioned by their dependency relationships using t-SNE\"\n", |
| 396 | + " \"Java Package positioned by their dependency relationships (FastRP node embeddings + t-SNE)\"\n", |
| 397 | + ")" |
| 398 | + ] |
| 399 | + }, |
| 400 | + { |
| 401 | + "cell_type": "markdown", |
| 402 | + "id": "b690b9a7", |
| 403 | + "metadata": {}, |
| 404 | + "source": [ |
| 405 | + "### 1.4 Node Embeddings for Java Packages using HashGNN" |
| 406 | + ] |
| 407 | + }, |
| 408 | + { |
| 409 | + "cell_type": "code", |
| 410 | + "execution_count": null, |
| 411 | + "id": "3d3cfb7a", |
| 412 | + "metadata": {}, |
| 413 | + "outputs": [], |
| 414 | + "source": [ |
| 415 | + "java_package_embeddings_parameters={\n", |
| 416 | + " \"dependencies_projection\": \"java-package-embeddings-notebook\",\n", |
| 417 | + " \"dependencies_projection_node\": \"Package\",\n", |
| 418 | + " \"dependencies_projection_weight_property\": \"weight25PercentInterfaces\",\n", |
| 419 | + " \"dependencies_projection_write_property\": \"embeddingsHashGNN\",\n", |
| 420 | + " \"dependencies_projection_embedding_dimension\":\"64\"\n", |
| 421 | + "}\n", |
| 422 | + "embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher\", java_package_embeddings_parameters)\n", |
| 423 | + "node_embeddings_for_visualization = prepare_node_embeddings_for_2d_visualization(embeddings)\n", |
| 424 | + "plot_2d_node_embeddings(\n", |
| 425 | + " node_embeddings_for_visualization, \n", |
| 426 | + " \"Java Package positioned by their dependency relationships (HashGNN node embeddings + t-SNE)\"\n", |
| 427 | + ")" |
| 428 | + ] |
| 429 | + }, |
| 430 | + { |
| 431 | + "cell_type": "markdown", |
| 432 | + "id": "248d88b4", |
| 433 | + "metadata": {}, |
| 434 | + "source": [ |
| 435 | + "### 1.5 Node Embeddings for Java Packages using node2vec" |
| 436 | + ] |
| 437 | + }, |
| 438 | + { |
| 439 | + "cell_type": "code", |
| 440 | + "execution_count": null, |
| 441 | + "id": "62c40c45", |
| 442 | + "metadata": {}, |
| 443 | + "outputs": [], |
| 444 | + "source": [ |
| 445 | + "java_package_embeddings_parameters={\n", |
| 446 | + " \"dependencies_projection\": \"java-package-embeddings-notebook\",\n", |
| 447 | + " \"dependencies_projection_node\": \"Package\",\n", |
| 448 | + " \"dependencies_projection_weight_property\": \"weight25PercentInterfaces\",\n", |
| 449 | + " \"dependencies_projection_write_property\": \"embeddingsNode2Vec\",\n", |
| 450 | + " \"dependencies_projection_embedding_dimension\":\"64\"\n", |
| 451 | + "}\n", |
| 452 | + "embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher\", java_package_embeddings_parameters)\n", |
| 453 | + "node_embeddings_for_visualization = prepare_node_embeddings_for_2d_visualization(embeddings)\n", |
| 454 | + "plot_2d_node_embeddings(\n", |
| 455 | + " node_embeddings_for_visualization, \n", |
| 456 | + " \"Java Package positioned by their dependency relationships (node2vec node embeddings + t-SNE)\"\n", |
391 | 457 | ")" |
392 | 458 | ] |
393 | 459 | }, |
|
396 | 462 | "id": "0b42163d", |
397 | 463 | "metadata": {}, |
398 | 464 | "source": [ |
399 | | - "## 1. Typescript Modules" |
| 465 | + "## 2. Typescript Modules" |
400 | 466 | ] |
401 | 467 | }, |
402 | 468 | { |
403 | 469 | "cell_type": "markdown", |
404 | 470 | "id": "3b468bae", |
405 | 471 | "metadata": {}, |
406 | 472 | "source": [ |
407 | | - "### 1.1 Generate Node Embeddings using Fast Random Projection (Fast RP) for Typescript Modules\n", |
| 473 | + "### 2.1 Generate Node Embeddings for Typescript Modules using Fast Random Projection (Fast RP)\n", |
408 | 474 | "\n", |
409 | | - "See section 1.2 for some background about node embeddings." |
| 475 | + "See section 1.1 for some background about node embeddings." |
410 | 476 | ] |
411 | 477 | }, |
412 | 478 | { |
|
431 | 497 | "id": "ad17607c", |
432 | 498 | "metadata": {}, |
433 | 499 | "source": [ |
434 | | - "### 1.2 Dimensionality reduction with t-distributed stochastic neighbor embedding (t-SNE) for Typescript\n", |
| 500 | + "### 2.2 Dimensionality reduction with t-distributed stochastic neighbor embedding (t-SNE)\n", |
435 | 501 | "\n", |
436 | | - "This step takes the original node embeddings with a higher dimensionality (e.g. list of 32 floats) and\n", |
437 | | - "reduces them to a 2 dimensional array for visualization. \n", |
438 | | - "\n", |
439 | | - "> It converts similarities between data points to joint probabilities and tries to minimize the Kullback-Leibler divergence between the joint probabilities of the low-dimensional embedding and the high-dimensional data.\n", |
440 | | - "\n", |
441 | | - "(see https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html#sklearn.manifold.TSNE)" |
| 502 | + "See section 1.2 for some background about t-SNE." |
442 | 503 | ] |
443 | 504 | }, |
444 | 505 | { |
|
456 | 517 | "id": "20084589", |
457 | 518 | "metadata": {}, |
458 | 519 | "source": [ |
459 | | - "### 1.3 Plot the node embeddings reduced to two dimensions for Typescript" |
| 520 | + "### 2.3 Visualization of the node embeddings reduced to two dimensions" |
460 | 521 | ] |
461 | 522 | }, |
462 | 523 | { |
|
468 | 529 | "source": [ |
469 | 530 | "plot_2d_node_embeddings(\n", |
470 | 531 | " node_embeddings_for_visualization, \n", |
471 | | - " \"Typescript Module nodes positioned by their dependency relationships using t-SNE\"\n", |
| 532 | + " \"Typescript Modules positioned by their dependency relationships (FastRP node embeddings + t-SNE)\"\n", |
| 533 | + ")" |
| 534 | + ] |
| 535 | + }, |
| 536 | + { |
| 537 | + "cell_type": "markdown", |
| 538 | + "id": "6cac9be7", |
| 539 | + "metadata": {}, |
| 540 | + "source": [ |
| 541 | + "### 2.4 Node Embeddings for Typescript Modules using HashGNN" |
| 542 | + ] |
| 543 | + }, |
| 544 | + { |
| 545 | + "cell_type": "code", |
| 546 | + "execution_count": null, |
| 547 | + "id": "8fe68eca", |
| 548 | + "metadata": {}, |
| 549 | + "outputs": [], |
| 550 | + "source": [ |
| 551 | + "typescript_module_embeddings_parameters={\n", |
| 552 | + " \"dependencies_projection\": \"typescript-module-embeddings-notebook\",\n", |
| 553 | + " \"dependencies_projection_node\": \"Module\",\n", |
| 554 | + " \"dependencies_projection_weight_property\": \"lowCouplingElement25PercentWeight\",\n", |
| 555 | + " \"dependencies_projection_write_property\": \"embeddingsHashGNN\",\n", |
| 556 | + " \"dependencies_projection_embedding_dimension\":\"64\"\n", |
| 557 | + "}\n", |
| 558 | + "embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher\", typescript_module_embeddings_parameters)\n", |
| 559 | + "node_embeddings_for_visualization = prepare_node_embeddings_for_2d_visualization(embeddings)\n", |
| 560 | + "plot_2d_node_embeddings(\n", |
| 561 | + " node_embeddings_for_visualization, \n", |
| 562 | + " \"Typescript Modules positioned by their dependency relationships (HashGNN node embeddings + t-SNE)\"\n", |
| 563 | + ")" |
| 564 | + ] |
| 565 | + }, |
| 566 | + { |
| 567 | + "cell_type": "markdown", |
| 568 | + "id": "0a7d66f5", |
| 569 | + "metadata": {}, |
| 570 | + "source": [ |
| 571 | + "### 2.5 Node Embeddings for Typescript Modules using node2vec" |
| 572 | + ] |
| 573 | + }, |
| 574 | + { |
| 575 | + "cell_type": "code", |
| 576 | + "execution_count": null, |
| 577 | + "id": "ea6c52ca", |
| 578 | + "metadata": {}, |
| 579 | + "outputs": [], |
| 580 | + "source": [ |
| 581 | + "typescript_module_embeddings_parameters={\n", |
| 582 | + " \"dependencies_projection\": \"typescript-module-embeddings-notebook\",\n", |
| 583 | + " \"dependencies_projection_node\": \"Module\",\n", |
| 584 | + " \"dependencies_projection_weight_property\": \"lowCouplingElement25PercentWeight\",\n", |
| 585 | + " \"dependencies_projection_write_property\": \"embeddingsNode2Vec\",\n", |
| 586 | + " \"dependencies_projection_embedding_dimension\":\"64\"\n", |
| 587 | + "}\n", |
| 588 | + "embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher\", typescript_module_embeddings_parameters)\n", |
| 589 | + "node_embeddings_for_visualization = prepare_node_embeddings_for_2d_visualization(embeddings)\n", |
| 590 | + "plot_2d_node_embeddings(\n", |
| 591 | + " node_embeddings_for_visualization, \n", |
| 592 | + " \"Typescript Modules positioned by their dependency relationships (node2vec node embeddings + t-SNE)\"\n", |
472 | 593 | ")" |
473 | 594 | ] |
474 | 595 | } |
|
0 commit comments