From 6ccee6c7f4a9cfc717722838dc77d445a5713efb Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 14 Jul 2024 08:40:41 +0200 Subject: [PATCH] Add visibility reports for Typescript --- ...tistics_for_elements_for_Typescript.cypher | 39 ++ ..._elements_per_module_for_Typescript.cypher | 23 + jupyter/VisibilityMetricsJava.ipynb | 4 +- jupyter/VisibilityMetricsTypescript.ipynb | 443 ++++++++++++++++++ scripts/reports/VisibilityMetricsCsv.sh | 5 + 5 files changed, 512 insertions(+), 2 deletions(-) create mode 100644 cypher/Visibility/Global_relative_visibility_statistics_for_elements_for_Typescript.cypher create mode 100644 cypher/Visibility/Relative_visibility_exported_elements_to_all_elements_per_module_for_Typescript.cypher create mode 100644 jupyter/VisibilityMetricsTypescript.ipynb diff --git a/cypher/Visibility/Global_relative_visibility_statistics_for_elements_for_Typescript.cypher b/cypher/Visibility/Global_relative_visibility_statistics_for_elements_for_Typescript.cypher new file mode 100644 index 000000000..c8370b8de --- /dev/null +++ b/cypher/Visibility/Global_relative_visibility_statistics_for_elements_for_Typescript.cypher @@ -0,0 +1,39 @@ +// Global relative visibility statistics for elements for Typescript + + MATCH (project:Directory)<-[:HAS_ROOT]-(:Project)-[:CONTAINS]->(module:TS:Module) + MATCH (module)-[:DECLARES]->(anyElement:TS) +OPTIONAL MATCH (module)-[:EXPORTS]->(exportedElement:TS) + WITH project.absoluteFileName AS projectPath + ,module + ,COUNT(DISTINCT exportedElement) AS exportedElements + ,COUNT(DISTINCT anyElement) AS allElements + WITH projectPath + ,module + ,exportedElements + ,allElements + ,toFloat(exportedElements) / allElements AS relativeVisibility + WITH projectPath + ,SUM(exportedElements) AS exported + ,SUM(allElements) AS all + ,AVG(relativeVisibility) AS average + ,MIN(relativeVisibility) AS min + ,MAX(relativeVisibility) AS max + ,percentileCont(relativeVisibility, 0.25) AS percentile25 + ,percentileCont(relativeVisibility, 0.5) AS percentile50 + ,percentileCont(relativeVisibility, 0.75) AS percentile75 + ,percentileCont(relativeVisibility, 0.90) AS percentile90 + ,percentileCont(relativeVisibility, 0.95) AS percentile95 + ,percentileCont(relativeVisibility, 0.99) AS percentile99 +RETURN projectPath + ,all + ,exported + ,min + ,max + ,average + ,percentile25 + ,percentile50 + ,percentile75 + ,percentile90 + ,percentile95 + ,percentile99 +ORDER BY projectPath \ No newline at end of file diff --git a/cypher/Visibility/Relative_visibility_exported_elements_to_all_elements_per_module_for_Typescript.cypher b/cypher/Visibility/Relative_visibility_exported_elements_to_all_elements_per_module_for_Typescript.cypher new file mode 100644 index 000000000..161e21273 --- /dev/null +++ b/cypher/Visibility/Relative_visibility_exported_elements_to_all_elements_per_module_for_Typescript.cypher @@ -0,0 +1,23 @@ +// Relative visibility: exported elements to all elements per module + + MATCH (project:Directory)<-[:HAS_ROOT]-(:Project)-[:CONTAINS]->(module:TS:Module) + MATCH (module)-[:DECLARES]->(moduleElement:TS) +// Check if element is not only declared but also exported. +// Only EXPORTS refers to re-exported elements. +OPTIONAL MATCH (module)-[export:EXPORTS]->(moduleElement) + WITH project.absoluteFileName AS projectPath + ,module + ,count(DISTINCT export) AS exportedElements + ,count(DISTINCT moduleElement) AS allElements + WITH projectPath + ,module + ,exportedElements + ,allElements + ,toFloat(exportedElements) / allElements AS relativeVisibility +RETURN projectPath + ,replace(module.fileName, './', '') AS modulePath + ,module.name AS moduleName + ,exportedElements + ,allElements + ,relativeVisibility +ORDER BY relativeVisibility DESC, allElements DESC, projectPath ASC, moduleName ASC \ No newline at end of file diff --git a/jupyter/VisibilityMetricsJava.ipynb b/jupyter/VisibilityMetricsJava.ipynb index 9a1d1235d..3b42c7be2 100644 --- a/jupyter/VisibilityMetricsJava.ipynb +++ b/jupyter/VisibilityMetricsJava.ipynb @@ -6,7 +6,7 @@ "id": "2f0eabc4", "metadata": {}, "source": [ - "# Visibility Metrics\n", + "# Visibility Metrics for Java\n", "
\n", "\n", "### References\n", @@ -436,7 +436,7 @@ "pygments_lexer": "ipython3", "version": "3.11.4" }, - "title": "Object Oriented Design Quality Metrics for Java with Neo4j" + "title": "Visibility Metrics for Java" }, "nbformat": 4, "nbformat_minor": 5 diff --git a/jupyter/VisibilityMetricsTypescript.ipynb b/jupyter/VisibilityMetricsTypescript.ipynb new file mode 100644 index 000000000..4dc9e3186 --- /dev/null +++ b/jupyter/VisibilityMetricsTypescript.ipynb @@ -0,0 +1,443 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "2f0eabc4", + "metadata": {}, + "source": [ + "# Visibility Metrics for Typescript\n", + "
\n", + "\n", + "### References\n", + "- [Visibility Metrics and the Importance of Hiding Things](https://dzone.com/articles/visibility-metrics-and-the-importance-of-hiding-th)\n", + "- [Calculate metrics](https://101.jqassistant.org/calculate-metrics/index.html)\n", + "- [Controlling Access to Members of a Class](https://docs.oracle.com/javase/tutorial/java/javaOO/accesscontrol.html)\n", + "- [Neo4j Python Driver](https://neo4j.com/docs/api/python-driver/current)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4191f259", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plot\n", + "from neo4j import GraphDatabase" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acf605be", + "metadata": {}, + "outputs": [], + "source": [ + "#The following cell uses the build-in %html \"magic\" to override the CSS style for tables to a much smaller size.\n", + "#This is especially needed for PDF export of tables with multiple columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cc19954", + "metadata": {}, + "outputs": [], + "source": [ + "%%html\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33c356d7", + "metadata": {}, + "outputs": [], + "source": [ + "# Main Colormap\n", + "main_color_map = 'nipy_spectral'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c5dab37", + "metadata": {}, + "outputs": [], + "source": [ + "# Please set the environment variable \"NEO4J_INITIAL_PASSWORD\" in your shell \n", + "# before starting jupyter notebook to provide the password for the user \"neo4j\". \n", + "# It is not recommended to hardcode the password into jupyter notebook for security reasons.\n", + "\n", + "driver = GraphDatabase.driver(uri=\"bolt://localhost:7687\", auth=(\"neo4j\", os.environ.get(\"NEO4J_INITIAL_PASSWORD\")))\n", + "driver.verify_connectivity()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1db254b", + "metadata": {}, + "outputs": [], + "source": [ + "def get_cypher_query_from_file(cypherFileName):\n", + " with open(cypherFileName) as file:\n", + " return ' '.join(file.readlines())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59310f6f", + "metadata": {}, + "outputs": [], + "source": [ + "def query_cypher_to_data_frame(filename):\n", + " records, summary, keys = driver.execute_query(get_cypher_query_from_file(filename))\n", + " return pd.DataFrame([r.values() for r in records], columns=keys)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bcb3049f", + "metadata": {}, + "outputs": [], + "source": [ + "#The following cell uses the build-in %html \"magic\" to override the CSS style for tables to a much smaller size.\n", + "#This is especially needed for PDF export of tables with multiple columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d31581da", + "metadata": {}, + "outputs": [], + "source": [ + "%%html\n", + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d7cfd862", + "metadata": {}, + "source": [ + "## Relative Visibility Of Elements\n", + "\n", + "A Typescript element (variable, function, class, ...) may be exported in which case it is visible and can be imported everywhere (if there are no other rules defined). If there is no \"export\" keyword and the element (variable, function, class, ...) is only declared, then it is only visible within the file or module.\n", + "\n", + "The relative visibility is the number of inner components that are visible outside (exported) divided by the number of all components:\n", + "\n", + "$$ relative visibility = \\frac{exported\\:elements}{all\\:declared\\:elements} $$\n", + "\n", + "Using directories with an index file as a module and exporting only the elements (variables, function, classes, ...) that the caller of the module should use is a good way to improve encapsulation and implementation detail hiding.\n", + "\n", + "### How to apply the results\n", + "\n", + "The relative visibility is between zero (no element is exported) and one (all elements are exported). A value lower than one means that there are elements that are not exported. The lower the value is, the better the encapsulation and the better the implementation details are hidden. \n", + "\n", + "Non exported elements can't be accessed from another modules so they can be changed without affecting code in other modules. They clearly indicate functionality that only belongs to one modules. This also motivates to split up code into smaller pieces with a dedicated reason to change (single responsibility)." + ] + }, + { + "cell_type": "markdown", + "id": "c9536fd9", + "metadata": {}, + "source": [ + "### Table 1a - Top 40 projects with lowest median of module encapsulation\n", + "\n", + "This table shows the relative visibility statistics aggregated for all modules per project and focusses on projects with many modules and hardly any non-exported elements (lowest median, high visibility). Module directories with an index file and intentional exporting helps to improve encapsulation.\n", + "\n", + "Only the top 40 entries are shown. The whole table can be found in the following CSV report: \n", + "`Global_relative_visibility_statistics_for_elements_for_Typescript`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68ed42d0", + "metadata": {}, + "outputs": [], + "source": [ + "# Query the visibility statistics per project (all modules aggregated)\n", + "# The results will be used in multiple tables below.\n", + "relative_visibility_per_project_aggregated=query_cypher_to_data_frame(\"../cypher/Visibility/Global_relative_visibility_statistics_for_elements_for_Typescript.cypher\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa634a4e", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Sort by the \"percentile50\" (median) and \"all\" (number of modules in the project) descending\n", + "relative_visibility_statistics_highest_median=relative_visibility_per_project_aggregated.sort_values(by=['percentile50', 'all'], ascending=[False, False])\n", + "\n", + "# Reset the index (row numbering starting at 0 and increasing by 1)\n", + "relative_visibility_statistics_highest_median=relative_visibility_statistics_highest_median.reset_index(drop=True)\n", + "\n", + "relative_visibility_statistics_highest_median.head(40)" + ] + }, + { + "cell_type": "markdown", + "id": "1b84fd51", + "metadata": {}, + "source": [ + "### Table 1b - Top 40 projects with highest median of module encapsulation\n", + "\n", + "This table shows the relative visibility statistics aggregated for all modules per project and focusses on project with many modules and the highest median of non-exported elements (variables, functions, classes, ...) (low visibility). Module directories with an index file and intentional exporting helps to improve encapsulation.\n", + "\n", + "Only the top 40 entries are shown. The whole table can be found in the following CSV report: \n", + "`Global_relative_visibility_statistics_for_elements_for_Typescript`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc59a07d", + "metadata": {}, + "outputs": [], + "source": [ + "# Sort by the \"percentile50\" (median) ascending and \"all\" (number of packages in the artifact) descending\n", + "relative_visibility_statistics_lowest_median=relative_visibility_per_project_aggregated.sort_values(by=['percentile50', 'all'], ascending=[True, False])\n", + "\n", + "# Reset the index (row numbering starting at 0 and increasing by 1)\n", + "relative_visibility_statistics_lowest_median=relative_visibility_statistics_lowest_median.reset_index(drop=True)\n", + "\n", + "relative_visibility_statistics_lowest_median.head(40)" + ] + }, + { + "cell_type": "markdown", + "id": "5196ecc2", + "metadata": {}, + "source": [ + "### Table 1 Chart 1 - Relative visibility in projects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f467a8dd", + "metadata": {}, + "outputs": [], + "source": [ + "plot.figure();\n", + "fig, axes = plot.subplots(nrows=3, ncols=1, sharex=True)\n", + "\n", + "number_of_modules_grid_ticks=[1, 2, 5, 10, 20, 50, 100, 200, 500, 1_000, 2_000, 5_000, 10_000]\n", + "\n", + "relative_visibility_per_project_aggregated.plot(\n", + " ax=axes[0],\n", + " kind='scatter',\n", + " title='Relative visibility in projects (75% percentile)', \n", + " x='percentile75',\n", + " y='all',\n", + " grid=True,\n", + " logy=True,\n", + " yticks=number_of_modules_grid_ticks,\n", + " xlabel='relative visibility',\n", + " ylabel='number of modules',\n", + " cmap=main_color_map,\n", + " figsize=(10,4),\n", + ")\n", + "relative_visibility_per_project_aggregated.plot(\n", + " ax=axes[1],\n", + " kind='scatter',\n", + " title='Relative visibility in projects (50% percentile)', \n", + " x='percentile50',\n", + " y='all',\n", + " grid=True,\n", + " logy=True,\n", + " yticks=number_of_modules_grid_ticks,\n", + " xlabel='relative visibility',\n", + " ylabel='number of modules',\n", + " cmap=main_color_map,\n", + " figsize=(10,4),\n", + ")\n", + "relative_visibility_per_project_aggregated.plot(\n", + " ax=axes[2],\n", + " kind='scatter',\n", + " title='Relative visibility in artifacts (25% percentile)', \n", + " x='percentile25',\n", + " y='all',\n", + " grid=True,\n", + " logy=True,\n", + " yticks=number_of_modules_grid_ticks,\n", + " xlabel='relative visibility',\n", + " ylabel='number of packages',\n", + " cmap=main_color_map,\n", + " figsize=(10,10),\n", + ")\n", + "axes[0].grid(color = 'grey', linestyle = '-', linewidth = 0.2)\n", + "axes[1].grid(color = 'grey', linestyle = '-', linewidth = 0.2)\n", + "axes[2].grid(color = 'grey', linestyle = '-', linewidth = 0.2)\n", + "plot.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "3f59da8d", + "metadata": {}, + "source": [ + "### Table 2a - Top 40 modules with the highest visibility and lowest encapsulation\n", + "\n", + "This table shows the relative visibility statistics per module and project and focusses on modules with many elements (variables, functions, classes, ...), hardly any non-exported ones and therefore the highest relative visibility (lowest encapsulation). Module directories with an index file and intentional exporting helps to improve encapsulation.\n", + "\n", + "Only the top 40 entries are shown. The whole table can be found in the following CSV report: \n", + "`Relative_visibility_exported_elements_to_all_elements_per_module_for_Typescript`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e91c8c5", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Query the visibility statistics per module and project (all elements aggregated)\n", + "# The results will be used in multiple tables below.\n", + "relative_visibility_per_module=query_cypher_to_data_frame(\"../cypher/Visibility/Relative_visibility_exported_elements_to_all_elements_per_module_for_Typescript.cypher\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48f7f2d2", + "metadata": {}, + "outputs": [], + "source": [ + "# Sort by the \"relativeVisibility\" and \"allElements\" (number of elements in the module) descending\n", + "highest_relative_visibility_module=relative_visibility_per_module.sort_values(by=['relativeVisibility', 'allElements'], ascending=[False, False])\n", + "\n", + "# Reset the index (row numbering starting at 0 and increasing by 1)\n", + "highest_relative_visibility_module=highest_relative_visibility_module.reset_index(drop=True)\n", + "\n", + "highest_relative_visibility_module.head(40)" + ] + }, + { + "cell_type": "markdown", + "id": "c6786ef1", + "metadata": {}, + "source": [ + "### Table 2b - Top 40 modules with the lowest visibility and highest encapsulation\n", + "\n", + "This table shows the relative visibility statistics per modules and project and focusses on modules with many elements (variables, functions, classes, ...), many non-exported ones and therefore the lowest relative visibility (highest encapsulation). Non-exported elements help to improve encapsulation. Zero percent visibility and therefore modules with no exported elements are suspicious to contain dead code.\n", + "\n", + "Only the top 40 entries are shown. The whole table can be found in the following CSV report: \n", + "`Relative_visibility_public_types_to_all_types_per_package`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48c20ca4", + "metadata": {}, + "outputs": [], + "source": [ + "# Sort by the \"relativeVisibility\" ascending and \"allTypes\" (number of types in the package) descending\n", + "lowest_relative_visibility_modules=relative_visibility_per_module.sort_values(by=['relativeVisibility', 'allElements'], ascending=[True, False])\n", + "\n", + "# Reset the index (row numbering starting at 0 and increasing by 1)\n", + "lowest_relative_visibility_modules=lowest_relative_visibility_modules.reset_index(drop=True)\n", + "\n", + "lowest_relative_visibility_modules.head(40)" + ] + }, + { + "cell_type": "markdown", + "id": "8ff237fd", + "metadata": {}, + "source": [ + "### Table 2 Chart 1 - Relative visibility of modules" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98b12846", + "metadata": {}, + "outputs": [], + "source": [ + "plot.figure();\n", + "\n", + "number_of_types_grid_ticks=[1, 2, 5, 10, 20, 50, 100, 200, 500, 1_000, 2_000, 5_000, 10_000]\n", + "\n", + "relative_visibility_per_module.plot(\n", + " kind='scatter',\n", + " title='Relative visibility of modules', \n", + " x='relativeVisibility',\n", + " y='allElements',\n", + " grid=True,\n", + " logy=True,\n", + " yticks=number_of_types_grid_ticks,\n", + " xlabel='relative visibility',\n", + " ylabel='number of elements',\n", + " cmap=main_color_map,\n", + " figsize=(10,4),\n", + ")\n", + "\n", + "plot.show()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "JohT" + } + ], + "code_graph_analysis_pipeline_data_validation": "ValidateTypescriptModuleDependencies", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + }, + "title": "Visibility Metrics for Typescript" + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/scripts/reports/VisibilityMetricsCsv.sh b/scripts/reports/VisibilityMetricsCsv.sh index 252f89e43..9f95b3512 100755 --- a/scripts/reports/VisibilityMetricsCsv.sh +++ b/scripts/reports/VisibilityMetricsCsv.sh @@ -38,8 +38,13 @@ mkdir -p "${FULL_REPORT_DIRECTORY}" # Local Constants VISIBILITY_CYPHER_DIR="${CYPHER_DIR}/Visibility" +# For Java execute_cypher "${VISIBILITY_CYPHER_DIR}/Global_relative_visibility_statistics_for_types.cypher" > "${FULL_REPORT_DIRECTORY}/RelativeVisibilityPerArtifact.csv" execute_cypher "${VISIBILITY_CYPHER_DIR}/Relative_visibility_public_types_to_all_types_per_package.cypher" > "${FULL_REPORT_DIRECTORY}/RelativeVisibilityPerPackage.csv" +# For TypeScript +execute_cypher "${VISIBILITY_CYPHER_DIR}/Global_relative_visibility_statistics_for_elements_for_Typescript.cypher" > "${FULL_REPORT_DIRECTORY}/RelativeVisibilityPerTypescriptProject.csv" +execute_cypher "${VISIBILITY_CYPHER_DIR}/Relative_visibility_exported_elements_to_all_elements_per_module_for_Typescript.cypher" > "${FULL_REPORT_DIRECTORY}/RelativeVisibilityPerTypescriptModule.csv" + # Clean-up after report generation. Empty reports will be deleted. source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}" \ No newline at end of file