diff --git a/cypher/General_Enrichment/Add_file_name and_extension.cypher b/cypher/General_Enrichment/Add_file_name and_extension.cypher index 96fac1fe9..ec1ef1a24 100644 --- a/cypher/General_Enrichment/Add_file_name and_extension.cypher +++ b/cypher/General_Enrichment/Add_file_name and_extension.cypher @@ -2,6 +2,7 @@ MATCH (file:File) WHERE file.fileName IS NOT NULL + AND file.name IS NULL // Don't override an already existing "name" property WITH * ,file.fileName AS fileName WITH * diff --git a/cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_distribution_per_project.cypher b/cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_distribution_per_project.cypher index 4649c772a..683479b16 100644 --- a/cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_distribution_per_project.cypher +++ b/cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_distribution_per_project.cypher @@ -32,8 +32,10 @@ OPTIONAL MATCH (targetScan:TS:Scan)-[:CONTAINS_PROJECT]->(targetProject) // Group by project name, if the target project is the same and the distance. Return those as result. 
RETURN sourceProject.name AS sourceProject ,sourceScan.name AS sourceScan + ,source.rootProjectName AS sourceRootProject ,(targetProject <> sourceProject) AS isDifferentTargetProject ,(targetScan <> sourceScan) AS isDifferentTargetScan + ,(target.rootProjectName <> source.rootProjectName) AS isDifferentTargetRootProject ,distance ,distanceTotalPairCount ,distanceTotalSourceCount @@ -41,6 +43,8 @@ RETURN sourceProject.name AS sourceProject ,count(*) AS pairCount ,count(DISTINCT sourceNodeId) AS sourceNodeCount ,count(DISTINCT targetNodeId) AS targetNodeCount - ,collect(DISTINCT source.fileName + ' ->' + target.fileName)[0..4] AS examples + ,collect(DISTINCT source.fileName + ' -> ' + target.fileName)[0..4] AS examples + ,collect(DISTINCT sourceProject.name + ' -> ' + targetProject.name)[0..4] AS exampleProjects + ,collect(DISTINCT sourceScan.name + ' -> ' + targetScan.name)[0..4] AS exampleScans // Sort by source project name, if the target project is the same and the distance, all ascending ORDER BY sourceProject, isDifferentTargetProject, distance \ No newline at end of file diff --git a/cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_examples.cypher b/cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_examples.cypher index 4aba1d9fa..963346525 100644 --- a/cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_examples.cypher +++ b/cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_examples.cypher @@ -14,7 +14,7 @@ OPTIONAL MATCH (sourceProject:Artifact|Project)-[:CONTAINS]->(source) // Optionally get the name of the scan that contained that project OPTIONAL MATCH (sourceScan:TS:Scan)-[:CONTAINS_PROJECT]->(sourceProject) - WITH *, coalesce(sourceScan, sourceProject).name AS sourceContainerName + WITH *, coalesce(source.rootProjectName, sourceScan.name, sourceProject.name) AS sourceContainerName ORDER BY distance DESC, sourceContainerName ASC // Only output the top 10 entries LIMIT 10 diff --git 
a/cypher/Path_Finding/Path_Finding_6_Longest_paths_distribution_per_project.cypher b/cypher/Path_Finding/Path_Finding_6_Longest_paths_distribution_per_project.cypher index 3c2f21bc9..dc186ee04 100644 --- a/cypher/Path_Finding/Path_Finding_6_Longest_paths_distribution_per_project.cypher +++ b/cypher/Path_Finding/Path_Finding_6_Longest_paths_distribution_per_project.cypher @@ -30,8 +30,10 @@ OPTIONAL MATCH (targetScan:TS:Scan)-[:CONTAINS_PROJECT]->(targetProject) // Group by project name, if the target project is the same and the distance. Return those as result. RETURN sourceProject.name AS sourceProject ,sourceScan.name AS sourceScan + ,source.rootProjectName AS sourceRootProject ,(targetProject <> sourceProject) AS isDifferentTargetProject ,(targetScan <> sourceScan) AS isDifferentTargetScan + ,(target.rootProjectName <> source.rootProjectName) AS isDifferentTargetRootProject ,distance ,distanceTotalPairCount ,distanceTotalSourceCount @@ -39,7 +41,9 @@ RETURN sourceProject.name AS sourceProject ,count(*) AS pairCount ,count(DISTINCT sourceNodeId) AS sourceNodeCount ,count(DISTINCT targetNodeId) AS targetNodeCount - ,collect(DISTINCT source.fileName + ' ->' + target.fileName)[0..4] AS examples + ,collect(DISTINCT source.fileName + ' -> ' + target.fileName)[0..4] AS examples + ,collect(DISTINCT sourceProject.name + ' -> ' + targetProject.name)[0..4] AS exampleProjects + ,collect(DISTINCT sourceScan.name + ' -> ' + targetScan.name)[0..4] AS exampleScans // Sort by source project name, if the target project is the same and the distance, all ascending ORDER BY sourceProject, isDifferentTargetProject, distance diff --git a/cypher/Path_Finding/Path_Finding_6_Longest_paths_examples.cypher b/cypher/Path_Finding/Path_Finding_6_Longest_paths_examples.cypher index 6d12571bc..78e8f5ee3 100644 --- a/cypher/Path_Finding/Path_Finding_6_Longest_paths_examples.cypher +++ b/cypher/Path_Finding/Path_Finding_6_Longest_paths_examples.cypher @@ -17,7 +17,7 @@ OPTIONAL MATCH 
(targetProject:Artifact|Project)-[:CONTAINS]->(target) // Optionally get the name of the scan that contained that project OPTIONAL MATCH (sourceScan:TS:Scan)-[:CONTAINS_PROJECT]->(sourceProject) OPTIONAL MATCH (targetScan:TS:Scan)-[:CONTAINS_PROJECT]->(targetProject) - WITH *, coalesce(sourceScan, sourceProject).name AS sourceContainerName + WITH *, coalesce(source.rootProjectName, sourceScan.name, sourceProject.name) AS sourceContainerName ORDER BY distance DESC, sourceContainerName ASC // Only output the top 10 entries LIMIT 10 diff --git a/cypher/Typescript_Enrichment/Add_RESOLVES_TO_relationship_for_matching_modules.cypher b/cypher/Typescript_Enrichment/Add_RESOLVES_TO_relationship_for_matching_modules.cypher index b03658972..fe44b5910 100644 --- a/cypher/Typescript_Enrichment/Add_RESOLVES_TO_relationship_for_matching_modules.cypher +++ b/cypher/Typescript_Enrichment/Add_RESOLVES_TO_relationship_for_matching_modules.cypher @@ -13,6 +13,7 @@ WHERE module.globalFqn IS NOT NULL AND externalModule.moduleName = module.moduleName AND externalModule.namespace = module.namespace AND externalModule.extensionExtended = module.extensionExtended + AND externalModule.globalFqn ENDS WITH module.localModulePath ) ) AND module <> externalModule diff --git a/cypher/Typescript_Enrichment/Set_localRootPath_for_modules.cypher b/cypher/Typescript_Enrichment/Set_localRootPath_for_modules.cypher new file mode 100644 index 000000000..aa3ebcbfd --- /dev/null +++ b/cypher/Typescript_Enrichment/Set_localRootPath_for_modules.cypher @@ -0,0 +1,18 @@ + +// Set "localProjectPath", "rootProjectName" and "localModulePath" on Typescript Module nodes + + MATCH (module:TS:Module) + WITH *, ltrim(module.localFqn, '.') AS trimmedLocalFqn + WITH *, split(module.globalFqn, trimmedLocalFqn)[0] AS rootPath + WITH *, reverse(split(reverse(rootPath), reverse('source/'))[0]) AS localProjectPath + WITH *, split(localProjectPath, '/')[0] AS rootProjectName + SET module.localProjectPath = localProjectPath + 
,module.rootProjectName = rootProjectName + ,module.localModulePath = localProjectPath + trimmedLocalFqn +RETURN count(DISTINCT localProjectPath) AS identifiedLocalProjectPaths +// Debugging +//RETURN rootPath +// ,localProjectPath +// ,count(*) +// ,collect(DISTINCT module.localFqn)[0..2] AS exampleModuleLocalFqn +// ,collect(DISTINCT trimmedLocalFqn)[0..2] AS exampleTrimmedModuleLocalFqn \ No newline at end of file diff --git a/jupyter/PathFindingTypescript.ipynb b/jupyter/PathFindingTypescript.ipynb index c0a7e0e86..8a5c6af97 100644 --- a/jupyter/PathFindingTypescript.ipynb +++ b/jupyter/PathFindingTypescript.ipynb @@ -253,7 +253,7 @@ " \"\"\"\n", " \n", " print(\"No projected data for path finding available\")\n", - " return pd.DataFrame(columns=['totalCost', 'sourceProject', 'sourceScan', 'isDifferentTargetProject', 'isDifferentTargetScan', 'distance', 'distanceTotalPairCount', 'distanceTotalSourceCount', 'distanceTotalTargetCount', 'nodeCount', 'pairCount'])" + " return pd.DataFrame(columns=['totalCost', 'sourceProject', 'sourceScan', 'sourceRootProject', 'isDifferentTargetProject', 'isDifferentTargetScan', 'isDifferentTargetRootProject', 'distance', 'distanceTotalPairCount', 'distanceTotalSourceCount', 'distanceTotalTargetCount', 'nodeCount', 'pairCount'])" ] }, { @@ -356,7 +356,7 @@ " # If not already grouped, group by the given column and the distance and sum up the pair count (=number of paths)\n", " data_frame = data_frame.groupby([column, \"distance\"], as_index=False)[\"pairCount\"].apply(sum)\n", "\n", - " # The rows of the parameter \"column\" contain the source project or scan (e.g. Java artifact or Typescript project) and their path count.\n", + " # The rows of the parameter \"column\" contain the source project (e.g. 
Java artifact or Typescript project) and their path count.\n", " # The columns contain the distances (length of the paths).\n", " data_frame = data_frame.pivot(index='distance', columns=column, values='pairCount')\n", "\n", @@ -745,7 +745,7 @@ "outputs": [], "source": [ "# Execute algorithm \"All pairs shortest path\" and query overall and project specific results\n", - "all_pairs_shortest_paths_distribution_per_project_and_scan=query_if_data_available(is_module_data_available, \"../cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_distribution_per_project.cypher\", module_path_finding_parameters)" + "all_pairs_shortest_paths_distribution_per_project_and_root_project=query_if_data_available(is_module_data_available, \"../cypher/Path_Finding/Path_Finding_5_All_pairs_shortest_path_distribution_per_project.cypher\", module_path_finding_parameters)" ] }, { @@ -763,7 +763,7 @@ "metadata": {}, "outputs": [], "source": [ - "module_dependencies_graph_diameter=all_pairs_shortest_paths_distribution_per_project_and_scan['distance'].max()\n", + "module_dependencies_graph_diameter=all_pairs_shortest_paths_distribution_per_project_and_root_project['distance'].max()\n", "print('The diameter (longest shortest path) of the projected module dependencies Graph is:', module_dependencies_graph_diameter)" ] }, @@ -782,7 +782,7 @@ "metadata": {}, "outputs": [], "source": [ - "all_pairs_shortest_paths_distribution_per_project_in_total=get_total_distance_distribution(all_pairs_shortest_paths_distribution_per_project_and_scan)\n", + "all_pairs_shortest_paths_distribution_per_project_in_total=get_total_distance_distribution(all_pairs_shortest_paths_distribution_per_project_and_root_project)\n", "all_pairs_shortest_paths_distribution_per_project_in_total.head(50)" ] }, @@ -848,7 +848,7 @@ "metadata": {}, "outputs": [], "source": [ - "all_pairs_shortest_paths_distribution_per_project_and_scan.head(10)" + "all_pairs_shortest_paths_distribution_per_project_and_root_project.head(10)" ] 
}, { @@ -870,7 +870,7 @@ "metadata": {}, "outputs": [], "source": [ - "all_pairs_shortest_paths_distribution_per_project_isolated=all_pairs_shortest_paths_distribution_per_project_and_scan.query('isDifferentTargetProject == False')\n", + "all_pairs_shortest_paths_distribution_per_project_isolated=all_pairs_shortest_paths_distribution_per_project_and_root_project.query('isDifferentTargetProject == False')\n", "all_pairs_shortest_paths_distribution_per_project_isolated.head(10)" ] }, @@ -986,11 +986,11 @@ "id": "f76616b7", "metadata": {}, "source": [ - "### 1.1.5 All pairs shortest path for each scan\n", + "### 1.1.5 All pairs shortest path for each root project\n", "\n", - "In this section we'll focus only on pairs of nodes that both belong to the same scan, filtering out every line that has `isDifferentTargetScan==False`. The first ten rows are shown in a table followed by charts that show the distribution of shortest path distances across different scans in stacked bar charts (absolute and normalized).\n", + "In this section we'll focus only on pairs of nodes that both belong to the same root project by keeping only the lines that have `isDifferentTargetRootProject==False`. The first twenty rows are shown in a table followed by charts that show the distribution of shortest path distances across different root projects in stacked bar charts (absolute and normalized).\n", "\n", - "**Note:** It is possible that a (shortest) path could have nodes in between that belong to different scans. Therefore, the data of each scan isn't perfectly isolated. However, it shows how the dependencies interact across scans \"in real life\" while still providing a decent isolation of each scan." + "**Note:** It is possible that a (shortest) path could have nodes in between that belong to different root projects. Therefore, the data of each root project isn't perfectly isolated. 
However, it shows how the dependencies interact across root projects \"in real life\" while still providing a decent isolation of each root project." ] }, { @@ -1000,10 +1000,10 @@ "metadata": {}, "outputs": [], "source": [ - "all_pairs_shortest_paths_distribution_per_scan_isolated=all_pairs_shortest_paths_distribution_per_project_and_scan.query('isDifferentTargetScan == False')\n", + "all_pairs_shortest_paths_distribution_per_root_project_isolated=all_pairs_shortest_paths_distribution_per_project_and_root_project.query('isDifferentTargetRootProject == False')\n", "\n", - "all_pairs_shortest_paths_distribution_per_scan_isolated.\\\n", - " groupby([\"sourceScan\", \"distance\"], as_index=False)\\\n", + "all_pairs_shortest_paths_distribution_per_root_project_isolated.\\\n", + " groupby([\"sourceRootProject\", \"distance\"], as_index=False)\\\n", " [[\"pairCount\", \"sourceNodeCount\",\"targetNodeCount\"]].\\\n", " apply(max).head(20)" ] @@ -1013,9 +1013,9 @@ "id": "3f9defa3", "metadata": {}, "source": [ - "#### All pairs shortest path for each scan - Longest shortest path (Diameter) for each scan\n", + "#### All pairs shortest path for each root project - Longest shortest path (Diameter) for each root project\n", "\n", - "Shows the top 20 scans with the longest shortest path (=Graph Diameter)." + "Shows the top 20 root projects with the longest shortest path (=Graph Diameter)." 
] }, { @@ -1025,8 +1025,8 @@ "metadata": {}, "outputs": [], "source": [ - "graph_diameter_per_scan = get_longest_path_for_column('sourceScan', all_pairs_shortest_paths_distribution_per_scan_isolated)\n", - "graph_diameter_per_scan.head(20)" + "graph_diameter_per_root_project = get_longest_path_for_column('sourceRootProject', all_pairs_shortest_paths_distribution_per_root_project_isolated)\n", + "graph_diameter_per_root_project.head(20)" ] }, { @@ -1037,9 +1037,9 @@ "outputs": [], "source": [ "plot_longest_distance_of_each_row(\n", - " data_frame=graph_diameter_per_scan,\n", - " title='Longest shortest path (\"diameter\") for Typescript module dependencies per scan',\n", - " xlabel='Scan',\n", + " data_frame=graph_diameter_per_root_project,\n", + " title='Longest shortest path (\"diameter\") for Typescript module dependencies per root project',\n", + " xlabel='root project',\n", " ylabel='longest path length'\n", ")" ] @@ -1049,7 +1049,7 @@ "id": "0551c259", "metadata": {}, "source": [ - "#### All pairs shortest path for each scan - Bar chart (absolute)" + "#### All pairs shortest path for each root project - Bar chart (absolute)" ] }, { @@ -1059,7 +1059,7 @@ "metadata": {}, "outputs": [], "source": [ - "all_pairs_shortest_paths_distribution_per_scan_isolated_pivot = get_distance_distribution_for_each('sourceScan', all_pairs_shortest_paths_distribution_per_scan_isolated)" + "all_pairs_shortest_paths_distribution_per_root_project_isolated_pivot = get_distance_distribution_for_each('sourceRootProject', all_pairs_shortest_paths_distribution_per_root_project_isolated)" ] }, { @@ -1070,9 +1070,9 @@ "outputs": [], "source": [ "plot_stacked_distances_for_each_row(\n", - " data_frame=all_pairs_shortest_paths_distribution_per_scan_isolated_pivot,\n", - " title='All pairs shortest path for Typescript module dependencies stacked per scan (absolute, logarithmic)',\n", - " xlabel='Scan',\n", + " 
data_frame=all_pairs_shortest_paths_distribution_per_root_project_isolated_pivot,\n", + " title='All pairs shortest path for Typescript module dependencies stacked per root project (absolute, logarithmic)',\n", + " xlabel='Root project',\n", " ylabel='Typescript module paths',\n", " logy=True\n", ")" @@ -1083,9 +1083,9 @@ "id": "7bb61fa8", "metadata": {}, "source": [ - "#### All pairs shortest path for each scan - Bar chart (normalized)\n", + "#### All pairs shortest path for each root project - Bar chart (normalized)\n", "\n", - "Shows the top 50 scans with the highest number of dependency paths stacked by their length." + "Shows the top 50 root projects with the highest number of dependency paths stacked by their length." ] }, { @@ -1096,8 +1096,8 @@ "outputs": [], "source": [ "# Normalize data (percent of sum pairs)\n", - "all_pairs_shortest_paths_distribution_per_scan_isolated_normalized_pivot=normalize_distance_distribution_for_each_row(all_pairs_shortest_paths_distribution_per_project_isolated_pivot)\n", - "all_pairs_shortest_paths_distribution_per_scan_isolated_normalized_pivot.head(50)" + "all_pairs_shortest_paths_distribution_per_root_project_isolated_normalized_pivot=normalize_distance_distribution_for_each_row(all_pairs_shortest_paths_distribution_per_root_project_isolated_pivot)\n", + "all_pairs_shortest_paths_distribution_per_root_project_isolated_normalized_pivot.head(50)" ] }, { @@ -1108,9 +1108,9 @@ "outputs": [], "source": [ "plot_stacked_distances_for_each_row(\n", - " data_frame=all_pairs_shortest_paths_distribution_per_scan_isolated_normalized_pivot.head(50),\n", - " title='All pairs shortest path for Typescript module dependencies stacked per scan (normalized in %)',\n", - " xlabel='Scan',\n", + " data_frame=all_pairs_shortest_paths_distribution_per_root_project_isolated_normalized_pivot.head(50),\n", + " title='All pairs shortest path for Typescript module dependencies stacked per root project (normalized in %)',\n", + " xlabel='Root project',\n", 
" ylabel='Typescript module paths'\n", ")" ] @@ -1386,11 +1386,11 @@ "id": "b76e3a9c", "metadata": {}, "source": [ - "### 1.2.4 Longest path for each scan\n", + "### 1.2.4 Longest path for each root project\n", "\n", - "In this section we'll focus only on pairs of nodes that both belong to the same scan, filtering out every line that has `isDifferentTargetScan==False`. The first ten rows are shown in a table followed by charts that show the distribution of longest path distances across different scans in stacked bar charts (absolute and normalized).\n", + "In this section we'll focus only on pairs of nodes that both belong to the same root project by keeping only the lines that have `isDifferentTargetRootProject==False`. The first ten rows are shown in a table followed by charts that show the distribution of longest path distances across different root projects in stacked bar charts (absolute and normalized).\n", "\n", - "**Note:** It is possible that a (longest) path could have nodes in-between that belong to different scans. Therefore, the data of each scan isn't perfectly isolated. However, it shows how the dependencies interact across scans \"in real life\" while still providing a decent amount of isolation of each scan." + "**Note:** It is possible that a (longest) path could have nodes in-between that belong to different root projects. Therefore, the data of each root project isn't perfectly isolated. However, it shows how the dependencies interact across root projects \"in real life\" while still providing a decent amount of isolation of each root project." 
] }, { @@ -1400,8 +1400,8 @@ "metadata": {}, "outputs": [], "source": [ - "longest_paths_distribution_per_scan_isolated=longest_paths_distribution_per_project.query('isDifferentTargetScan == False')\n", - "longest_paths_distribution_per_scan_isolated.head(10)" + "longest_paths_distribution_per_root_project_isolated=longest_paths_distribution_per_project.query('isDifferentTargetRootProject == False')\n", + "longest_paths_distribution_per_root_project_isolated.head(10)" ] }, { @@ -1409,9 +1409,9 @@ "id": "a6c0e349", "metadata": {}, "source": [ - "#### Longest path for each scan - Max. longest path for each scan\n", + "#### Longest path for each root project - Max. longest path for each root project\n", "\n", - "Shows the top 20 scans with their max. longest path." + "Shows the top 20 root projects with their max. longest path." ] }, { @@ -1421,8 +1421,8 @@ "metadata": {}, "outputs": [], "source": [ - "longest_path_per_scan = get_longest_path_for_column('sourceScan', longest_paths_distribution_per_scan_isolated)\n", - "longest_path_per_scan.head(20)" + "longest_path_per_root_project = get_longest_path_for_column('sourceRootProject', longest_paths_distribution_per_root_project_isolated)\n", + "longest_path_per_root_project.head(20)" ] }, { @@ -1433,8 +1433,8 @@ "outputs": [], "source": [ "plot_longest_distance_of_each_row(\n", - " data_frame=longest_path_per_scan,\n", - " title='Max. longest path for Typescript module dependencies per scan',\n", + " data_frame=longest_path_per_root_project,\n", + " title='Max. longest path for Typescript module dependencies per root project',\n", " xlabel='Module',\n", " ylabel='max. 
longest path length'\n", ")" @@ -1445,7 +1445,7 @@ "id": "8622d0f2", "metadata": {}, "source": [ - "#### Longest path for each scan - Bar chart (absolute)" + "#### Longest path for each root project - Bar chart (absolute)" ] }, { @@ -1455,7 +1455,7 @@ "metadata": {}, "outputs": [], "source": [ - "longest_paths_distribution_per_scan_isolated_pivot = get_distance_distribution_for_each('sourceScan', longest_paths_distribution_per_scan_isolated)" + "longest_paths_distribution_per_root_project_isolated_pivot = get_distance_distribution_for_each('sourceRootProject', longest_paths_distribution_per_root_project_isolated)" ] }, { @@ -1466,8 +1466,8 @@ "outputs": [], "source": [ "plot_stacked_distances_for_each_row(\n", - " data_frame=longest_paths_distribution_per_scan_isolated_pivot,\n", - " title='Longest path for Typescript module dependencies stacked per scan (absolute, logarithmic)',\n", + " data_frame=longest_paths_distribution_per_root_project_isolated_pivot,\n", + " title='Longest path for Typescript module dependencies stacked per root project (absolute, logarithmic)',\n", " xlabel='Module',\n", " ylabel='Typescript module paths',\n", " logy=True\n", @@ -1479,9 +1479,9 @@ "id": "47bb4916", "metadata": {}, "source": [ - "#### Longest path for each scan - Bar chart (normalized)\n", + "#### Longest path for each root project - Bar chart (normalized)\n", "\n", - "Shows the top 50 scans with the highest number of dependency paths stacked by their length." + "Shows the top 50 root projects with the highest number of dependency paths stacked by their length." 
] }, { @@ -1492,8 +1492,8 @@ "outputs": [], "source": [ "# Normalize data (percent of sum pairs)\n", - "longest_paths_distribution_per_scan_isolated_normalized_pivot=normalize_distance_distribution_for_each_row(longest_paths_distribution_per_scan_isolated_pivot)\n", - "longest_paths_distribution_per_scan_isolated_normalized_pivot.head(50)" + "longest_paths_distribution_per_root_project_isolated_normalized_pivot=normalize_distance_distribution_for_each_row(longest_paths_distribution_per_root_project_isolated_pivot)\n", + "longest_paths_distribution_per_root_project_isolated_normalized_pivot.head(50)" ] }, { @@ -1504,9 +1504,9 @@ "outputs": [], "source": [ "plot_stacked_distances_for_each_row(\n", - " data_frame=longest_paths_distribution_per_scan_isolated_normalized_pivot.head(50),\n", - " title='Longest path for Typescript module dependencies stacked per scan (normalized in %)',\n", - " xlabel='Scan',\n", + " data_frame=longest_paths_distribution_per_root_project_isolated_normalized_pivot.head(50),\n", + " title='Longest path for Typescript module dependencies stacked per root project (normalized in %)',\n", + " xlabel='Root project',\n", " ylabel='Typescript module paths'\n", ")" ] @@ -1555,7 +1555,7 @@ ], "code_graph_analysis_pipeline_data_validation": "ValidateTypescriptModuleDependencies", "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "codegraph", "language": "python", "name": "python3" }, diff --git a/renovate.json b/renovate.json index bc552f6c9..9360ccb13 100644 --- a/renovate.json +++ b/renovate.json @@ -79,7 +79,7 @@ { "customType": "regex", "fileMatch": [ - "^scripts\/profiles\/Neo4jv5\\.sh$", + "^scripts\/profiles\/Neo4jv5.*\\.sh$", "^scripts\/profiles\/Default\\.sh$", "^scripts\/[^\/]*\\.sh$" ], @@ -92,7 +92,7 @@ { "customType": "regex", "fileMatch": [ - "^scripts\/profiles\/Neo4jv5\\.sh$", + "^scripts\/profiles\/Neo4jv5.*\\.sh$", "^scripts\/profiles\/Default\\.sh$", "^scripts\/[^\/]*\\.sh$" ], @@ -105,7 +105,7 @@ { 
"customType": "regex", "fileMatch": [ - "^scripts\/profiles\/Neo4jv5\\.sh$", + "^scripts\/profiles\/Neo4jv5.*\\.sh$", "^scripts\/profiles\/Default\\.sh$", "^scripts\/[^\/]*\\.sh$" ], @@ -118,7 +118,7 @@ { "customType": "regex", "fileMatch": [ - "^scripts\/profiles\/Neo4jv5\\.sh$", + "^scripts\/profiles\/Neo4jv5.*\\.sh$", "^scripts\/profiles\/Default\\.sh$", "^scripts\/[^\/]*\\.sh$" ], @@ -133,7 +133,7 @@ { "customType": "regex", "fileMatch": [ - "^scripts\/profiles\/Neo4jv5\\.sh$", + "^scripts\/profiles\/Neo4jv5.*\\.sh$", "^scripts\/profiles\/Default\\.sh$", "^scripts\/[^\/]*\\.sh$" ], diff --git a/scripts/analysis/analyze.sh b/scripts/analysis/analyze.sh index a56e3e0dc..ba02f4e63 100755 --- a/scripts/analysis/analyze.sh +++ b/scripts/analysis/analyze.sh @@ -76,13 +76,13 @@ while [[ $# -gt 0 ]]; do done # Assure that the analysis report compilation only consists of letters and numbers -if ! [[ ${analysisReportCompilation} =~ ^[[:alnum:]]+$ ]]; then +if ! [[ ${analysisReportCompilation} =~ ^[-[:alnum:]]+$ ]]; then - echo "analyze: Report can only contain letters and numbers." + echo "analyze: Report can only contain letters, numbers and hyphens." exit 1 fi # Assure that the settings profile only consists of letters and numbers -if ! [[ ${settingsProfile} =~ ^[[:alnum:]]+$ ]]; then +if ! [[ ${settingsProfile} =~ ^[-[:alnum:]]+$ ]]; then - echo "analyze: Error: Settings profile can only contain letters and numbers." + echo "analyze: Error: Settings profile can only contain letters, numbers and hyphens." 
exit 1 fi diff --git a/scripts/prepareAnalysis.sh b/scripts/prepareAnalysis.sh index 8d5a1d450..11b9ad028 100644 --- a/scripts/prepareAnalysis.sh +++ b/scripts/prepareAnalysis.sh @@ -43,6 +43,7 @@ TYPESCRIPT_CYPHER_DIR="$CYPHER_DIR/Typescript_Enrichment" GENERAL_ENRICHMENT_CYPHER_DIR="${CYPHER_DIR}/General_Enrichment" COLOR_RED='\033[0;31m' +COLOR_YELLOW='\033[0;33m' COLOR_DEFAULT='\033[0m' # Preparation - Data verification: DEPENDS_ON relationships @@ -64,6 +65,7 @@ execute_cypher "${GENERAL_ENRICHMENT_CYPHER_DIR}/Add_file_name and_extension.cyp # Preparation - Enrich Graph for Typescript by adding "module" and "name" properties execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Index_module_name.cypher" execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Add_module_properties.cypher" +execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Set_localRootPath_for_modules.cypher" execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Mark_test_modules.cypher" # Preparation - Enrich Graph for Typescript by adding a name properties @@ -82,10 +84,11 @@ execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Remove_duplicate_CONTAINS_relations_bet execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Link_projects_to_npm_packages.cypher" dataVerificationResult=$( execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Verify_projects_linked_to_npm_packages.cypher" "${@}") if is_csv_column_greater_zero "${dataVerificationResult}" "unresolvedProjectsCount"; then - # There are Typescript projects and the unresolvedProjectsCount is greater than zero - echo -e "${COLOR_RED}prepareAnalysis: Error: Data verification failed. There are Typescript projects without a linked npm package:${COLOR_DEFAULT}" - echo -e "${COLOR_RED}${dataVerificationResult}${COLOR_DEFAULT}" - exit 1 + # Warning: There are Typescript projects that are not linked to NPM Packages (unresolvedProjectsCount is greater than zero). + # It is possible to have projects with a tsconfig.json file but without a package.json e.g. for testing purposes. 
+ echo -e "${COLOR_YELLOW}prepareAnalysis: Data verification warning: There are Typescript projects that are not linked to a npm package:${COLOR_DEFAULT}" + echo -e "${COLOR_YELLOW}${dataVerificationResult}${COLOR_DEFAULT}" + # Since this is now only a warning, execution will be continued. fi execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Link_external_modules_to_corresponding_npm_dependency.cypher" diff --git a/scripts/profiles/Neo4jv4.sh b/scripts/profiles/Neo4jv4.sh index 6b31b8a6a..48058fd84 100755 --- a/scripts/profiles/Neo4jv4.sh +++ b/scripts/profiles/Neo4jv4.sh @@ -22,5 +22,5 @@ NEO4J_OPEN_GDS_PLUGIN_VERSION=${NEO4J_OPEN_GDS_PLUGIN_VERSION:-"2.6.8"} # Open p NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"} # Graph Data Science Plugin Edition: "open" for OpenGDS, "full" for the full version with Neo4j license JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"1.12.2"} # Version number of the jQAssistant command line interface. Version 1.12.2 is compatible with Neo4j v4 -JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv4"} # For Neo4j 4: "jqassistant-commandline-neo4jv4" +JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv4"} JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv4-jqassistant.yaml"} # Name of the template file for the jqassistant configuration \ No newline at end of file diff --git a/scripts/profiles/Neo4jv5-continue-on-scan-errors.sh b/scripts/profiles/Neo4jv5-continue-on-scan-errors.sh index fbebcd12b..67f58ba78 100755 --- a/scripts/profiles/Neo4jv5-continue-on-scan-errors.sh +++ b/scripts/profiles/Neo4jv5-continue-on-scan-errors.sh @@ -13,14 +13,14 @@ NEO4J_HTTPS_PORT=${NEO4J_HTTPS_PORT:-"7473"} # Neo4j HTTPS port for encrypted qu NEO4J_BOLT_PORT=${NEO4J_BOLT_PORT:-"7687"} # Neo4j's own "Bolt Protocol" port # Awesome Procedures (APOC) Plugin for Neo4j -NEO4J_APOC_PLUGIN_VERSION=${NEO4J_APOC_PLUGIN_VERSION:-"5.24.0"} # Version number matches 
Neo4j version since 5.x +NEO4J_APOC_PLUGIN_VERSION=${NEO4J_APOC_PLUGIN_VERSION:-"5.24.1"} # Version number matches Neo4j version since 5.x NEO4J_APOC_PLUGIN_EDITION=${NEO4J_APOC_PLUGIN_EDITION:-"core"} # Since Neo4j v5 the core edition is updated with Neo4j NEO4J_APOC_PLUGIN_GITHUB=${NEO4J_APOC_PLUGIN_GITHUB:-"neo4j/apoc"} # Core edition was moved to "neo4j/apoc" for Neo4j v5 -NEO4J_GDS_PLUGIN_VERSION=${NEO4J_GDS_PLUGIN_VERSION:-"2.8.0"} # Version 2.4.0 is the newest version of june 2023 and compatible with Neo4j v5 +NEO4J_GDS_PLUGIN_VERSION=${NEO4J_GDS_PLUGIN_VERSION:-"2.10.1"} # Version 2.4.0 is the newest version of june 2023 and compatible with Neo4j v5 NEO4J_OPEN_GDS_PLUGIN_VERSION=${NEO4J_OPEN_GDS_PLUGIN_VERSION:-"2.11.0"} # Open package variant of the graph-data-science plugin for Neo4j (https://github.com/JohT/open-graph-data-science-packaging). Since version 2.4. compatible with Neo4j 5.x. NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"} # Graph Data Science Plugin Edition: "open" for OpenGDS, "full" for the full version with Neo4j license JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"2.3.1"} # Version number of the jQAssistant command line interface. 
Version 1.12.2 is compatible with Neo4j v4 -JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"template-neo4jv5-jqassistant-continue-on-error"} -JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv5-jqassistant.yaml"} # Name of the template file for the jqassistant configuration \ No newline at end of file +JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"} +JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv5-jqassistant-continue-on-error.yaml"} # Name of the template file for the jqassistant configuration \ No newline at end of file diff --git a/scripts/profiles/Neo4jv5.sh b/scripts/profiles/Neo4jv5.sh index c4101732a..4663bbc61 100755 --- a/scripts/profiles/Neo4jv5.sh +++ b/scripts/profiles/Neo4jv5.sh @@ -17,10 +17,10 @@ NEO4J_APOC_PLUGIN_VERSION=${NEO4J_APOC_PLUGIN_VERSION:-"5.24.1"} # Version numbe NEO4J_APOC_PLUGIN_EDITION=${NEO4J_APOC_PLUGIN_EDITION:-"core"} # Since Neo4j v5 the core edition is updated with Neo4j NEO4J_APOC_PLUGIN_GITHUB=${NEO4J_APOC_PLUGIN_GITHUB:-"neo4j/apoc"} # Core edition was moved to "neo4j/apoc" for Neo4j v5 -NEO4J_GDS_PLUGIN_VERSION=${NEO4J_GDS_PLUGIN_VERSION:-"2.10.1"} # Version 2.4.0 is the newest version of june 2023 and compatible with Neo4j v5 +NEO4J_GDS_PLUGIN_VERSION=${NEO4J_GDS_PLUGIN_VERSION:-"2.10.1"} # Version 2.4.0 is the newest version of june 2023 and compatible with Neo4j v5 NEO4J_OPEN_GDS_PLUGIN_VERSION=${NEO4J_OPEN_GDS_PLUGIN_VERSION:-"2.11.0"} # Open package variant of the graph-data-science plugin for Neo4j (https://github.com/JohT/open-graph-data-science-packaging). Since version 2.4. compatible with Neo4j 5.x. 
-NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"} # Graph Data Science Plugin Edition: "open" for OpenGDS, "full" for the full version with Neo4j license +NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"} # Graph Data Science Plugin Edition: "open" for OpenGDS, "full" for the full version with Neo4j license JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"2.3.1"} # Version number of the jQAssistant command line interface. Version 1.12.2 is compatible with Neo4j v4 -JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"} # For Neo4jv5: "jqassistant-commandline-distribution" +JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"} JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv5-jqassistant.yaml"} # Name of the template file for the jqassistant configuration \ No newline at end of file diff --git a/scripts/resetAndScan.sh b/scripts/resetAndScan.sh index 8b695ad94..973a39ac8 100755 --- a/scripts/resetAndScan.sh +++ b/scripts/resetAndScan.sh @@ -14,7 +14,7 @@ set -o errexit -o pipefail JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"2.3.1"} # Version number of the jQAssistant command line interface. 
Version 1.12.2 is compatible with Neo4j v4 -JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"} # Neo4j v5: "jqassistant-commandline-neo4jv5", Neo4j v4: "jqassistant-commandline-neo4jv4" +JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"} # Name of the jQAssistant Maven artifact JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv5-jqassistant.yaml"} # Neo4j v5: "template-neo4jv5-jqassistant.yaml", Neo4j v4: "template-neo4jv4-jqassistant.yaml" NEO4J_INITIAL_PASSWORD=${NEO4J_INITIAL_PASSWORD:-""} # Neo4j login password that was set to replace the temporary initial password diff --git a/scripts/scanTypescript.sh b/scripts/scanTypescript.sh index de503ecec..40293c727 100755 --- a/scripts/scanTypescript.sh +++ b/scripts/scanTypescript.sh @@ -43,6 +43,7 @@ fi # Returns all directories (multi-line) that contain a "package.json" file within the given base directory. find_directories_with_package_json_file() { find -L "${1}" \ + -path "${1}/package.json" -prune -o \ -type d -name "node_modules" -prune -o \ -type d -name "dist" -prune -o \ -type d -name ".yalc" -prune -o \ @@ -91,10 +92,11 @@ is_valid_scan_result() { fi local scan_file_size; scan_file_size=$(wc -c "${scan_result_file}" | awk '{print $1}') - if [ "${scan_file_size}" -le "600" ]; then - echo "scanTypescript: Info: The scanned file ${scan_result_file} is too small: ${scan_file_size} < 600" >&2 + if [ "${scan_file_size}" -le "900" ]; then + echo "scanTypescript: Info: The scanned file ${scan_result_file} is too small: ${scan_file_size} < 900" >&2 false else + echo "scanTypescript: The scanned file size: ${scan_file_size}" >&2 true fi } @@ -127,6 +129,8 @@ if [ "${changeDetectionReturnCode}" != "0" ] || [ "${TYPESCRIPT_SCAN_DRY_RUN}" = echo "scanTypescript: Info: Unsuccessful source directory scan. Trying to scan all contained packages individually." 
>&2 contained_package_directories=$( find_directories_with_package_json_file "${source_directory}" ) + echo "scanTypescript: contained_package_directories:" >&2 + echo "${contained_package_directories}" >&2 total_package_directories=$(echo "${contained_package_directories}" | wc -l | awk '{print $1}') processed_package_directories=0 diff --git a/scripts/setupJQAssistant.sh b/scripts/setupJQAssistant.sh index 9d8661ed1..38e4677f5 100755 --- a/scripts/setupJQAssistant.sh +++ b/scripts/setupJQAssistant.sh @@ -12,7 +12,7 @@ set -o errexit -o pipefail JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"2.3.1"} # Version number of the jQAssistant command line interface. Version 1.12.2 is compatible with Neo4j v4 JQASSISTANT_CLI_DOWNLOAD_URL=${JQASSISTANT_CLI_DOWNLOAD_URL:-"https://repo1.maven.org/maven2/com/buschmais/jqassistant/cli"} # Download URL for the jQAssistant CLI -JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"} # Neo4j v5: "jqassistant-commandline-neo4jv5", Neo4j v4: "jqassistant-commandline-neo4jv4" +JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"} JQASSISTANT_CLI_DISTRIBUTION=${JQASSISTANT_CLI_DISTRIBUTION:-"distribution.zip"} # Neo4j v5 & v4: "distribution.zip" TOOLS_DIRECTORY=${TOOLS_DIRECTORY:-"tools"} # Get the tools directory (defaults to "tools") SHARED_DOWNLOADS_DIRECTORY="${SHARED_DOWNLOADS_DIRECTORY:-$(dirname "$( pwd )")/downloads}" diff --git a/scripts/setupNeo4j.sh b/scripts/setupNeo4j.sh index f0b4495a3..084f02756 100755 --- a/scripts/setupNeo4j.sh +++ b/scripts/setupNeo4j.sh @@ -116,6 +116,7 @@ if [ ! -d "${NEO4J_INSTALLATION_DIRECTORY}" ] ; then neo4jImportPath=$(convertPosixToWindowsPathIfNecessary "${IMPORT_DIRECTORY}") # Create import directory in case it doesn't exist. + # The import directory needs to exist even if it's not used, since it is configured below and validated by Neo4j. mkdir -p "${IMPORT_DIRECTORY}" if [[ "$NEO4J_MAJOR_VERSION_NUMBER" -ge 5 ]]; then