From 3dd71d986f9e42e5a62ed56dbfe7719292809c1a Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 25 Aug 2024 09:40:53 +0200 Subject: [PATCH 1/8] Remove duplicate CONTAINS relationships between files --- ...plicate_CONTAINS_relations_between_files.cypher | 14 ++++++++++++++ scripts/prepareAnalysis.sh | 3 +++ 2 files changed, 17 insertions(+) create mode 100644 cypher/Typescript_Enrichment/Remove_duplicate_CONTAINS_relations_between_files.cypher diff --git a/cypher/Typescript_Enrichment/Remove_duplicate_CONTAINS_relations_between_files.cypher b/cypher/Typescript_Enrichment/Remove_duplicate_CONTAINS_relations_between_files.cypher new file mode 100644 index 000000000..3efa12028 --- /dev/null +++ b/cypher/Typescript_Enrichment/Remove_duplicate_CONTAINS_relations_between_files.cypher @@ -0,0 +1,14 @@ +// Remove duplicate CONTAINS relationships with the same properties between files. The pattern is directed so that every relationship is matched exactly once; one relationship per group is kept and the rest are deleted. + + MATCH (directory:File)-[contains_relation:CONTAINS]->(file:File) + WITH directory + ,file + ,keys(contains_relation) AS contains_relation_property_names + ,collect(DISTINCT contains_relation)[1..] 
AS contains_relations + WHERE size(contains_relations) > 0 +UNWIND contains_relations AS contains_relation +DELETE contains_relation +RETURN count(*) +// For Debugging +// RETURN directory, file +// LIMIT 10 \ No newline at end of file diff --git a/scripts/prepareAnalysis.sh b/scripts/prepareAnalysis.sh index 2998c32e6..e7473d2c7 100644 --- a/scripts/prepareAnalysis.sh +++ b/scripts/prepareAnalysis.sh @@ -66,6 +66,9 @@ execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Add_RESOLVES_TO_relationship_for_matchi execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Add_RESOLVES_TO_relationship_for_matching_declarations.cypher" execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Add_DEPENDS_ON_relationship_to_resolved_modules.cypher" +# Preparation - Cleanup Graph for Typescript by removing duplicate relationships +execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Remove_duplicate_CONTAINS_relations_between_files.cypher" + # Preparation - Enrich Graph for Typescript by adding relationships between corresponding TS:Project and NPM:Package nodes execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Link_projects_to_npm_packages.cypher" From 360e0f28aa6d90ab74f458b63a2ef87f52c5aaca Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 25 Aug 2024 09:58:38 +0200 Subject: [PATCH 2/8] Add config dir name to Typescript project name if different to project root --- .../Add_name_to_property_on_projects.cypher | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/cypher/Typescript_Enrichment/Add_name_to_property_on_projects.cypher b/cypher/Typescript_Enrichment/Add_name_to_property_on_projects.cypher index 5978cb6b0..d354e19a2 100644 --- a/cypher/Typescript_Enrichment/Add_name_to_property_on_projects.cypher +++ b/cypher/Typescript_Enrichment/Add_name_to_property_on_projects.cypher @@ -1,7 +1,21 @@ // Set name property on Typescript project nodes MATCH (project:TS:Project)-[:HAS_ROOT]->(root:Directory) - WITH project - ,reverse(split(reverse(root.absoluteFileName), 
'/')[0]) AS projectName - SET project.name = projectName -RETURN count(*) AS numberOfNamesProjects \ No newline at end of file +OPTIONAL MATCH (project)-[:HAS_CONFIG]->(config:File)<-[:CONTAINS]-(config_dir:Directory) + WITH * + ,reverse(split(reverse(root.absoluteFileName), '/')[0]) AS projectNameFromRoot + ,reverse(split(reverse(config_dir.absoluteFileName), '/')[0]) AS projectNameFromConfig + WITH * + ,projectNameFromRoot + '/' + + nullif(projectNameFromConfig, projectNameFromRoot) AS projectNameWithDifferentConfigIfPresent + WITH * + ,coalesce(projectNameWithDifferentConfigIfPresent, projectNameFromRoot) AS projectName + SET project.name = projectName +RETURN count(*) AS numberOfNamesProjects +// For debugging +//RETURN projectNameFromRoot +// ,projectNameFromConfig +// ,projectNameWithDifferentConfigIfPresent +// ,projectName +// ,project, root, config +//LIMIT 10 \ No newline at end of file From e8cbdd776c4516c17264103d9cbfa5ab360d5eee Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 25 Aug 2024 11:57:52 +0200 Subject: [PATCH 3/8] Improve and verify linking Typescript projects to npm packages --- .../Link_projects_to_npm_packages.cypher | 14 +++++++------- .../Verify_projects_linked_to_npm_packages.cypher | 13 +++++++++++++ scripts/prepareAnalysis.sh | 14 +++++++++++++- 3 files changed, 33 insertions(+), 8 deletions(-) create mode 100644 cypher/Typescript_Enrichment/Verify_projects_linked_to_npm_packages.cypher diff --git a/cypher/Typescript_Enrichment/Link_projects_to_npm_packages.cypher b/cypher/Typescript_Enrichment/Link_projects_to_npm_packages.cypher index a99e2c9c4..c341ae58f 100644 --- a/cypher/Typescript_Enrichment/Link_projects_to_npm_packages.cypher +++ b/cypher/Typescript_Enrichment/Link_projects_to_npm_packages.cypher @@ -10,15 +10,15 @@ MATCH (npmPackage:NPM:Package) , '/package.json' , '' ) AS relativeNpmPackageDirectory - MATCH (project:TS:Project)-[:HAS_ROOT]->(projectRoot:Directory) - WHERE 
projectRoot.absoluteFileName ENDS WITH relativeNpmPackageDirectory + MATCH (project:TS:Project)-[:HAS_CONFIG]->(config:File)<-[:CONTAINS]-(projectConfigDir:Directory) + WHERE projectConfigDir.absoluteFileName ENDS WITH relativeNpmPackageDirectory WITH npmPackage ,relativeNpmPackageDirectory ,collect(DISTINCT project) AS projects - ,collect(DISTINCT projectRoot) AS projectRoots + ,collect(DISTINCT projectConfigDir) AS projectConfigDirs // Assure that the found connection is unique and not ambiguous - WHERE size(projects) = 1 - AND size(projectRoots) = 1 + WHERE size(projects) = 1 + AND size(projectConfigDirs) = 1 UNWIND projects AS project // Create a HAS_NPM_PACKAGE relationship between the Typescript project and the npm package MERGE (project)-[:HAS_NPM_PACKAGE]->(npmPackage) @@ -27,6 +27,6 @@ MATCH (npmPackage:NPM:Package) SET npmPackage.relativeFileDirectory = relativeNpmPackageDirectory RETURN count(*) AS numberOfCreatedNpmPackageRelationships // Detailed results for debugging -//RETURN npmPackage.fileName AS npmPackageFileName -// ,projectRoots[0].absoluteFileName AS projectRootDirectory +//RETURN npmPackage.fileName AS npmPackageFileName +// ,projectConfigDirs[0].absoluteFileName AS projectConfigDirectory // ,relativeNpmPackageDirectory \ No newline at end of file diff --git a/cypher/Typescript_Enrichment/Verify_projects_linked_to_npm_packages.cypher b/cypher/Typescript_Enrichment/Verify_projects_linked_to_npm_packages.cypher new file mode 100644 index 000000000..4db79e613 --- /dev/null +++ b/cypher/Typescript_Enrichment/Verify_projects_linked_to_npm_packages.cypher @@ -0,0 +1,13 @@ +// Verify that all Typescript projects are linked to npm packages. A linked project is counted once, even if it has more than one HAS_NPM_PACKAGE relationship. + + MATCH (project:TS:Project) + WITH count(project) AS totalProjectsCount + ,collect(project) AS projects +UNWIND projects AS project +OPTIONAL MATCH (project)-[:HAS_NPM_PACKAGE]->(npm:NPM:Package) + WITH totalProjectsCount + ,count(DISTINCT CASE WHEN npm IS NOT NULL THEN project END) AS npmLinkedProjectsCount +RETURN npmLinkedProjectsCount 
,totalProjectsCount + ,(totalProjectsCount - npmLinkedProjectsCount) AS unresolvedProjectsCount + ,(100.0 / totalProjectsCount * npmLinkedProjectsCount) AS npmLinkedProjectsPercentage \ No newline at end of file diff --git a/scripts/prepareAnalysis.sh b/scripts/prepareAnalysis.sh index e7473d2c7..857a162e2 100644 --- a/scripts/prepareAnalysis.sh +++ b/scripts/prepareAnalysis.sh @@ -7,6 +7,7 @@ # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -o errexit -o pipefail +# Overrideable Defaults IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT=${IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT:-"full"} # Select how to import git log data. Options: "none", "aggregated", "full". Default="full". ## Get this "scripts" directory if not already set @@ -40,10 +41,14 @@ ARTIFACT_DEPENDENCIES_CYPHER_DIR="$CYPHER_DIR/Artifact_Dependencies" TYPES_CYPHER_DIR="$CYPHER_DIR/Types" TYPESCRIPT_CYPHER_DIR="$CYPHER_DIR/Typescript_Enrichment" +COLOR_RED='\033[0;31m' +COLOR_DEFAULT='\033[0m' + # Preparation - Data verification: DEPENDS_ON relationships dataVerificationResult=$( execute_cypher "${CYPHER_DIR}/Data_verification_DEPENDS_ON_relationships.cypher" "${@}") if ! is_csv_column_greater_zero "${dataVerificationResult}" "sourceNodeCount"; then - echo "prepareAnalysis: Error: Data verification failed. At least one DEPENDS_ON relationship required. Check if the artifacts directory is empty or if the scan failed." + echo -e "${COLOR_RED}prepareAnalysis: Error: Data verification failed. At least one DEPENDS_ON relationship is required. 
Check if the artifacts directory is empty or if the scan failed.${COLOR_DEFAULT}" + echo -e "${COLOR_RED}${dataVerificationResult}${COLOR_DEFAULT}" exit 1 fi @@ -71,6 +76,13 @@ execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Remove_duplicate_CONTAINS_relations_bet # Preparation - Enrich Graph for Typescript by adding relationships between corresponding TS:Project and NPM:Package nodes execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Link_projects_to_npm_packages.cypher" +dataVerificationResult=$( execute_cypher "${TYPESCRIPT_CYPHER_DIR}/Verify_projects_linked_to_npm_packages.cypher" "${@}") +if is_csv_column_greater_zero "${dataVerificationResult}" "unresolvedProjectsCount"; then + # There are Typescript projects and the unresolvedProjectsCount is greater than zero + echo -e "${COLOR_RED}prepareAnalysis: Error: Data verification failed. There are Typescript projects without a linked npm package:${COLOR_DEFAULT}" + echo -e "${COLOR_RED}${dataVerificationResult}${COLOR_DEFAULT}" + exit 1 +fi # Preparation - Add weights to Java Package DEPENDS_ON relationships execute_cypher_summarized "${DEPENDS_ON_CYPHER_DIR}/Add_weight_property_for_Java_Interface_Dependencies_to_Package_DEPENDS_ON_Relationship.cypher" From 3151f10b0391a7463e6425d3198670c2f5c31128 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 25 Aug 2024 15:37:40 +0200 Subject: [PATCH 4/8] Improve logging --- scripts/copyPackageJsonFiles.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/copyPackageJsonFiles.sh b/scripts/copyPackageJsonFiles.sh index 6238a52da..6b4f77f03 100755 --- a/scripts/copyPackageJsonFiles.sh +++ b/scripts/copyPackageJsonFiles.sh @@ -25,10 +25,12 @@ fi echo "copyPackageJsonFiles: Existing package.json files will be copied from from ${SOURCE_DIRECTORY} to ../${ARTIFACTS_DIRECTORY}/${NPM_PACKAGE_JSON_ARTIFACTS_DIRECTORY}" echo "copyPackageJsonFiles: Author will be removed as workaround for 
https://github.com/jqassistant-plugin/jqassistant-npm-plugin/issues/5" + copied_package_json_files=0 + for file in $( find -L . -type d -name node_modules -prune -o -name 'package.json' -print0 | xargs -0 -r -I {}); do fileDirectory=$(dirname "${file}") targetDirectory="../${ARTIFACTS_DIRECTORY}/${NPM_PACKAGE_JSON_ARTIFACTS_DIRECTORY}/${fileDirectory}" - # echo "copyPackageJsonFiles: Copying ${file} to ${targetDirectory}" + # echo "copyPackageJsonFiles: Debug: Copying ${file} to ${targetDirectory}" # debug logging mkdir -p "${targetDirectory}" cp -rf "${file}" "${targetDirectory}" @@ -39,5 +41,9 @@ fi jq 'del(.author)' "${targetDirectory}/${fileName}" > "${targetDirectory}/${fileName}.edited" jq 'del(.contributors)' "${targetDirectory}/${fileName}.edited" > "${targetDirectory}/${fileName}" rm -f "${targetDirectory}/${fileName}.edited" + + copied_package_json_files=$((copied_package_json_files + 1)) done + + echo "copyPackageJsonFiles: Successfully copied ${copied_package_json_files} files" ) \ No newline at end of file From 4b4251589c393c1d37ed7b737602a34c3d6ccd12 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 25 Aug 2024 16:43:37 +0200 Subject: [PATCH 5/8] Fix latest git tag detection started from main folder --- scripts/examples/analyzeAntDesign.sh | 9 ++++++++- scripts/examples/analyzeAxonFramework.sh | 9 ++++++++- scripts/examples/analyzeReactRouter.sh | 9 ++++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/scripts/examples/analyzeAntDesign.sh b/scripts/examples/analyzeAntDesign.sh index 938c4c7e0..2bb1f0fcb 100755 --- a/scripts/examples/analyzeAntDesign.sh +++ b/scripts/examples/analyzeAntDesign.sh @@ -10,12 +10,19 @@ # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -o errexit -o pipefail +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here 
the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +echo "analyzerAntDesign: SCRIPTS_DIR=$SCRIPTS_DIR" + # Read the first input argument containing the version of the project projectVersion=$1 if [ -z "${projectVersion}" ]; then echo "analyzerAntDesign: Optional parameter is not specified. Detecting latest version..." >&2 echo "analyzerAntDesign: Usage example: $0 " >&2 - projectVersion=$( ./../../scripts/examples/detectLatestGitTag.sh --url "https://github.com/ant-design/ant-design.git" ) + projectVersion=$( "${SCRIPTS_DIR}/detectLatestGitTag.sh" --url "https://github.com/ant-design/ant-design.git" ) echo "analyzerAntDesign: Using latest version: ${projectVersion}" >&2 fi diff --git a/scripts/examples/analyzeAxonFramework.sh b/scripts/examples/analyzeAxonFramework.sh index 091ef9940..8281810f9 100755 --- a/scripts/examples/analyzeAxonFramework.sh +++ b/scripts/examples/analyzeAxonFramework.sh @@ -9,11 +9,18 @@ # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -o errexit -o pipefail +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +echo "analyzeAxonFramework: SCRIPTS_DIR=$SCRIPTS_DIR" + artifactsVersion=$1 if [ -z "${artifactsVersion}" ]; then echo "analyzeAxonFramework: Optional parameter is not specified. 
Detecting latest version..." >&2 echo "analyzeAxonFramework: Usage example: $0 " >&2 - artifactsVersion=$( ./../../scripts/examples/detectLatestGitTag.sh --url "https://github.com/AxonFramework/AxonFramework.git" --prefix "axon-") + artifactsVersion=$( "${SCRIPTS_DIR}/detectLatestGitTag.sh" --url "https://github.com/AxonFramework/AxonFramework.git" --prefix "axon-") echo "analyzeAxonFramework: Using latest version: ${artifactsVersion}" >&2 fi diff --git a/scripts/examples/analyzeReactRouter.sh b/scripts/examples/analyzeReactRouter.sh index d49540cd0..d098e4b38 100755 --- a/scripts/examples/analyzeReactRouter.sh +++ b/scripts/examples/analyzeReactRouter.sh @@ -9,12 +9,19 @@ # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -o errexit -o pipefail +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +echo "analyzerReactRouter: SCRIPTS_DIR=$SCRIPTS_DIR" + # Read the first input argument containing the version of the project projectVersion=$1 if [ -z "${projectVersion}" ]; then echo "analyzerReactRouter: Optional parameter is not specified. Detecting latest version..." 
>&2 echo "analyzerReactRouter: Usage example: $0 " >&2 - projectVersion=$( ./../../scripts/examples/detectLatestGitTag.sh --url "https://github.com/remix-run/react-router.git" ) + projectVersion=$( "${SCRIPTS_DIR}/detectLatestGitTag.sh" --url "https://github.com/remix-run/react-router.git" --prefix "react-router@") echo "analyzerReactRouter: Using latest version: ${projectVersion}" >&2 fi From 142d004dccf53349bf9dd5d6ecd9327e1fe1fc04 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 25 Aug 2024 18:37:13 +0200 Subject: [PATCH 6/8] Fix find command that doesn't return anything on Linux --- scripts/copyPackageJsonFiles.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/copyPackageJsonFiles.sh b/scripts/copyPackageJsonFiles.sh index 6b4f77f03..57ad91a9f 100755 --- a/scripts/copyPackageJsonFiles.sh +++ b/scripts/copyPackageJsonFiles.sh @@ -27,7 +27,8 @@ fi copied_package_json_files=0 - for file in $( find -L . -type d -name node_modules -prune -o -name 'package.json' -print0 | xargs -0 -r -I {}); do + #for file in $( find -L . -type d -name node_modules -prune -o -name 'package.json' -print0 | xargs -0 -r -I {}); do + for file in $( find -L . -type d -name node_modules -prune -o -name 'package.json' -print0 | xargs -0 -r -I {} echo {}); do fileDirectory=$(dirname "${file}") targetDirectory="../${ARTIFACTS_DIRECTORY}/${NPM_PACKAGE_JSON_ARTIFACTS_DIRECTORY}/${fileDirectory}" # echo "copyPackageJsonFiles: Debug: Copying ${file} to ${targetDirectory}" # debug logging From fd1feec0c3445e3761f41998b761ddad376a633f Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 25 Aug 2024 19:34:54 +0200 Subject: [PATCH 7/8] Remove outdated step in getting started guide. 
--- GETTING_STARTED.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md index 425ec3a57..da412b4a3 100644 --- a/GETTING_STARTED.md +++ b/GETTING_STARTED.md @@ -38,9 +38,7 @@ Please read through the [Prerequisites](./README.md#hammer_and_wrench-prerequisi 1. Move the artifacts (e.g. Java jars json files) you want to analyze into the `artifacts` directory. -1. Optionally, create a subdirectory `typescript` inside the `artifacts` directory and move the Typescript analysis json files you want to analyze into it. - -1. Optionally, create a `source` directory and clone the corresponding source code into it to also gather git log data. +1. Optionally, create a `source` directory and clone the corresponding source code into it to also scan git data. 1. Alternatively to the steps above, run an already predefined download script From be6e47c836c169e62d72f23a0689895c1911589c Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 25 Aug 2024 19:43:38 +0200 Subject: [PATCH 8/8] Update documentation on how to add Jupyter notebooks. Also related to 3dadb9a. --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 2be3e9960..d6b8e9efb 100644 --- a/README.md +++ b/README.md @@ -173,8 +173,7 @@ The [Code Structure Analysis Pipeline](./.github/workflows/java-code-analysis.ym - How can i add a Jupyter Notebook report to the pipeline? 👉 Put your new notebook into the [jupyter](./jupyter) directory. - 👉 Create a new Jupyter report script in the [scripts/reports](./scripts/reports/) directory. Take [OverviewJupyter.sh](./scripts/reports/OverviewJupyter.sh) as a reference for example. - 👉 The script will automatically be included because of the directory and its name ending with "Jupyter.sh". + 👉 The file will then automatically be picked up by [executeJupyterNotebookReport.sh](./scripts/executeJupyterNotebookReport.sh). 
- How can i analyze a different code basis automatically? 👉 Create a new download script like the ones in the [scripts/downloader](./scripts/downloader/) directory. Take for example [downloadAxonFramework.sh](./scripts/downloader/downloadAxonFramework.sh) as a reference for Java projects and [downloadReactRouter.sh](./scripts/downloader/downloadReactRouter.sh) as a reference for Typescript projects.