From f2803e1b9f86107215d89408f2319460f48820c1 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Wed, 6 Nov 2024 08:04:18 +0100 Subject: [PATCH 1/5] Optimize performance for git plugin RESOLVES_TO matching --- cypher/GitLog/Index_absolute_file_name.cypher | 3 +++ scripts/importGit.sh | 1 + 2 files changed, 4 insertions(+) create mode 100644 cypher/GitLog/Index_absolute_file_name.cypher diff --git a/cypher/GitLog/Index_absolute_file_name.cypher b/cypher/GitLog/Index_absolute_file_name.cypher new file mode 100644 index 000000000..d0bb2e689 --- /dev/null +++ b/cypher/GitLog/Index_absolute_file_name.cypher @@ -0,0 +1,3 @@ +// Create index for the absolute file name + +CREATE INDEX INDEX_ABSOLUTE_FILE_NAME IF NOT EXISTS FOR (file:File) ON (file.absoluteFileName) \ No newline at end of file diff --git a/scripts/importGit.sh b/scripts/importGit.sh index d6466279f..183051371 100755 --- a/scripts/importGit.sh +++ b/scripts/importGit.sh @@ -150,6 +150,7 @@ postGitPluginImport() { execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_commit_sha.cypher" execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_file_name.cypher" execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_file_relative_path.cypher" + execute_cypher "${GIT_LOG_CYPHER_DIR}/Index_absolute_file_name.cypher" commonPostGitImport From 99a3e5be36148f625b2a18c248e5a2cc5350e6b4 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Wed, 6 Nov 2024 08:04:41 +0100 Subject: [PATCH 2/5] Improve progress information for Typescript scanning --- scripts/scanTypescript.sh | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/scripts/scanTypescript.sh b/scripts/scanTypescript.sh index bf5ce21ce..72bbe1efc 100755 --- a/scripts/scanTypescript.sh +++ b/scripts/scanTypescript.sh @@ -115,11 +115,15 @@ is_valid_scan_result() { } is_change_detected() { - # Scan and analyze Typescript sources only when they had been changed + local COLOR_DARK_GREY='\033[0;30m' + 
local COLOR_DEFAULT='\033[0m' local source_directory_name; source_directory_name=$(basename "${source_directory}"); + + echo -e "${COLOR_DARK_GREY}" changeDetectionHashFilePath="./${SOURCE_DIRECTORY}/typescriptScanChangeDetection-${source_directory_name}.sha" changeDetectionReturnCode=$( source "${SCRIPTS_DIR}/detectChangedFiles.sh" --readonly --hashfile "${changeDetectionHashFilePath}" --paths "${source_directory}") - + echo -e "${COLOR_DEFAULT}" + if [ "${changeDetectionReturnCode}" == "0" ] && [ "${TYPESCRIPT_SCAN_CHANGE_DETECTION}" = true ]; then true else @@ -143,6 +147,10 @@ total_source_directories=$(echo "${source_directories}" | wc -l | awk '{print $1 processed_source_directories=0 for source_directory in ${source_directories}; do + processed_source_directories=$((processed_source_directories + 1)) + progress_info_source_dirs="${processed_source_directories}/${total_source_directories}" + + # Scan and analyze Typescript sources only when they had been changed if is_change_detected; then echo "scanTypescript: Files in ${source_directory} unchanged. Scan skipped." continue # skipping scan since it had already be done according to change detection. @@ -151,9 +159,6 @@ for source_directory in ${source_directories}; do #Debugging log for change detection. "scan_directory" already logs scanning and the source directory. #echo "scanTypescript: Detected change (${changeDetectionReturnCode}) in ${source_directory}. Scanning Typescript source using @jqassistant/ts-lce." - processed_source_directories=$((processed_source_directories + 1)) - progress_info_source_dirs="${processed_source_directories}/${total_source_directories}" - if [ -f "${source_directory}/tsconfig.json" ] \ && scan_directory "${source_directory}" "${progress_info_source_dirs}" \ && is_valid_scan_result "${source_directory}" @@ -164,14 +169,17 @@ for source_directory in ${source_directories}; do echo "scanTypescript: Info: Unsuccessful or skipped source directory scan. 
Scan all contained packages individually." >&2 contained_package_directories=$( find_directories_with_package_json_file "${source_directory}" ) - echo "scanTypescript: contained_package_directories:" >&2 - echo "${contained_package_directories}" >&2 + #Debugging: List all package directories. + #echo "scanTypescript: contained_package_directories:" >&2 + #echo "${contained_package_directories}" >&2 total_package_directories=$(echo "${contained_package_directories}" | wc -l | awk '{print $1}') processed_package_directories=0 + main_source_directory_name=$(basename "${source_directory}"); + for contained_package_directory in ${contained_package_directories}; do processed_package_directories=$((processed_package_directories + 1)) - progress_info_package_dirs="${progress_info_source_dirs}: ${processed_package_directories}/${total_package_directories}" + progress_info_package_dirs="${main_source_directory_name} ${progress_info_source_dirs}: ${processed_package_directories}/${total_package_directories}" scan_directory "${contained_package_directory}" "${progress_info_package_dirs}" done From 5d155d660c6271e98c468ee09bc0b3af017f84f0 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sat, 9 Nov 2024 20:58:37 +0100 Subject: [PATCH 3/5] Add packageName to Typescript Project nodes --- .../Typescript_Enrichment/Link_projects_to_npm_packages.cypher | 1 + 1 file changed, 1 insertion(+) diff --git a/cypher/Typescript_Enrichment/Link_projects_to_npm_packages.cypher b/cypher/Typescript_Enrichment/Link_projects_to_npm_packages.cypher index 3b93b7948..b8358c6a7 100644 --- a/cypher/Typescript_Enrichment/Link_projects_to_npm_packages.cypher +++ b/cypher/Typescript_Enrichment/Link_projects_to_npm_packages.cypher @@ -26,6 +26,7 @@ MATCH (npmPackage:NPM:Package) // that contains the package.json file SET npmPackage.relativeFileDirectory = ltrim(relativeNpmPackageDirectory, '/') ,project.version = npmPackage.version + ,project.packageName = npmPackage.name 
RETURN count(*) AS numberOfCreatedNpmPackageRelationships // Detailed results for debugging //RETURN npmPackage.fileName AS npmPackageFileName From 2f8f35849bcae5494197939758f10b4f56769789 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sat, 9 Nov 2024 21:02:30 +0100 Subject: [PATCH 4/5] Treat "testing" and "-test" folders as TestRelated --- cypher/Typescript_Enrichment/Mark_test_modules.cypher | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cypher/Typescript_Enrichment/Mark_test_modules.cypher b/cypher/Typescript_Enrichment/Mark_test_modules.cypher index 5d762eb57..7c74b7130 100644 --- a/cypher/Typescript_Enrichment/Mark_test_modules.cypher +++ b/cypher/Typescript_Enrichment/Mark_test_modules.cypher @@ -4,6 +4,8 @@ MATCH (m:Module) WITH m ,(m.globalFqn contains '/__tests__/') OR (m.globalFqn contains '/test/') OR + (m.globalFqn contains '/testing/') OR + (m.globalFqn contains '-tests/') OR (m.globalFqn contains '/tests/') AS isInTestFolder ,(m.globalFqn contains '/__mocks__/') OR (m.globalFqn contains '/mock/') OR From 8f3ba20f77bee0c32194921b3466d11b5f41c773 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 10 Nov 2024 09:33:38 +0100 Subject: [PATCH 5/5] Provide script to initialize an analysis project --- GETTING_STARTED.md | 95 +++++++++++++++++++++------------------------- init.sh | 53 ++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 52 deletions(-) create mode 100755 init.sh diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md index 6b4fc1c2b..110d9617c 100644 --- a/GETTING_STARTED.md +++ b/GETTING_STARTED.md @@ -1,8 +1,8 @@ # Code Graph Analysis Pipeline - Getting started guide -This document describes the steps to get started as quickly as possible. -For more details on what you can do with this pipeline see [README](./README.md). -For more details on how the commands work in detail see [COMMANDS](./COMMANDS.md). 
+This document describes the steps to get started as quickly as possible. +👉 For more details on what else you can do see [README](./README.md). +👉 For more details on how the commands work in detail see [COMMANDS](./COMMANDS.md). ## 🛠 Prerequisites @@ -12,11 +12,11 @@ Please read through the [Prerequisites](./README.md#hammer_and_wrench-prerequisi Just run one of the following examples in the directory of this file: -- [./scripts/examples/analyzeAxonFramework.sh](./scripts/examples/analyzeAxonFramework.sh) (Java event-sourcing library) -- [./scripts/examples/analyzeAntDesign.sh](./scripts/examples/analyzeAntDesign.sh) (Typescript UI library) -- [./scripts/examples/analyzeReactRouter.sh](./scripts/examples/analyzeReactRouter.sh) (Typescript React library) +- Java Event-Sourcing Framework: [./scripts/examples/analyzeAxonFramework.sh](./scripts/examples/analyzeAxonFramework.sh) +- Typescript UI Library: [./scripts/examples/analyzeAntDesign.sh](./scripts/examples/analyzeAntDesign.sh) +- Typescript React Library: [./scripts/examples/analyzeReactRouter.sh](./scripts/examples/analyzeReactRouter.sh) -Use these optional command line options as you like: +Use these optional command line options as needed: - (Recommended) Only create CSV reports and skip Python and Node.js dependent reports. Example: @@ -38,81 +38,72 @@ Use these optional command line options as you like: ## Start an own analysis -1. Create a directory for all analysis projects. +### 1. Setup - ```shell - mkdir temp - cd temp - ``` +- Have a look at the [prerequisites](./README.md#hammer_and_wrench-prerequisites). -1. Create a working directory for your specific analysis. - - ```shell - mkdir MyFirstAnalysis - cd MyFirstAnalysis - ``` - -1. Choose an initial password for Neo4j if not already done. +- Choose an initial password for Neo4j if not already done. ```shell export NEO4J_INITIAL_PASSWORD=theinitialpasswordthatihavechosenforneo4j ``` -1. 
Create the `artifacts` directory for the code to be analyzed (without `cd` afterwards). +- Initialize your analysis project using [./init.sh](./init.sh). ```shell - mkdir artifacts + ./init.sh MyAnalysisProjectName ``` -1. Move the artifacts (e.g. Java jars json files) you want to analyze into the `artifacts` directory. +### 2. Prepare the code to be analyzed + +- Move the artifacts (e.g. Java jars json files) you want to analyze into the `artifacts` directory. -1. If you want to analyze Typescript code, create a symbolic link inside the `source` directory that points to the Typescript project or copy the project into it. +- If you want to analyze Typescript code, create a symbolic link inside the `source` directory that points to the Typescript project. Alternatively you can also copy the project into the `source` directory. -1. If you want to include git data like changed files and authors, create a symbolic link inside the `source` directory that points to the repository or clone it in the `source` directory. If you already have your Typescript project in there, you of course don't have to do it twice. If you are analyzing Java artifacts (no source needed), it is sufficient to use a bare clone that only contains the git history without the sources using `git clone --bare`. +- If you want to include git data like changed files and authors, create a symbolic link inside the `source` directory that points to the repository or clone it into the `source` directory. If you already have your Typescript project in there, you of course don't have to do it twice. If you are analyzing Java artifacts (full source not needed), it is sufficient to use a bare clone that only contains the git history without the sources using `git clone --bare`. -1. Alternatively to the steps above, run an already predefined download script +- Alternatively to the steps above, run an already predefined download script ```shell ./../../scripts/downloader/downloadAxonFramework.sh ``` -1.
Optionally use a script to download artifacts from Maven ([details](./COMMANDS.md#download-maven-artifacts-to-analyze)). +- Optionally use a script to download artifacts from Maven ([details](./COMMANDS.md#download-maven-artifacts-to-analyze)). -1. Start the analysis. +### 3. Start the analysis - - Without any additional dependencies: +- Without any additional dependencies: - ```shell - ./../../scripts/analysis/analyze.sh --report Csv - ``` + ```shell + ./../../scripts/analysis/analyze.sh --report Csv + ``` - - Jupyter notebook reports when Python and Conda are installed: +- Jupyter notebook reports when Python and Conda are installed: - ```shell - ./../../scripts/analysis/analyze.sh --report Jupyter - ``` + ```shell + ./../../scripts/analysis/analyze.sh --report Jupyter + ``` - - Graph visualizations when Node.js and npm are installed: +- Graph visualizations when Node.js and npm are installed: - ```shell - ./../../scripts/analysis/analyze.sh --report Jupyter - ``` + ```shell + ./../../scripts/analysis/analyze.sh --report Visualization + ``` - - All reports with Python, Conda, Node.js and npm installed: +- All reports with Python, Conda, Node.js and npm installed: - ```shell - ./../../scripts/analysis/analyze.sh - ``` + ```shell + ./../../scripts/analysis/analyze.sh + ``` - - To explore the database yourself without any automatically generated reports and no additional requirements: +- To explore the database yourself without any automatically generated reports and no additional requirements: - ```shell - ./../../scripts/analysis/analyze.sh --explore - ``` + ```shell + ./../../scripts/analysis/analyze.sh --explore + ``` + +👉 Open your browser and login to your [local Neo4j Web UI](http://localhost:7474/browser) with "neo4j" as user and the initial password you've chosen. - Then open your browser and login to your [local Neo4j Web UI](http://localhost:7474/browser) with "neo4j" as user and the initial password you've chosen.
+## GitHub Actions -👉 See [scripts/examples/analyzeAxonFramework.sh](./scripts/examples/analyzeAxonFramework.sh) as an example script that combines all the above steps for a Java Project. -👉 See [scripts/examples/analyzeReactRouter.sh](./scripts/examples/analyzeReactRouter.sh) as an example script that combines all the above steps for a Typescript Project. -👉 See [scripts/examples/analyzeAntDesign.sh](./scripts/examples/analyzeAntDesign.sh) as an example script that combines all the above steps for a large scale monorepo Typescript Project. 👉 See [Code Structure Analysis Pipeline](./.github/workflows/java-code-analysis.yml) on how to do this within a GitHub Actions Workflow.
diff --git a/init.sh b/init.sh
new file mode 100755
index 000000000..bba421d20
--- /dev/null
+++ b/init.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+
+# Initializes a new analysis project: creates ./temp/<analysisName> with the artifacts and source directories and links the most common scripts into it.
+# Usage: ./init.sh <analysisName> (requires NEO4J_INITIAL_PASSWORD to be set; ARTIFACTS_DIRECTORY and SOURCE_DIRECTORY optionally override the directory names)
+# Note: This script needs to be executed in the root of this directory (= same directory as this file)
+
+# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
+set -o errexit -o pipefail
+
+ARTIFACTS_DIRECTORY=${ARTIFACTS_DIRECTORY:-"artifacts"}
+SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"}
+
+# Read the first (and only) parameter containing the name of the analysis.
+analysisName="${1}"
+if [ -z "${analysisName}" ]; then
+    echo "init: Error: Missing parameter <analysisName>." >&2
+    echo "init: Usage example: ${0} <analysisName>" >&2
+    exit 1
+fi
+
+nameOfThisScript=$(basename "${0}")
+if [ ! -f "./${nameOfThisScript}" ]; then
+    echo "init: Error: Please execute the script in the root directory of the code-graph-analysis-pipeline repository." >&2
+    echo "init: Change to the directory of this ${nameOfThisScript} script and execute it from there." >&2
+    exit 1
+fi
+
+# Check if initial password environment variable is set. Like all other diagnostics, the message is written to stderr.
+if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then
+    echo "init: Error: Environment variable NEO4J_INITIAL_PASSWORD needs to be set first. Use 'export NEO4J_INITIAL_PASSWORD=<your password>'." >&2
+    exit 1
+fi
+
+# Create the temporary directory for all analysis projects if it hadn't been created yet.
+mkdir -p ./temp
+cd ./temp
+
+# Create the analysis directory inside the temp directory using the given parameter if it hadn't been created yet.
+mkdir -p "./${analysisName}"
+cd "./${analysisName}"
+
+# Create the artifacts directory inside the analysis directory for e.g. Java jar/ear files if it hadn't been created yet.
+mkdir -p "./${ARTIFACTS_DIRECTORY}"
+
+# Create the source directory inside the analysis directory for source code projects/repositories if it hadn't been created yet.
+mkdir -p "./${SOURCE_DIRECTORY}"
+
+# Create symbolic links to the most common scripts for code analysis ("-f" replaces existing links so that the script can be re-run for an existing project).
+ln -sf "./../../scripts/analysis/analyze.sh" .
+ln -sf "./../../scripts/startNeo4j.sh" .
+ln -sf "./../../scripts/stopNeo4j.sh" .
+
+echo "init: Successfully initialized analysis project ${analysisName}" >&2
\ No newline at end of file