From 0d5d3d962715bafc95078cc877899b54426930c2 Mon Sep 17 00:00:00 2001
From: JohT <7671054+JohT@users.noreply.github.com>
Date: Sat, 16 Nov 2024 10:59:19 +0100
Subject: [PATCH 1/3] Fix init script by providing forwarding scripts

---
 GETTING_STARTED.md |  6 ++++++
 init.sh            | 25 +++++++++++++++++++++----
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md
index 110d9617c..dcc9619bd 100644
--- a/GETTING_STARTED.md
+++ b/GETTING_STARTED.md
@@ -54,6 +54,12 @@ Use these optional command line options as needed:
     ./init.sh MyAnalysisProjectName
     ```
 
+- Change into the analysis directory.
+
+    ```shell
+    cd ./temp/MyAnalysisProjectName
+    ```
+
 ### 2. Prepare the code to be analyzed
 
 - Move the artifacts (e.g. Java jars json files) you want to analyze into the `artifacts` directory.
diff --git a/init.sh b/init.sh
index bba421d20..41ee6117e 100755
--- a/init.sh
+++ b/init.sh
@@ -31,6 +31,23 @@ if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then
     exit 1
 fi
 
+# Creates an executable script in the current directory that forwards all of its
+# command line arguments to the given original script.
+# Arguments: $1 - Path to the original script. It is written verbatim into the forwarding script.
+createForwardingScript() {
+    local originalScript="${1}"
+    local scriptName;scriptName=$(basename "$originalScript")
+
+    # Remove any leftover file or symbolic link first. Redirecting into an existing
+    # symbolic link (e.g. from the former "ln -s" setup) would overwrite the original script.
+    rm -f "./${scriptName}"
+    echo "#!/usr/bin/env bash" > "./${scriptName}"
+    # shellcheck disable=SC2016
+    echo "\"${originalScript}\" \"\${@}\"" >> "./${scriptName}"
+    # Make the forwarding script executable explicitly instead of inheriting the file mode from a copy.
+    chmod +x "./${scriptName}"
+}
+
 # Create the temporary directory for all analysis projects if it hadn't been created yet.
 mkdir -p ./temp
 cd ./temp
@@ -45,9 +62,9 @@ mkdir -p "./${ARTIFACTS_DIRECTORY}"
 # Create the source directory inside the analysis directory for source code projects/repositories if it hadn't been created yet.
 mkdir -p "./${SOURCE_DIRECTORY}"
 
-# Create symbolic links to the most common scripts for code analysis.
-ln -s "./../../scripts/analysis/analyze.sh" .
-ln -s "./../../scripts/startNeo4j.sh" .
-ln -s "./../../scripts/stopNeo4j.sh" .
+# Create forwarding scripts for the most important commands +createForwardingScript "./../../scripts/analysis/analyze.sh" +createForwardingScript "./../../scripts/startNeo4j.sh" +createForwardingScript "./../../scripts/stopNeo4j.sh" echo "init: Successfully initialized analysis project ${analysisName}" >&2 \ No newline at end of file From 0d9a70b3c78aa2f80d4ff121261041b5566ed506 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sat, 16 Nov 2024 10:59:59 +0100 Subject: [PATCH 2/3] Exclude reports from change detection --- scripts/detectChangedFiles.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/detectChangedFiles.sh b/scripts/detectChangedFiles.sh index f4ae282c5..f27192fdd 100755 --- a/scripts/detectChangedFiles.sh +++ b/scripts/detectChangedFiles.sh @@ -10,6 +10,7 @@ # A second call without this option will be needed for the change detection to work. # This is helpful to decide if an operation should be done based on changes while waiting for its success to finally save the change state. # --paths Comma-separated list of file- and directory-names that are used for calculating the hash based on their name and size. +# --hashfile Path to the file that contains the hash for change detection. 
Default in environment variable CHANGE_DETECTION_HASH_FILE_PATH # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -o errexit -o pipefail @@ -88,6 +89,7 @@ file_names_and_sizes() { -type d -name "node_modules" -prune -o \ -type d -name "target" -prune -o \ -type d -name "temp" -prune -o \ + -type d -name ".reports" -prune -o \ -not -path "${hashFilePath}" \ -type f \ -exec stat -f "%N %z" {} + \ From 690077a3be7dc4f619cbbed53fd4cc6bf0df1e12 Mon Sep 17 00:00:00 2001 From: JohT <7671054+JohT@users.noreply.github.com> Date: Sun, 17 Nov 2024 08:47:09 +0100 Subject: [PATCH 3/3] Improve git file to code file matching performance --- ...VES_TO_relationships_to_git_files_for_Java.cypher | 8 +++++--- ..._relationships_to_git_files_for_Typescript.cypher | 12 +++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher b/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher index 5085f852a..804e8649d 100644 --- a/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher +++ b/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher @@ -6,7 +6,7 @@ // The differences are subtle but need to be thought through and tested carefully. // Having separate files makes it obvious that there needs to be one for every new source code language. 
-MATCH (code_file:!Git&File) +MATCH (code_file:!Git&File&!Directory) WHERE code_file.fileName IS NOT NULL // Use only original code files, no resolved duplicates AND NOT EXISTS { (code_file)-[:RESOLVES_TO]->(other_file:File&!Git) } @@ -17,8 +17,10 @@ MATCH (git_file:Git&File) ,git_file ,coalesce(git_file.fileName, git_file.relativePath) AS gitFileName WHERE gitFileName ENDS WITH codeFileName -MERGE (git_file)-[:RESOLVES_TO]->(code_file) - SET git_file.resolved = true + CALL { WITH git_file, code_file + MERGE (git_file)-[:RESOLVES_TO]->(code_file) + ON CREATE SET git_file.resolved = true + } IN TRANSACTIONS RETURN count(DISTINCT codeFileName) AS numberOfCodeFiles ,collect(DISTINCT codeFileName + ' <-> ' + gitFileName + '\n')[0..4] AS examples // RETURN codeFileName, gitFileName diff --git a/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher b/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher index 11f249d55..82059ad04 100644 --- a/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher +++ b/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher @@ -3,13 +3,13 @@ // The differences are subtle but need to be thought through and tested carefully. // Having separate files makes it obvious that there needs to be one for every new source code language. 
-MATCH (code_file:!Git&File) +MATCH (code_file:!Git&File&!Directory&!Scan) WHERE (code_file.absoluteFileName IS NOT NULL OR code_file.fileName IS NOT NULL) - // Use only original code files, no resolved duplicates - AND NOT EXISTS { (code_file)-[:RESOLVES_TO]->(other_file:File&!Git) } WITH code_file ,coalesce(code_file.absoluteFileName, code_file.fileName) AS codeFileName MATCH (git_file:Git&File) +WHERE codeFileName ENDS WITH git_file.fileName + OR codeFileName ENDS WITH git_file.relativePath // Use repository if available to overcome ambiguity in multi source analysis OPTIONAL MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file) WITH * @@ -19,8 +19,10 @@ OPTIONAL MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file) ,coalesce(git_repository.name + '/', '') + git_file.relativePath ) AS gitFileName WHERE codeFileName ENDS WITH gitFileName -MERGE (git_file)-[:RESOLVES_TO]->(code_file) - SET git_file.resolved = true + CALL { WITH git_file, code_file + MERGE (git_file)-[:RESOLVES_TO]->(code_file) + ON CREATE SET git_file.resolved = true + } IN TRANSACTIONS RETURN count(DISTINCT codeFileName) AS numberOfCodeFiles ,collect(DISTINCT codeFileName + ' <-> ' + gitFileName + '\n')[0..4] AS examples // RETURN codeFileName, gitFileName