From 0d5d3d962715bafc95078cc877899b54426930c2 Mon Sep 17 00:00:00 2001
From: JohT <7671054+JohT@users.noreply.github.com>
Date: Sat, 16 Nov 2024 10:59:19 +0100
Subject: [PATCH 1/3] Fix init script by providing forwarding scripts

---
 GETTING_STARTED.md |  6 ++++++
 init.sh            | 18 ++++++++++++++----
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md
index 110d9617c..dcc9619bd 100644
--- a/GETTING_STARTED.md
+++ b/GETTING_STARTED.md
@@ -54,6 +54,12 @@ Use these optional command line options as needed:
     ./init.sh MyAnalysisProjectName
     ```
 
+- Change into the analysis directory.
+
+    ```shell
+    cd ./temp/MyAnalysisProjectName
+    ```
+
 ### 2. Prepare the code to be analyzed
 
 - Move the artifacts (e.g. Java jars json files) you want to analyze into the `artifacts` directory.
diff --git a/init.sh b/init.sh
index bba421d20..41ee6117e 100755
--- a/init.sh
+++ b/init.sh
@@ -31,6 +31,16 @@ if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then
     exit 1
 fi
 
+# Creates an executable script in the current directory that forwards all its arguments to the given script (${1}).
+createForwardingScript() {
+    local originalScript="${1}"
+    local scriptName; scriptName=$(basename "${originalScript}")
+    # Remove leftovers (e.g. a symbolic link) first. Writing through a link would overwrite the original script.
+    rm -f -- "./${scriptName}"
+    printf '%s\n' '#!/usr/bin/env bash' "${originalScript} \"\${@}\"" > "./${scriptName}"
+    chmod +x "./${scriptName}" # Explicit instead of inheriting the mode via "cp -n", which is skipped for existing files.
+}
+
 # Create the temporary directory for all analysis projects if it hadn't been created yet.
 mkdir -p ./temp
 cd ./temp
@@ -45,9 +55,9 @@ mkdir -p "./${ARTIFACTS_DIRECTORY}"
 # Create the source directory inside the analysis directory for source code projects/repositories if it hadn't been created yet.
 mkdir -p "./${SOURCE_DIRECTORY}"
 
-# Create symbolic links to the most common scripts for code analysis.
-ln -s "./../../scripts/analysis/analyze.sh" .
-ln -s "./../../scripts/startNeo4j.sh" .
-ln -s "./../../scripts/stopNeo4j.sh" .
+# Create forwarding scripts in the analysis directory that delegate to the most commonly used scripts.
+createForwardingScript "./../../scripts/analysis/analyze.sh"
+createForwardingScript "./../../scripts/startNeo4j.sh"
+createForwardingScript "./../../scripts/stopNeo4j.sh"
 
 echo "init: Successfully initialized analysis project ${analysisName}" >&2
\ No newline at end of file

From 0d9a70b3c78aa2f80d4ff121261041b5566ed506 Mon Sep 17 00:00:00 2001
From: JohT <7671054+JohT@users.noreply.github.com>
Date: Sat, 16 Nov 2024 10:59:59 +0100
Subject: [PATCH 2/3] Exclude reports from change detection

---
 scripts/detectChangedFiles.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/detectChangedFiles.sh b/scripts/detectChangedFiles.sh
index f4ae282c5..f27192fdd 100755
--- a/scripts/detectChangedFiles.sh
+++ b/scripts/detectChangedFiles.sh
@@ -10,6 +10,7 @@
 #            A second call without this option will be needed for the change detection to work.
 #            This is helpful to decide if an operation should be done based on changes while waiting for its success to finally save the change state.
 # --paths Comma-separated list of file- and directory-names that are used for calculating the hash based on their name and size.
+# --hashfile Path to the file that contains the hash for change detection. Defaults to the value of the environment variable CHANGE_DETECTION_HASH_FILE_PATH.
 
 # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands)
 set -o errexit -o pipefail
@@ -88,6 +89,7 @@ file_names_and_sizes() {
           -type d -name "node_modules" -prune -o \
           -type d -name "target" -prune -o \
           -type d -name "temp" -prune -o \
+          -type d -name ".reports" -prune -o \
           -not -path "${hashFilePath}" \
           -type f \
           -exec stat -f "%N %z" {} + \

From 690077a3be7dc4f619cbbed53fd4cc6bf0df1e12 Mon Sep 17 00:00:00 2001
From: JohT <7671054+JohT@users.noreply.github.com>
Date: Sun, 17 Nov 2024 08:47:09 +0100
Subject: [PATCH 3/3] Improve git file to code file matching performance

---
 ...VES_TO_relationships_to_git_files_for_Java.cypher |  8 +++++---
 ..._relationships_to_git_files_for_Typescript.cypher | 12 +++++++-----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher b/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher
index 5085f852a..804e8649d 100644
--- a/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher
+++ b/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Java.cypher
@@ -6,7 +6,7 @@
 //       The differences are subtle but need to be thought through and tested carefully.
 //       Having separate files makes it obvious that there needs to be one for every new source code language.
 
-MATCH (code_file:!Git&File)
+MATCH (code_file:!Git&File&!Directory)
 WHERE  code_file.fileName IS NOT NULL
   // Use only original code files, no resolved duplicates
   AND NOT EXISTS { (code_file)-[:RESOLVES_TO]->(other_file:File&!Git) }
@@ -17,8 +17,10 @@ MATCH (git_file:Git&File)
      ,git_file
      ,coalesce(git_file.fileName, git_file.relativePath) AS gitFileName
 WHERE gitFileName ENDS WITH codeFileName
-MERGE (git_file)-[:RESOLVES_TO]->(code_file)
-  SET git_file.resolved = true
+ CALL { WITH git_file, code_file
+       MERGE (git_file)-[:RESOLVES_TO]->(code_file)
+          ON CREATE SET git_file.resolved = true
+      } IN TRANSACTIONS
 RETURN count(DISTINCT codeFileName)  AS numberOfCodeFiles
       ,collect(DISTINCT codeFileName + ' <-> ' + gitFileName + '\n')[0..4] AS examples
 // RETURN codeFileName, gitFileName
diff --git a/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher b/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher
index 11f249d55..82059ad04 100644
--- a/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher
+++ b/cypher/GitLog/Add_RESOLVES_TO_relationships_to_git_files_for_Typescript.cypher
@@ -3,13 +3,13 @@
 //       The differences are subtle but need to be thought through and tested carefully.
 //       Having separate files makes it obvious that there needs to be one for every new source code language.
 
-MATCH (code_file:!Git&File)
+MATCH (code_file:!Git&File&!Directory&!Scan)
 WHERE (code_file.absoluteFileName IS NOT NULL OR code_file.fileName IS NOT NULL)
-  // Use only original code files, no resolved duplicates
-  AND NOT EXISTS { (code_file)-[:RESOLVES_TO]->(other_file:File&!Git) } 
  WITH code_file
      ,coalesce(code_file.absoluteFileName, code_file.fileName) AS codeFileName
 MATCH (git_file:Git&File)
+WHERE codeFileName      ENDS WITH git_file.fileName
+   OR codeFileName      ENDS WITH git_file.relativePath
 // Use repository if available to overcome ambiguity in multi source analysis
 OPTIONAL MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file)
  WITH *
@@ -19,8 +19,10 @@ OPTIONAL MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file)
               ,coalesce(git_repository.name + '/', '')      + git_file.relativePath
       ) AS gitFileName
 WHERE codeFileName      ENDS WITH gitFileName
-MERGE (git_file)-[:RESOLVES_TO]->(code_file)
-  SET git_file.resolved = true
+ CALL { WITH git_file, code_file
+       MERGE (git_file)-[:RESOLVES_TO]->(code_file)
+          ON CREATE SET git_file.resolved = true
+      } IN TRANSACTIONS
 RETURN count(DISTINCT codeFileName)  AS numberOfCodeFiles
       ,collect(DISTINCT codeFileName + ' <-> ' + gitFileName + '\n')[0..4] AS examples
 // RETURN codeFileName, gitFileName