Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/code-structure-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ jobs:
shell: bash -el {0}
env:
NEO4J_INITIAL_PASSWORD: ${{ secrets.NEO4J_INITIAL_PASSWORD }}
ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION: "true"
run: |
./../../scripts/analysis/analyze.sh

Expand All @@ -136,7 +137,6 @@ jobs:
name: code-analysis-logs-java-${{ matrix.java }}-python-${{ matrix.python }}-mambaforge-${{ matrix.mambaforge }}
path: |
./temp/**/runtime/*
./results
retention-days: 5

# Upload successful results in case they are needed for troubleshooting
Expand Down
6 changes: 3 additions & 3 deletions COMMANDS.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,12 @@ If only the Jupyter reports are needed e.g. when the CSV reports had already bee
./../../scripts/analysis/analyze.sh --report Jupyter
```

#### Start an analysis without PDF generation
#### Start an analysis with PDF generation

Generating a PDF from a Jupyter notebook using [nbconvert](https://nbconvert.readthedocs.io) might take a while or even fail due to a timeout error. Here is an example on how to skip PDF generation:
Note: Generating a PDF from a Jupyter notebook using [nbconvert](https://nbconvert.readthedocs.io) takes some time and might even fail due to a timeout error.

```shell
SKIP_JUPYTER_NOTEBOOK_PDF_GENERATION=true ./../../scripts/analysis/analyze.sh
ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION=true ./../../scripts/analysis/analyze.sh
```

#### Setup everything to explore the graph manually
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,15 +124,15 @@ The [Code Structure Analysis Pipeline](./.github/workflows/code-structure-analys
- How can I trigger a full rescan of all artifacts?
👉 Delete the file `artifactsChangeDetectionHash.txt` in the `artifacts` directory.

- How can PDF generation be skipped to speed up report generation and not depend on chromium?
👉 Set environment variable `SKIP_JUPYTER_NOTEBOOK_PDF_GENERATION` to anything except an empty string. Example:
- How can PDF generation for Jupyter Notebooks be enabled (depends on chromium, takes more time)?
👉 Set environment variable `ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION` to anything except an empty string. Example:

```shell
export SKIP_JUPYTER_NOTEBOOK_PDF_GENERATION="true"
export ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION="true"
```

👉 Or prepend your command with `SKIP_JUPYTER_NOTEBOOK_PDF_GENERATION="true"` like:
👉 Alternatively prepend your command with `ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION="true"` like:

```shell
SKIP_JUPYTER_NOTEBOOK_PDF_GENERATION=true ./../../scripts/analysis/analyze.sh
ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION=true ./../../scripts/analysis/analyze.sh
```
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,25 @@ CALL gds.modularity.stream(
,communityProperty: $dependencies_projection_write_property
})
YIELD communityId, modularity
WITH communityId, modularity
WITH collect({communityId: communityId, modularity: modularity}) AS modularities
MATCH (member)
WHERE member[$dependencies_projection_write_property] = communityId
WHERE member[$dependencies_projection_write_property] IS NOT NULL
AND $dependencies_projection_node IN LABELS(member)
WITH communityId
,modularity
WITH modularities
,member[$dependencies_projection_write_property] AS communityId
,coalesce(member.fqn, member.fileName, member.name) AS memberName
,coalesce(member.name, replace(last(split(member.fileName, '/')), '.jar', '')) AS shortMemberName
RETURN communityId
,modularity
WITH modularities
,communityId
,count(DISTINCT memberName) AS memberCount
,collect(DISTINCT shortMemberName) AS shortMemberNames
,collect(DISTINCT memberName) AS memberNames
,reduce(memberModularity = 0, modularity IN modularities |
CASE modularity.communityId WHEN communityId THEN modularity.modularity
ELSE memberModularity END) AS memberModularity
RETURN communityId
,memberModularity
,memberCount
,shortMemberNames
,memberNames
ORDER BY communityId ASCENDING
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
// Community Detection Write Modularity
// Community Detection Modularity Write

CALL gds.modularity.stream(
$dependencies_projection + '-without-empty', {
relationshipWeightProperty: $dependencies_projection_weight_property
,communityProperty: $dependencies_projection_write_property
})
YIELD communityId, modularity
WITH communityId, modularity
WITH collect({communityId: communityId, modularity: modularity}) AS modularities
MATCH (member)
WHERE member[$dependencies_projection_write_property] = communityId
WHERE member[$dependencies_projection_write_property] IS NOT NULL
AND $dependencies_projection_node IN LABELS(member)
CALL apoc.create.setProperty(member, $dependencies_projection_write_property + 'Modularity', modularity)
WITH modularities
,member
,reduce(memberModularity = 0, modularity IN modularities |
CASE modularity.communityId WHEN member[$dependencies_projection_write_property] THEN modularity.modularity
ELSE memberModularity END) AS memberModularity
CALL apoc.create.setProperty(member, $dependencies_projection_write_property + 'Modularity', memberModularity)
YIELD node
RETURN count(DISTINCT node) AS writtenModularityNodes;
RETURN count(DISTINCT node) AS writtenModularityNodes
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

MATCH (member)
WHERE $dependencies_projection_node IN LABELS(member)
AND member.louvainCommunityId IS NOT NULL
AND member.leidenCommunityId IS NOT NULL
WITH member.communityLouvainId AS louvainCommunityId
,member.communityLeidenId AS leidenCommunityId
,coalesce(member.fqn, member.fileName, member.signature, member.name) AS memberName
,coalesce(member.name, replace(last(split(member.fileName, '/')), '.jar', '')) AS shortMemberName
WHERE louvainCommunityId IS NOT NULL
AND leidenCommunityId IS NOT NULL
WITH louvainCommunityId
,leidenCommunityId
,collect(DISTINCT shortMemberName) AS shortMemberNames
Expand Down
17 changes: 10 additions & 7 deletions scripts/analysis/analyze.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@

# Requires setupNeo4j.sh,setupJQAssistant.sh,startNeo4j.sh,resetAndScanChanged.sh,prepareAnalysis.sh,stopNeo4j.sh,compilations/*.sh,profiles/*.sh

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Overrideable variables with directory names
REPORTS_SCRIPTS_DIRECTORY=${REPORTS_SCRIPTS_DIRECTORY:-"reports"} # Working directory containing the generated reports
REPORT_COMPILATIONS_SCRIPTS_DIRECTORY=${REPORT_COMPILATIONS_SCRIPTS_DIRECTORY:-"compilations"} # Repository directory that contains scripts that execute selected report generation scripts
Expand Down Expand Up @@ -116,18 +119,18 @@ fi

# Execute the settings profile script that sets all the necessary settings variables (overrideable by environment variables).
echo "analyze: Using analysis settings profile script ${SETTINGS_PROFILE_SCRIPT}"
source "${SETTINGS_PROFILE_SCRIPT}" || exit 2
source "${SETTINGS_PROFILE_SCRIPT}"

# Setup Tools
source "${SCRIPTS_DIR}/setupNeo4j.sh" || exit 3
source "${SCRIPTS_DIR}/setupJQAssistant.sh" || exit 3
source "${SCRIPTS_DIR}/startNeo4j.sh" || exit 3
source "${SCRIPTS_DIR}/setupNeo4j.sh"
source "${SCRIPTS_DIR}/setupJQAssistant.sh"
source "${SCRIPTS_DIR}/startNeo4j.sh"

# Scan and analyze artifacts when they were changed
source "${SCRIPTS_DIR}/resetAndScanChanged.sh" || exit 4
source "${SCRIPTS_DIR}/resetAndScanChanged.sh"

# Prepare and validate graph database before analyzing and creating reports
source "${SCRIPTS_DIR}/prepareAnalysis.sh" || exit 5
source "${SCRIPTS_DIR}/prepareAnalysis.sh"

if ${exploreMode}; then
echo "analyze: Explore mode activated. Report generation will be skipped. Neo4j keeps running."
Expand All @@ -138,7 +141,7 @@ fi
# Create Reports
#########################
echo "analyze: Creating Reports with ${REPORT_COMPILATION_SCRIPT} ..."
source "${REPORT_COMPILATION_SCRIPT}" || exit 6
source "${REPORT_COMPILATION_SCRIPT}"

# Stop Neo4j at the end
source "${SCRIPTS_DIR}/stopNeo4j.sh"
3 changes: 3 additions & 0 deletions scripts/copyReportsIntoResults.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

# Requires generateJupyterReportReference.sh

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

## Get this "scripts" directory if not already set
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
Expand Down
7 changes: 6 additions & 1 deletion scripts/detectChangedArtifacts.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
# The hash value is generated based on all files (their names and properties) within the artifacts directory.
# A change is detected when the current hash and the stored one differ.

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

ARTIFACTS_DIRECTORY=${ARTIFACTS_DIRECTORY:-"artifacts"}
ARTIFACTS_CHANGE_DETECTION_HASH_FILE=${ARTIFACTS_CHANGE_DETECTION_HASH_FILE:-"artifactsChangeDetectionHash.txt"} # Name of the file that contains the hash code of the file list for change detection
ARTIFACTS_CHANGE_DETECTION_HASH_FILE_PATH="./${ARTIFACTS_DIRECTORY}/$ARTIFACTS_CHANGE_DETECTION_HASH_FILE"
Expand All @@ -16,7 +19,9 @@ fi

# Use find to list all files in the directory with their properties,
# sort the output, and pipe it to md5 to create a hash
CURRENT_ARTIFACTS_HASH="$( find "./$ARTIFACTS_DIRECTORY" -type f -not -name "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE}" -exec stat -f "%N %p %z" {} + | sort | md5 -r )"
# Use openssl md5, which is available on at least Mac and Linux.
# See: https://github.com/TRON-US/go-btfs/issues/90#issuecomment-517409369
CURRENT_ARTIFACTS_HASH="$( find "./$ARTIFACTS_DIRECTORY" -type f -not -name "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE}" -exec openssl md5 -binary {} + | openssl md5 | awk '{print $2}' )"

# Assume that the files where changed if the file containing the hash of the file list does not exist yet.
if [ ! -f "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE_PATH}" ] ; then
Expand Down
3 changes: 3 additions & 0 deletions scripts/documentation/appendEnvironmentVariables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# Note: If called with "clear" instead of a filename then the generated markdown reference documentation file is deleted.
# This is helpful to start over with the generation of a new document.

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Markdown file name
markdownFile="ENVIRONMENT_VARIABLES.md"

Expand Down
3 changes: 3 additions & 0 deletions scripts/documentation/generateCsvReportReference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# Note: This script was generated by Chat-GPT after some messages back and forth:
# https://chat.openai.com/share/0bd3cde7-32d0-460d-830c-79b7d00a2492

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Output Markdown file name
output_file="CSV_REPORTS.md"

Expand Down
3 changes: 3 additions & 0 deletions scripts/documentation/generateCypherReference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
# Generates "CYPHER.md" containing a reference to all Cypher files in this directory and its subdirectories.
# This script was generated by Chat-GPT after some messages back and forth and then tuned manually.

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Markdown file name
markdown_file="CYPHER.md"

Expand Down
3 changes: 3 additions & 0 deletions scripts/documentation/generateEnvironmentVariableReference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

# Requires appendEnvironmentVariable.sh

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

## Get this "scripts" directory if not already set
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
Expand Down
3 changes: 3 additions & 0 deletions scripts/documentation/generateImageReference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

# Generates "IMAGES.md" containing a reference to all images (PNG) in this directory and its subdirectories.

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Markdown file name
markdown_file="IMAGES.md"

Expand Down
3 changes: 3 additions & 0 deletions scripts/documentation/generateJupyterReportReference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
# Generates "JUPYTER_REPORTS.md" containing a reference to all Jupyter Notebook Markdown reports in this directory and its subdirectories.
# This script was generated by Chat-GPT after some messages back and forth and then tuned manually.

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Markdown file name
markdown_file="JUPYTER_REPORTS.md"

Expand Down
3 changes: 3 additions & 0 deletions scripts/documentation/generateScriptReference.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
# Generates "SCRIPTS.md" containing a reference to all scripts in this directory and its subdirectories.
# This script was generated by Chat-GPT after some messages back and forth and then tuned manually.

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Markdown file name
markdown_file="SCRIPTS.md"

Expand Down
3 changes: 3 additions & 0 deletions scripts/download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
# --url Download URL (required)
# --filename Target file name with extension without path (optional, default = basename of download URL)

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Function to display script usage
usage() {
echo "Usage: $0 --url https://my.download.url [--filename download-file-name-without-path.ext> (default=url filename)]"
Expand Down
11 changes: 7 additions & 4 deletions scripts/downloadMavenArtifact.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@

# Requires download.sh

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Overrideable constants
ARTIFACTS_DIRECTORY=${ARTIFACTS_DIRECTORY:-"artifacts"}
SHARED_DOWNLOADS_DIRECTORY="${SHARED_DOWNLOADS_DIRECTORY:-$(dirname "$( pwd )")/downloads}"
Expand Down Expand Up @@ -67,16 +70,16 @@ DOWNLOAD_URL="${BASE_URL}/${GROUP_ID_FOR_API}/${artifactId}/${version}/${ARTIFAC

# Download Maven Artifact into the "targetDirectory"
if [ ! -f "./${targetDirectory}/${ARTIFACT_FILENAME}" ] ; then
source ${SCRIPTS_DIR}/download.sh --url "${DOWNLOAD_URL}" || exit 1
source ${SCRIPTS_DIR}/download.sh --url "${DOWNLOAD_URL}"

# Create artifacts targetDirectory if it doesn't exist
mkdir -p "./${targetDirectory}" || exit 1
mkdir -p "./${targetDirectory}"

# Delete already existing older versions of the artifact
rm -f "./${targetDirectory}/${artifactId}"* || exit 1
rm -f "./${targetDirectory}/${artifactId}"*

# Copy artifact into artifacts targetDirectory
cp -R "${SHARED_DOWNLOADS_DIRECTORY}/${ARTIFACT_FILENAME}" "./${targetDirectory}" || exit 1
cp -R "${SHARED_DOWNLOADS_DIRECTORY}/${ARTIFACT_FILENAME}" "./${targetDirectory}"
else
echo "downloadMavenArtifact: ${ARTIFACT_FILENAME} already downloaded into target directory ${targetDirectory}"
fi
Expand Down
11 changes: 7 additions & 4 deletions scripts/examples/analyzeAxonFramework.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# Note: The first (and only) parameter is the version of AxonFramework to analyze.
# Note: This script is meant to be started in the root directory of this repository.

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

# Read the first input argument containing the version of the artifacts
if [ "$#" -ne 1 ]; then
echo "analyzeAxonFramework Error: Usage: $0 <version>" >&2
Expand All @@ -20,12 +23,12 @@ if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then
fi

# Create the temporary directory for all analysis projects.
mkdir -p ./temp || exit 1
cd ./temp || exit 1
mkdir -p ./temp
cd ./temp

# Create the working directory for this specific analysis.
mkdir -p "./AxonFramework-${artifactsVersion}" || exit 2
cd "./AxonFramework-${artifactsVersion}" || exit 2
mkdir -p "./AxonFramework-${artifactsVersion}"
cd "./AxonFramework-${artifactsVersion}"

# Create the artifacts directory that will contain the code to be analyzed.
mkdir -p ./artifacts
Expand Down
Loading