Merged
4 changes: 4 additions & 0 deletions .github/workflows/code-structure-analysis.yml
@@ -97,6 +97,10 @@ jobs:
auto-activate-base: false
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!

- name: Conda environment info
shell: bash -el {0}
run: conda info

- name: Setup temp directory if missing
run: mkdir -p ./temp

46 changes: 5 additions & 41 deletions COMMANDS.md
@@ -2,47 +2,11 @@

## Start an analysis

1. Create a directory for all analysis projects

```shell
mkdir temp
cd temp
```

1. Create a working directory for your specific analysis

```shell
mkdir MyFirstAnalysis
cd MyFirstAnalysis
```

1. Choose an initial password for Neo4j

```shell
export NEO4J_INITIAL_PASSWORD=theinitialpasswordthatihavechosenforneo4j
```

1. Create the `artifacts` directory for the code to be analyzed (without `cd` afterwards)

```shell
mkdir artifacts
```

1. Move the artifacts you want to analyze into the `artifacts` directory

1. Optionally run a predefined script to download artifacts

```shell
./../../scripts/downloader/downloadAxonFramework.sh <version>
```

1. Optionally use a script to download artifacts from Maven ([details](#download-maven-artifacts-to-analyze))

1. Start the analysis

```shell
./../../scripts/analysis/analyze.sh
```
An analysis is started with the script [analyze.sh](./scripts/analysis/analyze.sh).
To run all analysis steps, simply execute the following command:

```shell
./../../scripts/analysis/analyze.sh
```
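As listed in the prerequisites, `analyze.sh` expects the environment variable `NEO4J_INITIAL_PASSWORD` to be set beforehand. A minimal sketch (the password value is only a placeholder):

```shell
# The Neo4j initial password must be chosen before starting the analysis
# (the value below is only a placeholder).
export NEO4J_INITIAL_PASSWORD=theinitialpasswordthatihavechosenforneo4j
echo "Neo4j initial password is set: ${NEO4J_INITIAL_PASSWORD:+yes}"
```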

👉 See [scripts/examples/analyzeAxonFramework.sh](./scripts/examples/analyzeAxonFramework.sh) as an example script that combines all the above steps.
👉 See [Code Structure Analysis Pipeline](./.github/workflows/code-structure-analysis.yml) on how to do this within a GitHub Actions Workflow.
73 changes: 73 additions & 0 deletions GETTING_STARTED.md
@@ -0,0 +1,73 @@
# Code Graph Analysis Pipeline - Getting started guide

This document describes the steps to get started as quickly as possible.
For more details on what you can do with this pipeline see [README](./README.md).
For more details on how the commands work in detail see [COMMANDS](./COMMANDS.md).

## 🛠 Prerequisites

Please read through the [Prerequisites](./README.md#🛠-prerequisites) in the [README](./README.md) file for what is required to run the scripts.

## Start an analysis

1. Create a directory for all analysis projects

```shell
mkdir temp
cd temp
```

1. Create a working directory for your specific analysis

```shell
mkdir MyFirstAnalysis
cd MyFirstAnalysis
```

1. Choose an initial password for Neo4j

```shell
export NEO4J_INITIAL_PASSWORD=theinitialpasswordthatihavechosenforneo4j
```

1. Create the `artifacts` directory for the code to be analyzed (without `cd` afterwards)

```shell
mkdir artifacts
```

1. Move the artifacts you want to analyze into the `artifacts` directory

1. Optionally run a predefined script to download artifacts

```shell
./../../scripts/downloader/downloadAxonFramework.sh <version>
```

1. Optionally use a script to download artifacts from Maven ([details](#download-maven-artifacts-to-analyze))

1. Start the analysis

- Without any additional dependencies:
```shell
./../../scripts/analysis/analyze.sh --report Csv
```
- Jupyter notebook reports when Python and Conda are installed:
```shell
./../../scripts/analysis/analyze.sh --report Jupyter
```
- Graph visualizations when Node.js and npm are installed:
```shell
./../../scripts/analysis/analyze.sh --report Visualization
```
- All reports with Python, Conda, Node.js and npm installed:
```shell
./../../scripts/analysis/analyze.sh
```
- To explore the database yourself without any automatically generated reports (no additional requirements):
```shell
./../../scripts/analysis/analyze.sh --report None
```
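The numbered steps above can be combined into one sketch script. The directory name, the password, and the chosen report option are placeholders, not values fixed by the pipeline:

```shell
#!/usr/bin/env bash
# Sketch of the getting-started steps as one function (defined, not invoked here).
# "MyFirstAnalysis" and the password are placeholders.
set -euo pipefail

run_first_analysis() {
  mkdir -p temp/MyFirstAnalysis/artifacts
  cd temp/MyFirstAnalysis
  export NEO4J_INITIAL_PASSWORD=theinitialpasswordthatihavechosenforneo4j
  # Copy or download the artifacts to analyze into ./artifacts before this step.
  ./../../scripts/analysis/analyze.sh --report Csv
}
```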

👉 See [scripts/examples/analyzeAxonFramework.sh](./scripts/examples/analyzeAxonFramework.sh) as an example script that combines all the above steps.
👉 See [Code Structure Analysis Pipeline](./.github/workflows/code-structure-analysis.yml) on how to do this within a GitHub Actions Workflow.
35 changes: 29 additions & 6 deletions README.md
Expand Up @@ -6,7 +6,7 @@ Contained within this repository is a comprehensive and automated code graph ana

---

## 🚀 Features
## Features

- Analyze static code structure as a graph
- Fully automated [pipeline](./.github/workflows/code-structure-analysis.yml) from tool installation to report generation
@@ -37,13 +37,36 @@ Here are some reports that utilize Neo4j's [Graph Data Science Library](https://

## 🛠 Prerequisites

- Java 17 is required (June 2023 Neo4j 5.x requirement)
- Python and a conda package manager are required for Jupyter Notebook reports
- Chromium will automatically be downloaded if needed for Jupyter Notebook reports in PDF format
- Java 17 is [required for Neo4j](https://neo4j.com/docs/operations-manual/current/installation/requirements/#deployment-requirements-software) (Neo4j 5.x requirement).
- On Windows, it is recommended to use the Git Bash provided by [Git for Windows](https://gitforwindows.org).
- [jq](https://github.com/jqlang/jq), the "lightweight and flexible command-line JSON processor", needs to be installed ([latest releases](https://github.com/jqlang/jq/releases/latest)). Verify the installation with `jq --version`.
- Set the environment variable `NEO4J_INITIAL_PASSWORD` to a password of your choice. For example:
```shell
export NEO4J_INITIAL_PASSWORD=neo4j_password_of_my_choice
```
To run Jupyter notebooks, create an `.env` file in the folder from where you open the notebook, containing for example: `NEO4J_INITIAL_PASSWORD=neo4j_password_of_my_choice`
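For example, the exported variable can be written into the notebook's `.env` file in one go (the password is a placeholder):

```shell
export NEO4J_INITIAL_PASSWORD=neo4j_password_of_my_choice
# Create the .env file next to the Jupyter notebook you want to run.
echo "NEO4J_INITIAL_PASSWORD=${NEO4J_INITIAL_PASSWORD}" > .env
```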

### Further Prerequisites for Python and Jupyter Notebooks

- Python is required for Jupyter Notebook reports.
- A conda package manager like [Miniconda](https://docs.conda.io/projects/miniconda/en/latest) or [Anaconda](https://www.anaconda.com/download) (recommended for Windows) is required for Jupyter Notebook reports.
- Chromium will automatically be downloaded if needed for Jupyter Notebook PDF report generation.
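Whether these prerequisites are available can be checked with a small snippet; it assumes the tools are on `PATH` under their usual names:

```shell
# Check which of the Jupyter-report prerequisites are installed.
# Assumes the tools are on PATH under their usual names.
for tool in python conda; do
  if command -v "$tool" >/dev/null 2>&1; then
    echo "$tool found: $("$tool" --version 2>&1 | head -n 1)"
  else
    echo "$tool not found - install it before generating Jupyter Notebook reports"
  fi
done
```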

### Further Prerequisites for Graph Visualization

These tools are needed to run the graph visualization scripts in the [graph-visualization](./graph-visualization) directory:

- [Node.js](https://nodejs.org/en)
- [npm](https://www.npmjs.com)

### Hints for Windows

- If you are using Anaconda3, add this line to your `~/.bashrc` file: `source /c/ProgramData/Anaconda3/etc/profile.d/conda.sh`. For other conda package managers or versions, look for a similar `conda.sh` script.
- Run `conda init` in a Git Bash opened as administrator. Running it in normal mode usually leads to an error message.
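A sketch of that Anaconda3 setup step; the install path is an assumption and may differ on your machine:

```shell
# Append the conda shell hook to ~/.bashrc if it exists at the assumed Anaconda3 path.
conda_hook='/c/ProgramData/Anaconda3/etc/profile.d/conda.sh'
if [ -f "${conda_hook}" ]; then
  echo "source ${conda_hook}" >> ~/.bashrc
  echo "conda hook added to ~/.bashrc"
else
  echo "conda.sh not found at ${conda_hook} - adjust the path for your conda installation"
fi
```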

## Getting Started
## 🚀 Getting Started

See [Start an analysis](./COMMANDS.md#start-an-analysis) in the [Commands Reference](./COMMANDS.md) on how to start an analysis on your local machine.
See [GETTING_STARTED.md](./GETTING_STARTED.md) on how to get started on your local machine.

## 🏗 Pipeline and Tools

@@ -23,4 +23,4 @@ RETURN requiredMemory
,heapPercentageMin
,heapPercentageMax
,treeView
,mapView
//,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty
@@ -23,4 +23,4 @@ RETURN requiredMemory
,heapPercentageMin
,heapPercentageMax
,treeView
,mapView
//,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty
@@ -21,4 +21,4 @@ RETURN requiredMemory
,heapPercentageMin
,heapPercentageMax
,treeView
,mapView
//,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty
@@ -23,4 +23,4 @@ RETURN requiredMemory
,heapPercentageMin
,heapPercentageMax
,treeView
,mapView
//,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty
@@ -46,7 +46,6 @@ RETURN artifactName
,externalTypeRate
,numberOfExternalTypeCaller
,numberOfExternalTypeCalls
,size(externalPackageNames) AS numberOfExternalPackages
,externalPackageNames[0..4] AS top5ExternalPackages
,externalTypeNames[0..1] AS someExternalTypes
LIMIT 40
,size(externalPackageNames) AS numberOfExternalPackages
,externalPackageNames[0..4] AS top5ExternalPackages
,apoc.coll.flatten(externalTypeNames)[0..9] AS someExternalTypes
@@ -6,5 +6,5 @@ CALL gds.fastRP.stream.estimate(
,relationshipWeightProperty: $dependencies_projection_weight_property
}
)
YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView
RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView
YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView
RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView
@@ -11,5 +11,5 @@ CALL gds.beta.hashgnn.stream.estimate(
,outputDimension: toInteger($dependencies_projection_embedding_dimension)
}
)
YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView
RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView
YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView
RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView
@@ -8,5 +8,5 @@ CALL gds.node2vec.write.estimate(
,writeProperty: $dependencies_projection_write_property
}
)
YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView
RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView
YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView
RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView
4 changes: 2 additions & 2 deletions cypher/Similarity/Similarity_1a_Estimate.cypher
@@ -8,5 +8,5 @@ CALL gds.nodeSimilarity.write.estimate(
,writeProperty: 'score'
,topK: 3
})
YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView
RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView
YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView
RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView
8 changes: 6 additions & 2 deletions graph-visualization/renderVisualizations.js
@@ -9,6 +9,7 @@ import jimp from "jimp";
const indexOfScriptFilePathArgument = 1;
const __filename = process.argv[indexOfScriptFilePathArgument];
const __dirname = dirname(__filename);
console.log(`renderVisualizations.js: dirname=${__dirname}`);

/**
* Crops the image in the buffer so that there is no empty frame around it.
@@ -88,14 +89,17 @@ let browser;
* and takes a screenshot of the canvas elements using {@link takeCanvasScreenshots}.
*/
(async () => {
console.log('renderVisualizations.js: Starting headless browser...');
browser = await puppeteer.launch({ headless: "new" }); // { headless: false } for testing

// Get all *.html files in this (script) directory and its subdirectories
const htmlFiles = globSync(`${__dirname}/**/*.html`, { ignore: `${__dirname}/node_modules/**` });
// The separate filter is needed to ignore the "node_modules" directory.
// Glob's built-in filter doesn't seem to work on Windows.
const htmlFiles = globSync(`${__dirname}/**/*.html`, { absolute: true }).filter(file => !file.includes('node_modules'));
for (const htmlFile of htmlFiles) {
await takeCanvasScreenshots(browser, htmlFile);
}
console.log(`Successfully rendered ${htmlFiles.length} html file(s)`);
console.log(`renderVisualizations.js: Successfully rendered ${htmlFiles.length} html file(s)`);
})()
.catch((err) => console.error(err))
.finally(() => browser?.close());
85 changes: 85 additions & 0 deletions scripts/activateCondaEnvironment.sh
@@ -0,0 +1,85 @@
#!/usr/bin/env bash

# Activates the Conda (Python package manager) environment "codegraph" with all packages needed to execute the Jupyter Notebooks.

# Note: This script uses the conda environment defined in CODEGRAPH_CONDA_ENVIRONMENT (defaults to "codegraph").
# If the environment hasn't been created yet, the "environment.yml" file
# in the Jupyter notebook directory (JUPYTER_NOTEBOOK_DIRECTORY)
# will be used to create it.

# Requires operatingSystemFunctions.sh

# Fail on any error ("-e" = exit on first error, "-o pipefail" = exit on errors within piped commands)
set -eo pipefail

## Get this "scripts" directory if not already set
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
# This way non-standard tools like readlink aren't needed.
SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts
echo "activateCondaEnvironment: SCRIPTS_DIR=$SCRIPTS_DIR"

# Get the "jupyter" directory by taking the path of this script and going two directory up and then to "jupyter".
JUPYTER_NOTEBOOK_DIRECTORY=${JUPYTER_NOTEBOOK_DIRECTORY:-"${SCRIPTS_DIR}/../jupyter"} # Repository directory containing the Jupyter Notebooks
echo "activateCondaEnvironment: JUPYTER_NOTEBOOK_DIRECTORY=$JUPYTER_NOTEBOOK_DIRECTORY"

# Define conda environment to use for code structure analysis. Default "codegraph"
CODEGRAPH_CONDA_ENVIRONMENT=${CODEGRAPH_CONDA_ENVIRONMENT:-"codegraph"} # Name of the conda environment to use for code graph analysis
echo "activateCondaEnvironment: CONDA_PREFIX=${CONDA_PREFIX}"
echo "activateCondaEnvironment: Current conda environment=${CONDA_DEFAULT_ENV}"
echo "activateCondaEnvironment: Target conda environment=${CODEGRAPH_CONDA_ENVIRONMENT}"

if [ "${CONDA_DEFAULT_ENV}" = "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then
echo "activateCondaEnvironment: Skipping activation. Target conda environment ${CODEGRAPH_CONDA_ENVIRONMENT} is already activated."
exit 0
fi

# Include operating system functions to, for example, detect Windows.
source "${SCRIPTS_DIR}/operatingSystemFunctions.sh"

# Determine the path to "conda"
if [ -n "${CONDA}" ]; then
if isWindows; then
pathToConda="${CONDA}\\Scripts\\" # the trailing backslash character is required
else
pathToConda="${CONDA}/bin/" # the trailing slash character is required
fi
else
pathToConda=""
fi
echo "activateCondaEnvironment: pathToConda=${pathToConda}"

scriptExtension=$(ifWindows ".bat" "")
echo "activateCondaEnvironment: scriptExtension=${scriptExtension}"

# Activate the conda shell hook. This also resets CONDA_DEFAULT_ENV to "base".
# That's why CONDA_DEFAULT_ENV ("base") is never equal to CODEGRAPH_CONDA_ENVIRONMENT ("codegraph") at this point.
eval "$(${pathToConda}conda${scriptExtension} shell.bash hook)"
echo "activateCondaEnvironment: Current conda environment after shell hook=${CONDA_DEFAULT_ENV}"

# Create (if missing) and activate Conda environment for code structure graph analysis
if { "${pathToConda}conda" env list | grep "$CODEGRAPH_CONDA_ENVIRONMENT "; } >/dev/null 2>&1; then
echo "activateCondaEnvironment: Conda environment $CODEGRAPH_CONDA_ENVIRONMENT already created"
else
if [ ! -f "${JUPYTER_NOTEBOOK_DIRECTORY}/environment.yml" ] ; then
echo "activateCondaEnvironment: Couldn't find environment file ${JUPYTER_NOTEBOOK_DIRECTORY}/environment.yml."
exit 2
fi
echo "activateCondaEnvironment: Creating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}"
"${pathToConda}conda" env create --file "${JUPYTER_NOTEBOOK_DIRECTORY}/environment.yml" --name "${CODEGRAPH_CONDA_ENVIRONMENT}"
fi

echo "activateCondaEnvironment: Activating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}"
"${pathToConda}conda" activate "${CODEGRAPH_CONDA_ENVIRONMENT}"

if [ "${CONDA_DEFAULT_ENV}" != "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then
echo "activateCondaEnvironment: Retry activating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT} with plain 'conda' command"
conda activate "${CODEGRAPH_CONDA_ENVIRONMENT}"
fi

if [ "${CONDA_DEFAULT_ENV}" = "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then
echo "activateCondaEnvironment: Activated Conda environment: ${CONDA_DEFAULT_ENV}"
else
echo "activateCondaEnvironment: Error: Failed to activate Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}. ${CONDA_DEFAULT_ENV} still active."
exit 1
fi
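The environment-existence check in the script above boils down to this idiom (the environment name is just an example; it falls back to the "missing" branch when conda itself is not installed):

```shell
# Check whether a conda environment named "codegraph" already exists.
# When conda is not installed, the pipeline yields no match and the else branch runs.
environment_name="codegraph"
if conda env list 2>/dev/null | grep -q "^${environment_name} "; then
  echo "environment exists"
else
  echo "environment missing"
fi
```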
2 changes: 1 addition & 1 deletion scripts/downloadMavenArtifact.sh
@@ -70,7 +70,7 @@ DOWNLOAD_URL="${BASE_URL}/${GROUP_ID_FOR_API}/${artifactId}/${version}/${ARTIFACT_FILENAME}"

# Download Maven Artifact into the "targetDirectory"
if [ ! -f "./${targetDirectory}/${ARTIFACT_FILENAME}" ] ; then
source ${SCRIPTS_DIR}/download.sh --url "${DOWNLOAD_URL}"
source "${SCRIPTS_DIR}/download.sh" --url "${DOWNLOAD_URL}"

# Create artifacts targetDirectory if it doesn't exist
mkdir -p "./${targetDirectory}"
Expand Down