diff --git a/.github/workflows/java-code-analysis.yml b/.github/workflows/java-code-analysis.yml index beb6e9f0c..5c0ccee41 100644 --- a/.github/workflows/java-code-analysis.yml +++ b/.github/workflows/java-code-analysis.yml @@ -122,7 +122,7 @@ jobs: ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION: "true" IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT: "" # Options: "none", "aggregated", "full". default = "plugin" or "" run: | - ./../../scripts/analysis/analyze.sh + ./../../scripts/analysis/analyze.sh --profile Neo4jv5-low-memory - name: Move reports from the temp to the results directory preserving their surrounding directory working-directory: temp diff --git a/.github/workflows/typescript-code-analysis.yml b/.github/workflows/typescript-code-analysis.yml index 348594a8d..699053316 100644 --- a/.github/workflows/typescript-code-analysis.yml +++ b/.github/workflows/typescript-code-analysis.yml @@ -136,7 +136,7 @@ jobs: ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION: "true" IMPORT_GIT_LOG_DATA_IF_SOURCE_IS_PRESENT: "" # Options: "none", "aggregated", "full". default = "plugin" or "" run: | - ./../../scripts/analysis/analyze.sh + ./../../scripts/analysis/analyze.sh --profile Neo4jv5-low-memory - name: Move reports from the temp to the results directory preserving their surrounding directory working-directory: temp diff --git a/COMMANDS.md b/COMMANDS.md index f0f133a40..4554f325a 100644 --- a/COMMANDS.md +++ b/COMMANDS.md @@ -71,9 +71,11 @@ The [analyze.sh](./scripts/analysis/analyze.sh) command comes with these command - `--report Csv` only generates CSV reports. This speeds up the report generation and doesn't depend on Python, Jupyter Notebook or any other related dependencies. The default value os `All` to generate all reports. `Jupiter` will only generate Jupyter Notebook reports. `DatabaseCsvExport` exports the whole graph database as a CSV file (performance intense, check if there are security concerns first). 
- `--profile Neo4jv4` uses the older long term support (june 2023) version v4.4.x of Neo4j and suitable compatible versions of plugins and JQAssistant. `Neo4jv5` will explicitly select the newest (june 2023) version 5.x of Neo4j. Without setting -a profile, the newest versions will be used. Profiles can be found in the directory [scripts/profiles](./scripts/profiles/). +a profile, the newest versions will be used. Other profiles can be found in the directory [scripts/profiles](./scripts/profiles/). -- `--profile Neo4jv5-continue-on-scan-errors` is based on the default profile (`Neo4jv5`) but uses the jQAssistant configuration template [template-neo4jv5-jqassistant-continue-on-error.yaml](./scripts/configuration/template-neo4jv5-jqassistant-continue-on-error.yaml) to continue on scan error instead of failing fast. This is temporarily useful when there is a known error that needs to be ignored. It is still recommended to use the default profile and fail fast if there is something wrong. Profiles can be found in the directory [scripts/profiles](./scripts/profiles/). +- `--profile Neo4jv5-continue-on-scan-errors` is based on the default profile (`Neo4jv5`) but uses the jQAssistant configuration template [template-neo4jv5-jqassistant-continue-on-error.yaml](./scripts/configuration/template-neo4jv5-jqassistant-continue-on-error.yaml) to continue on scan error instead of failing fast. This is temporarily useful when there is a known error that needs to be ignored. It is still recommended to use the default profile and fail fast if there is something wrong. Other profiles can be found in the directory [scripts/profiles](./scripts/profiles/). + +- `--profile Neo4jv5-low-memory` is based on the default profile (`Neo4jv5`) but uses only half of the memory (RAM) as configured in [template-neo4j-low-memory.conf](./scripts/configuration/template-neo4j-low-memory.conf). This is useful for the analysis of smaller codebases with fewer resources. 
Other profiles can be found in the directory [scripts/profiles](./scripts/profiles/). - `--explore` activates the "explore" mode where no reports are generated. Furthermore, Neo4j won't be stopped at the end of the script and will therefore continue running. This makes it easy to just set everything up but then use the running Neo4j server to explore the data manually. diff --git a/README.md b/README.md index 696d2dfbd..96d8d8ede 100644 --- a/README.md +++ b/README.md @@ -243,6 +243,12 @@ The [Code Structure Analysis Pipeline](./.github/workflows/java-code-analysis.ym ./../../scripts/analysis/analyze.sh --profile Neo4jv5-continue-on-scan-errors ``` +- How can I reduce the memory (RAM) consumption? + 👉 Use the profile `Neo4jv5-low-memory` (default = `Neo4jv5`): + + ```shell + ./../../scripts/analysis/analyze.sh --profile Neo4jv5-low-memory + ``` ## 🕸 Web References diff --git a/scripts/configuration/template-neo4j-low-memory.conf b/scripts/configuration/template-neo4j-low-memory.conf new file mode 100644 index 000000000..82c7cb45d --- /dev/null +++ b/scripts/configuration/template-neo4j-low-memory.conf @@ -0,0 +1,25 @@ + +# The following static configuration entries were taken from "template-neo4j-low-memory.conf". + +# List of procedures and user defined functions that are allowed +# full access to the database through unsupported/insecure internal APIs. +dbms.security.procedures.unrestricted=apoc.*,gds.* + +# Memory: Java Heap Size +server.memory.heap.initial_size=4g +server.memory.heap.max_size=4g + +# Memory: The amount of memory to use for mapping the store files. +server.memory.pagecache.size=1g + +# Memory: Exits JVM on the first occurrence of an out-of-memory error. +server.jvm.additional=-XX:+ExitOnOutOfMemoryError + +# Memory: Limit the amount of memory that all running transactions can consume. +db.memory.transaction.total.max=3g + +# Memory: Limit the amount of memory that a single transaction can consume. 
+db.memory.transaction.max=3g + +# Transaction: Retention policy for transaction logs needed to perform recovery and backups. +db.tx_log.rotation.retention_policy=keep_none \ No newline at end of file diff --git a/scripts/configuration/template-neo4j-v4-low-memory.conf b/scripts/configuration/template-neo4j-v4-low-memory.conf new file mode 100644 index 000000000..fd5b3b8a1 --- /dev/null +++ b/scripts/configuration/template-neo4j-v4-low-memory.conf @@ -0,0 +1,25 @@ + +# The following static configuration entries were taken from "template-neo4j-v4-low-memory.conf". + +# List of procedures and user defined functions that are allowed +# full access to the database through unsupported/insecure internal APIs. +dbms.security.procedures.unrestricted=apoc.*,gds.* + +# Memory: Java Heap Size +dbms.memory.heap.initial_size=4g +dbms.memory.heap.max_size=4g + +# Memory: The amount of memory to use for mapping the store files. +dbms.memory.pagecache.size=1g + +# Memory: Exits JVM on the first occurrence of an out-of-memory error. +dbms.jvm.additional=-XX:+ExitOnOutOfMemoryError + +# Memory: Limit the amount of memory that all running transactions can consume. +dbms.memory.transaction.global_max_size=3g + +# Memory: Limit the amount of memory that a single transaction can consume. +dbms.memory.transaction.max_size=3g + +# Transaction: Retention policy for transaction logs needed to perform recovery and backups. +dbms.tx_log.rotation.retention_policy=keep_none \ No newline at end of file diff --git a/scripts/copyReportsIntoResults.sh b/scripts/copyReportsIntoResults.sh index d356cbc8f..e7ff6688d 100755 --- a/scripts/copyReportsIntoResults.sh +++ b/scripts/copyReportsIntoResults.sh @@ -6,8 +6,6 @@ # Notice that this scripts needs to be executed within the "temp" directory. 
-# Requires generateJupyterReportReference.sh - # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -o errexit -o pipefail diff --git a/scripts/profiles/Neo4jv4-low-memory.sh b/scripts/profiles/Neo4jv4-low-memory.sh new file mode 100755 index 000000000..91c312e53 --- /dev/null +++ b/scripts/profiles/Neo4jv4-low-memory.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +# Sets all settings variables for a low-memory analysis with Neo4j v4.4.x (long term support (LTS) version as of may 2023). +# The chosen settings are tested to be compatible and working. + +NEO4J_VERSION=${NEO4J_VERSION:-"4.4.20"} # Version 4.4.x is the current long term support (LTS) version (may 2023) +NEO4J_HTTP_TRANSACTION_ENDPOINT=${NEO4J_HTTP_TRANSACTION_ENDPOINT:-"db/data/transaction/commit"} # Since Neo4j v5 it is "db/<name>/tx/commit" +NEO4J_CONFIG_TEMPLATE=${NEO4J_CONFIG_TEMPLATE:-"template-neo4j-v4-low-memory.conf"} # Name of the template file ("configuration" folder) for the Neo4j configuration + +# Overrideable settings variables for ports (optional, defaults also defined in sub scripts where needed) +# Override them if you need to run multiple neo4j database servers in parallel. 
+NEO4J_HTTP_PORT=${NEO4J_HTTP_PORT:-"7474"} # Neo4j HTTP API port for executing queries +NEO4J_HTTPS_PORT=${NEO4J_HTTPS_PORT:-"7473"} # Neo4j HTTPS port for encrypted querying +NEO4J_BOLT_PORT=${NEO4J_BOLT_PORT:-"7687"} # Neo4j's own "Bolt Protocol" port + +# Awesome Procedures (APOC) Plugin for Neo4j +NEO4J_APOC_PLUGIN_VERSION=${NEO4J_APOC_PLUGIN_VERSION:-"4.4.0.15"} # Version number matches Neo4j version +NEO4J_APOC_PLUGIN_EDITION=${NEO4J_APOC_PLUGIN_EDITION:-"all"} # Since Neo4j v5 only the core edition is maintained +NEO4J_APOC_PLUGIN_GITHUB=${NEO4J_APOC_PLUGIN_GITHUB:-"neo4j-contrib/neo4j-apoc-procedures"} # Location for the old plugins compatible to Neo4j v4 + +NEO4J_GDS_PLUGIN_VERSION=${NEO4J_GDS_PLUGIN_VERSION:-"2.3.4"} # Graph Data Science Plugin Version 2.3.x is compatible with Neo4j 4.4.x +NEO4J_OPEN_GDS_PLUGIN_VERSION=${NEO4J_OPEN_GDS_PLUGIN_VERSION:-"2.6.8"} # Open package variant of the graph-data-science plugin for Neo4j (https://github.com/JohT/open-graph-data-science-packaging). Since version 2.4. compatible with Neo4j 5.x. +NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"} # Graph Data Science Plugin Edition: "open" for OpenGDS, "full" for the full version with Neo4j license + +JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"1.12.2"} # Version number of the jQAssistant command line interface. 
Version 1.12.2 is compatible with Neo4j v4 +JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv4"} +JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv4-jqassistant.yaml"} # Name of the template file for the jqassistant configuration \ No newline at end of file diff --git a/scripts/profiles/Neo4jv4.sh b/scripts/profiles/Neo4jv4.sh index 48058fd84..f6405cd99 100755 --- a/scripts/profiles/Neo4jv4.sh +++ b/scripts/profiles/Neo4jv4.sh @@ -5,6 +5,7 @@ NEO4J_VERSION=${NEO4J_VERSION:-"4.4.20"} # Version 4.4.x is the current long term support (LTS) version (may 2023) NEO4J_HTTP_TRANSACTION_ENDPOINT=${NEO4J_HTTP_TRANSACTION_ENDPOINT:-"db/data/transaction/commit"} # Since Neo4j v5 it is "db//tx/commit" +NEO4J_CONFIG_TEMPLATE=${NEO4J_CONFIG_TEMPLATE:-"template-neo4j-v4.conf"} # Name of the template file ("configuration" folder) for the Neo4j configuration # Overrideable settings variables for ports (optional, defaults also defined in sub scripts where needed) # Override them if you need to run multiple neo4j database servers in parallel. diff --git a/scripts/profiles/Neo4jv5-continue-on-scan-errors.sh b/scripts/profiles/Neo4jv5-continue-on-scan-errors.sh index 9a92d9750..eea0ace77 100755 --- a/scripts/profiles/Neo4jv5-continue-on-scan-errors.sh +++ b/scripts/profiles/Neo4jv5-continue-on-scan-errors.sh @@ -5,6 +5,7 @@ NEO4J_VERSION=${NEO4J_VERSION:-"5.23.0"} # Version 5.9.0 is the current version of june 2023 NEO4J_HTTP_TRANSACTION_ENDPOINT=${NEO4J_HTTP_TRANSACTION_ENDPOINT:-"db/neo4j/tx/commit"} # Since Neo4j v5 it is "db//tx/commit" +NEO4J_CONFIG_TEMPLATE=${NEO4J_CONFIG_TEMPLATE:-"template-neo4j.conf"} # Name of the template file ("configuration" folder) for the Neo4j configuration # Overrideable settings variables for ports (optional, defaults also defined in sub scripts where needed) # Override them if you need to run multiple neo4j database servers in parallel. 
diff --git a/scripts/profiles/Neo4jv5-low-memory.sh b/scripts/profiles/Neo4jv5-low-memory.sh new file mode 100755 index 000000000..c5dfc39dc --- /dev/null +++ b/scripts/profiles/Neo4jv5-low-memory.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +# Sets all settings variables for a low-memory analysis with Neo4j v5.x (newest version as of june 2023). +# The chosen settings are tested to be compatible and working. + +NEO4J_VERSION=${NEO4J_VERSION:-"5.23.0"} # Version 5.9.0 is the current version of june 2023 +NEO4J_HTTP_TRANSACTION_ENDPOINT=${NEO4J_HTTP_TRANSACTION_ENDPOINT:-"db/neo4j/tx/commit"} # Since Neo4j v5 it is "db/<name>/tx/commit" +NEO4J_CONFIG_TEMPLATE=${NEO4J_CONFIG_TEMPLATE:-"template-neo4j-low-memory.conf"} # Name of the template file ("configuration" folder) for the Neo4j configuration + +# Overrideable settings variables for ports (optional, defaults also defined in sub scripts where needed) +# Override them if you need to run multiple neo4j database servers in parallel. +NEO4J_HTTP_PORT=${NEO4J_HTTP_PORT:-"7474"} # Neo4j HTTP API port for executing queries +NEO4J_HTTPS_PORT=${NEO4J_HTTPS_PORT:-"7473"} # Neo4j HTTPS port for encrypted querying +NEO4J_BOLT_PORT=${NEO4J_BOLT_PORT:-"7687"} # Neo4j's own "Bolt Protocol" port + +# Awesome Procedures (APOC) Plugin for Neo4j +NEO4J_APOC_PLUGIN_VERSION=${NEO4J_APOC_PLUGIN_VERSION:-"5.25.1"} # Version number matches Neo4j version since 5.x +NEO4J_APOC_PLUGIN_EDITION=${NEO4J_APOC_PLUGIN_EDITION:-"core"} # Since Neo4j v5 the core edition is updated with Neo4j +NEO4J_APOC_PLUGIN_GITHUB=${NEO4J_APOC_PLUGIN_GITHUB:-"neo4j/apoc"} # Core edition was moved to "neo4j/apoc" for Neo4j v5 + +NEO4J_GDS_PLUGIN_VERSION=${NEO4J_GDS_PLUGIN_VERSION:-"2.13.2"} # Version 2.4.0 is the newest version of june 2023 and compatible with Neo4j v5 +NEO4J_OPEN_GDS_PLUGIN_VERSION=${NEO4J_OPEN_GDS_PLUGIN_VERSION:-"2.13.2"} # Open package variant of the graph-data-science plugin for Neo4j (https://github.com/JohT/open-graph-data-science-packaging). 
Since version 2.4. compatible with Neo4j 5.x. +NEO4J_GDS_PLUGIN_EDITION=${NEO4J_GDS_PLUGIN_EDITION:-"open"} # Graph Data Science Plugin Edition: "open" for OpenGDS, "full" for the full version with Neo4j license + +JQASSISTANT_CLI_VERSION=${JQASSISTANT_CLI_VERSION:-"2.6.0-M2"} # Version number of the jQAssistant command line interface. Version 1.12.2 is compatible with Neo4j v4 +JQASSISTANT_CLI_ARTIFACT=${JQASSISTANT_CLI_ARTIFACT:-"jqassistant-commandline-neo4jv5"} +JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv5-jqassistant.yaml"} # Name of the template file for the jqassistant configuration \ No newline at end of file diff --git a/scripts/profiles/Neo4jv5.sh b/scripts/profiles/Neo4jv5.sh index fd60dfa89..92b777971 100755 --- a/scripts/profiles/Neo4jv5.sh +++ b/scripts/profiles/Neo4jv5.sh @@ -5,6 +5,7 @@ NEO4J_VERSION=${NEO4J_VERSION:-"5.23.0"} # Version 5.9.0 is the current version of june 2023 NEO4J_HTTP_TRANSACTION_ENDPOINT=${NEO4J_HTTP_TRANSACTION_ENDPOINT:-"db/neo4j/tx/commit"} # Since Neo4j v5 it is "db//tx/commit" +NEO4J_CONFIG_TEMPLATE=${NEO4J_CONFIG_TEMPLATE:-"template-neo4j.conf"} # Name of the template file ("configuration" folder) for the Neo4j configuration # Overrideable settings variables for ports (optional, defaults also defined in sub scripts where needed) # Override them if you need to run multiple neo4j database servers in parallel. diff --git a/scripts/setupNeo4j.sh b/scripts/setupNeo4j.sh index 9b0d2afd7..b4e27b518 100755 --- a/scripts/setupNeo4j.sh +++ b/scripts/setupNeo4j.sh @@ -27,6 +27,8 @@ NEO4J_HTTP_PORT=${NEO4J_HTTP_PORT:-"7474"} # Neo4j HTTP API port for executing q NEO4J_HTTPS_PORT=${NEO4J_HTTPS_PORT:-"7473"} # Neo4j HTTPS port for encrypted querying NEO4J_BOLT_PORT=${NEO4J_BOLT_PORT:-"7687"} # Neo4j's own "Bolt Protocol" port +NEO4J_CONFIG_TEMPLATE=${NEO4J_CONFIG_TEMPLATE:-"template-neo4j.conf"} # Name of the template file ("configuration" folder) for the Neo4j configuration. 
Defaults to "template-neo4j.conf". + # Internal constants NEO4J_INSTALLATION_NAME="neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" NEO4J_INSTALLATION_DIRECTORY="${TOOLS_DIRECTORY}/${NEO4J_INSTALLATION_NAME}" @@ -163,12 +165,8 @@ if [ ! -d "${NEO4J_INSTALLATION_DIRECTORY}" ] ; then } >> "${NEO4J_CONFIG}" fi - echo "setupNeo4j: Configuring static settings (memory, procedure permissions, ...)" - if [[ "$NEO4J_MAJOR_VERSION_NUMBER" -ge 5 ]]; then - cat "${SCRIPTS_DIR}/configuration/template-neo4j.conf" >> "${NEO4J_CONFIG}" - else - cat "${SCRIPTS_DIR}/configuration/template-neo4j-v4.conf" >> "${NEO4J_CONFIG}" - fi + echo "setupNeo4j: Appending configuration template ${NEO4J_CONFIG_TEMPLATE} (memory, procedure permissions, ...)" + cat "${SCRIPTS_DIR}/configuration/${NEO4J_CONFIG_TEMPLATE}" >> "${NEO4J_CONFIG}" # Set initial password for user "neo4j" otherwise the default password "neo4j" would need to be changed immediately (prompt). # This needs to be done after the configuration changes.