From dd56d270f8e9758bc7a9e80297a56766a9c300ca Mon Sep 17 00:00:00 2001 From: JohT Date: Mon, 30 Oct 2023 09:50:44 +0100 Subject: [PATCH 1/9] Add operating system detection script functions --- scripts/operatingSystemFunctions.sh | 49 +++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 scripts/operatingSystemFunctions.sh diff --git a/scripts/operatingSystemFunctions.sh b/scripts/operatingSystemFunctions.sh new file mode 100644 index 000000000..6bdd98203 --- /dev/null +++ b/scripts/operatingSystemFunctions.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +# Provides operating system dependent functions e.g. to detect Windows. + +# Requires executeQuery.sh + +# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands) +set -eo pipefail + +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts + +# Return true if the script is running on Windows, otherwise false. +# Example: if isWindows; then echo "Running on Windows"; fi +isWindows() { + [[ "$OSTYPE" == "cygwin" ]] || [[ "$OSTYPE" == "msys" ]] || [[ "$OSTYPE" == "win32" ]] +} + +# Echoes/Returns the first argument if the script is running on Windows, otherwise the second argument. +# Example: artifactPostfix=$(ifWindows "windows.zip" "unix.tar.gz") +ifWindows() { + if isWindows; then + echo "$1" + else + echo "$2" + fi +} + +# Prints out a message if Windows was detected for the current OSTYPE or not. 
+# Example: printWindows +printWindows() { + ifWindows "operatingSystemFunctions: Detected Windows for OSTYPE ${OSTYPE}" "operatingSystemFunctions: No Windows detected for OSTYPE ${OSTYPE}" +} + +# Converts the POSIX path given as the first argument to Windows path format if the script is running on Windows. +# Otherwise it just returns the path unchanged (for non-Windows systems). +# Example: path=$(convertPosixToWindowsPathIfNecessary "${path}") +convertPosixToWindowsPathIfNecessary() { + if isWindows; then + echo "$1" | sed -e 's/^\///' -e 's/\//\\/g' -e 's/^./\0:/' + else + echo "$1" + fi +} + +printWindows \ No newline at end of file From 5345a8d636928c6606eeab9d2218f72a2c70bdb8 Mon Sep 17 00:00:00 2001 From: JohT Date: Sat, 14 Oct 2023 11:01:27 +0200 Subject: [PATCH 2/9] Support Windows using git bash --- README.md | 3 +- scripts/downloadMavenArtifact.sh | 2 +- scripts/executeQuery.sh | 10 ++--- scripts/executeQueryFunctions.sh | 7 ++-- scripts/setupJQAssistant.sh | 2 +- scripts/setupNeo4j.sh | 55 ++++++++++++++++++---------- scripts/setupNeo4jInitialPassword.sh | 23 +++++++++--- scripts/startNeo4j.sh | 55 +++++++++++++++++++++------- scripts/stopNeo4j.sh | 45 ++++++++++++++++------- scripts/waitForNeo4jHttp.sh | 10 ++--- 10 files changed, 143 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index 4b695fea5..024fe4da2 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,8 @@ Here are some reports that utilize Neo4j's [Graph Data Science Library](https:// ## 🛠 Prerequisites -- Java 17 is required (June 2023 Neo4j 5.x requirement) +- Java 17 is [required for Neo4j](https://neo4j.com/docs/operations-manual/current/installation/requirements/#deployment-requirements-software) (5.x requirement) +- [jq](https://github.com/jqlang/jq) the "lightweight and flexible command-line JSON processor" needs to be installed. Latest releases: https://github.com/jqlang/jq/releases/latest. Check using `jq --version`. 
- Python and a conda package manager are required for Jupyter Notebook reports - Chromium will automatically be downloaded if needed for Jupyter Notebook reports in PDF format diff --git a/scripts/downloadMavenArtifact.sh b/scripts/downloadMavenArtifact.sh index 238939568..b920e6923 100755 --- a/scripts/downloadMavenArtifact.sh +++ b/scripts/downloadMavenArtifact.sh @@ -70,7 +70,7 @@ DOWNLOAD_URL="${BASE_URL}/${GROUP_ID_FOR_API}/${artifactId}/${version}/${ARTIFAC # Download Maven Artifact into the "targetDirectory" if [ ! -f "./${targetDirectory}/${ARTIFACT_FILENAME}" ] ; then - source ${SCRIPTS_DIR}/download.sh --url "${DOWNLOAD_URL}" + source "${SCRIPTS_DIR}/download.sh" --url "${DOWNLOAD_URL}" # Create artifacts targetDirectory if it doen't exist mkdir -p "./${targetDirectory}" diff --git a/scripts/executeQuery.sh b/scripts/executeQuery.sh index 9fd2b61e0..e3210d8ae 100755 --- a/scripts/executeQuery.sh +++ b/scripts/executeQuery.sh @@ -97,14 +97,14 @@ cypher_query_for_api="{\"statements\":[{\"statement\":${cypher_query},\"paramete #echo "executeQuery: Cypher Query for API: ${cypher_query_for_api}" # Calls the Neo4j HTTP API using cURL ( https://curl.se ) -cyper_query_result=$(curl --silent -S --fail-with-body -H Accept:application/json -H Content-Type:application/json \ +cypher_query_result=$(curl --silent -S --fail-with-body -H Accept:application/json -H Content-Type:application/json \ -u neo4j:"${NEO4J_INITIAL_PASSWORD}" \ "http://localhost:${NEO4J_HTTP_PORT}/${NEO4J_HTTP_TRANSACTION_ENDPOINT}" \ -d "${cypher_query_for_api}") -#echo "executeQuery: Cypher Query Result: ${cyper_query_result}" +#echo "executeQuery: Cypher Query Result: ${cypher_query_result}" # If there is a error message print it to syserr >&2 in red color -error_message=$( echo "${cyper_query_result}" | jq -r '.errors[0] // empty' ) +error_message=$( echo "${cypher_query_result}" | jq -r '.errors[0] // empty' ) if [[ -n "${error_message}" ]]; then redColor='\033[0;31m' noColor='\033[0m' # No 
Color @@ -114,9 +114,9 @@ fi # Output results in CSV format if [ "${no_source_reference}" = true ] ; then - echo -n "${cyper_query_result}" | jq -r '(.results[0])? | .columns,(.data[].row)? | map(if type == "array" then join(",") else . end) | flatten | @csv' + echo -n "${cypher_query_result}" | jq -r '(.results[0])? | .columns,(.data[].row)? | map(if type == "array" then join(",") else . end) | flatten | @csv' else cypher_query_file_relative_name=${cypher_query_file_name#/**/cypher/} sourceFileReferenceInfo="Source Cypher File: ${cypher_query_file_relative_name}" - echo -n "${cyper_query_result}" | jq -r --arg sourceReference "${sourceFileReferenceInfo}" '(.results[0])? | .columns + [$sourceReference], (.data[].row)? + [""] | map(if type == "array" then join(",") else . end) | flatten | @csv' + echo -n "${cypher_query_result}" | jq -r --arg sourceReference "${sourceFileReferenceInfo}" '(.results[0])? | .columns + [$sourceReference], (.data[].row)? + [""] | map(if type == "array" then join(",") else . 
end) | flatten | @csv' fi \ No newline at end of file diff --git a/scripts/executeQueryFunctions.sh b/scripts/executeQueryFunctions.sh index a4ea6e057..ddd157bf4 100644 --- a/scripts/executeQueryFunctions.sh +++ b/scripts/executeQueryFunctions.sh @@ -48,8 +48,8 @@ execute_cypher_expect_results() { # Function to execute a cypher query from the given file (first and only argument) using Neo4j's HTTP API execute_cypher_http() { - # (Neo4j HTTP API Script) Execute the Cyper query contained in the file and print the results as CSV - source $SCRIPTS_DIR/executeQuery.sh "${@}" # "${@}": Get all function arguments and forward them + # (Neo4j HTTP API Script) Execute the Cypher query contained in the file and print the results as CSV + source "$SCRIPTS_DIR/executeQuery.sh" "${@}" # "${@}": Get all function arguments and forward them } # Function to execute a cypher query from the given file (first and only argument) with a summarized (console) output using Neo4j's HTTP API @@ -107,11 +107,10 @@ execute_cypher_shell() { query_parameters=$(cypher_shell_query_parameters "${@}") echo "executeQuery: query_parameters=${query_parameters}" - # (Neo4j Cyper Shell CLI) Execute the Cyper query contained in the file and print the results as CSV + # (Neo4j Cypher Shell CLI) Execute the Cypher query contained in the file and print the results as CSV cat $cypherFileName | NEO4J_HOME="${NEO4J_DIRECTORY}" ${NEO4J_BIN}/cypher-shell -u neo4j -p "${NEO4J_INITIAL_PASSWORD}" --format plain --param "${query_parameters}" || exit 1 # Display the name of the Cypher file without its path at the bottom of the CSV (or console) separated by an empty line - # TODO Find a solution to move the source reference to the last column name echo "" echo "\"Source Cypher File:\",\"$(basename -- "${cypherFileName}")\"" } diff --git a/scripts/setupJQAssistant.sh b/scripts/setupJQAssistant.sh index 931c4ba06..71801c574 100755 --- a/scripts/setupJQAssistant.sh +++ b/scripts/setupJQAssistant.sh @@ -52,7 +52,7 @@ 
JQASSISTANT_INSTALLATION_DIRECTORY="${TOOLS_DIRECTORY}/${JQASSISTANT_INSTALLATIO if [ ! -d "${JQASSISTANT_INSTALLATION_DIRECTORY}" ] ; then jqassistant_cli_fulldownload_url=${JQASSISTANT_CLI_DOWNLOAD_URL}/${JQASSISTANT_CLI_ARTIFACT}/${JQASSISTANT_CLI_VERSION}/${JQASSISTANT_CLI_ARTIFACT}-${JQASSISTANT_CLI_VERSION}-${JQASSISTANT_CLI_DISTRIBUTION} jqassistant_cli_fulldownload_file="${JQASSISTANT_INSTALLATION_NAME}.zip" - source ${SCRIPTS_DIR}/download.sh --url "${jqassistant_cli_fulldownload_url}" --filename "${jqassistant_cli_fulldownload_file}" + source "${SCRIPTS_DIR}/download.sh" --url "${jqassistant_cli_fulldownload_url}" --filename "${jqassistant_cli_fulldownload_file}" # Unpack the ZIP file (-q option for less verbose output) unzip -q "${SHARED_DOWNLOADS_DIRECTORY}/${jqassistant_cli_fulldownload_file}" -d "${TOOLS_DIRECTORY}" diff --git a/scripts/setupNeo4j.sh b/scripts/setupNeo4j.sh index b455a0b23..ae25be238 100755 --- a/scripts/setupNeo4j.sh +++ b/scripts/setupNeo4j.sh @@ -4,7 +4,7 @@ # Note: The environment variable NEO4J_INITIAL_PASSWORD needs to be set. -# Requires download.sh,setupNeo4jInitialPassword.sh +# Requires download.sh,setupNeo4jInitialPassword.sh,operatingSystemFunctions.sh # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -eo pipefail @@ -68,14 +68,25 @@ if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then exit 1 fi +# Include operation system function to for example detect Windows. +source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" + # Download and extract Neo4j if [ ! 
-d "${NEO4J_INSTALLATION_DIRECTORY}" ] ; then - neo4jDownloadArchiveFileName="${NEO4J_INSTALLATION_NAME}-unix.tar.gz" - source ${SCRIPTS_DIR}/download.sh --url "https://dist.neo4j.org/${neo4jDownloadArchiveFileName}" || exit 1 + neo4jInstallationSystemPostfix=$(ifWindows "windows.zip" "unix.tar.gz") + neo4jDownloadArchiveFileName="${NEO4J_INSTALLATION_NAME}-${neo4jInstallationSystemPostfix}" + echo "setupNeo4j: Using Neo4j distribution ${neo4jDownloadArchiveFileName}" + + source "${SCRIPTS_DIR}/download.sh" --url "https://dist.neo4j.org/${neo4jDownloadArchiveFileName}" || exit 1 - # Extract the tar file - tar -xf "${SHARED_DOWNLOADS_DIRECTORY}/${neo4jDownloadArchiveFileName}" --directory "${TOOLS_DIRECTORY}" || exit 1 + if [[ ${neo4jInstallationSystemPostfix} == "unix.tar.gz" ]]; then + # Extract the unix tar file + tar -xf "${SHARED_DOWNLOADS_DIRECTORY}/${neo4jDownloadArchiveFileName}" --directory "${TOOLS_DIRECTORY}" || exit 1 + else + # Unpack the windows ZIP file (-q option for less verbose output, ignore warnings (backslash) with || true) + unzip -q "${SHARED_DOWNLOADS_DIRECTORY}/${neo4jDownloadArchiveFileName}" -d "${TOOLS_DIRECTORY}" || true + fi # Fail if Neo4j hadn't been downloaded successfully if [ ! -d "${NEO4J_INSTALLATION_DIRECTORY}" ] ; then @@ -87,16 +98,22 @@ if [ ! 
-d "${NEO4J_INSTALLATION_DIRECTORY}" ] ; then # instead of inside the neo4j directory echo "setupNeo4j: Configuring dynamic settings (data directories, ports, ...)" + neo4jDataPath=$(convertPosixToWindowsPathIfNecessary "${NEO4J_DATA_PATH}") + neo4jLogsPath=$(convertPosixToWindowsPathIfNecessary "${NEO4J_RUNTIME_PATH}/logs") + neo4jDumpsPath=$(convertPosixToWindowsPathIfNecessary "${NEO4J_RUNTIME_PATH}/dumps") + neo4jRunPath=$(convertPosixToWindowsPathIfNecessary "${NEO4J_RUNTIME_PATH}/run") + neo4jTransactionsPath=$(convertPosixToWindowsPathIfNecessary "${NEO4J_DATA_PATH}/transactions") + if [[ "$NEO4J_MAJOR_VERSION_NUMBER" -ge 5 ]]; then echo "setupNeo4j: Neo4j v5 or higher detected" { echo "" echo "# Paths of data directories in the installation (v5)" - echo "server.directories.data=${NEO4J_DATA_PATH}" - echo "server.directories.logs=${NEO4J_RUNTIME_PATH}/logs" - echo "server.directories.dumps.root=${NEO4J_RUNTIME_PATH}/dumps" - echo "server.directories.run=${NEO4J_RUNTIME_PATH}/run" - echo "server.directories.transaction.logs.root=${NEO4J_DATA_PATH}/transactions" + echo "server.directories.data=${neo4jDataPath}" + echo "server.directories.logs=${neo4jLogsPath}" + echo "server.directories.dumps.root=${neo4jDumpsPath}" + echo "server.directories.run=${neo4jRunPath}" + echo "server.directories.transaction.logs.root=${neo4jTransactionsPath}" echo "" echo "# Ports Configuration (v5)" echo "server.bolt.listen_address=:${NEO4J_BOLT_PORT}" @@ -111,11 +128,11 @@ if [ ! 
-d "${NEO4J_INSTALLATION_DIRECTORY}" ] ; then { echo "" echo "# Paths of data directories in the installation" - echo "dbms.directories.data=${NEO4J_DATA_PATH}" - echo "dbms.directories.logs=${NEO4J_RUNTIME_PATH}/logs" - echo "dbms.directories.dumps.root=${NEO4J_RUNTIME_PATH}/dumps" - echo "dbms.directories.run=${NEO4J_RUNTIME_PATH}/run" - echo "dbms.directories.transaction.logs.root=${NEO4J_DATA_PATH}/transactions" + echo "dbms.directories.data=${neo4jDataPath}" + echo "dbms.directories.logs=${neo4jLogsPath}" + echo "dbms.directories.dumps.root=${neo4jDumpsPath}" + echo "dbms.directories.run=${neo4jRunPath}" + echo "dbms.directories.transaction.logs.root=${neo4jTransactionsPath}" echo "" echo "# Ports Configuration" echo "dbms.connector.bolt.listen_address=:${NEO4J_BOLT_PORT}" @@ -136,9 +153,9 @@ if [ ! -d "${NEO4J_INSTALLATION_DIRECTORY}" ] ; then # Set initial password for user "neo4j" otherwise the default password "neo4j" would need to be changed immediately (prompt). # This needs to be done after the configuration changes. - source ${SCRIPTS_DIR}/setupNeo4jInitialPassword.sh + source "${SCRIPTS_DIR}/setupNeo4jInitialPassword.sh" - echo "setupNeo4j: Installed sucessfully" + echo "setupNeo4j: Installed successfully" else echo "setupNeo4j: ${NEO4J_INSTALLATION_NAME} already installed" fi @@ -146,7 +163,7 @@ fi # Download and Install the Neo4j Plugin "Awesome Procedures for Neo4j" (APOC) if [ ! -f "${NEO4J_PLUGINS}/${NEO4J_APOC_PLUGIN_ARTIFACT}" ] ; then - source ${SCRIPTS_DIR}/download.sh --url "https://github.com/${NEO4J_APOC_PLUGIN_GITHUB}/releases/download/${NEO4J_APOC_PLUGIN_VERSION}/${NEO4J_APOC_PLUGIN_ARTIFACT}" || exit 1 + source "${SCRIPTS_DIR}/download.sh" --url "https://github.com/${NEO4J_APOC_PLUGIN_GITHUB}/releases/download/${NEO4J_APOC_PLUGIN_VERSION}/${NEO4J_APOC_PLUGIN_ARTIFACT}" || exit 1 # Uninstall previously installed Neo4j Plugin "Awesome Procedures for Neo4j" (APOC) rm -f "${NEO4J_PLUGINS}/apoc*.jar" @@ -192,7 +209,7 @@ fi if [ ! 
-f "${NEO4J_PLUGINS}/${neo4jGraphDataScienceReleaseArtifact}" ] ; then # Download the Neo4j Plugin "Graph Data Science" (GDS) - source ${SCRIPTS_DIR}/download.sh --url "${neo4jGraphDataScienceDownloadUrl}/${neo4jGraphDataScienceReleaseArtifact}" || exit 1 + source "${SCRIPTS_DIR}/download.sh" --url "${neo4jGraphDataScienceDownloadUrl}/${neo4jGraphDataScienceReleaseArtifact}" || exit 1 # Uninstall previously installed Neo4j Plugin "Graph Data Science" (GDS) rm -f "${NEO4J_PLUGINS}/*graph-data-science*.jar" diff --git a/scripts/setupNeo4jInitialPassword.sh b/scripts/setupNeo4jInitialPassword.sh index 7ffd4a208..bb62d6775 100755 --- a/scripts/setupNeo4jInitialPassword.sh +++ b/scripts/setupNeo4jInitialPassword.sh @@ -15,6 +15,13 @@ TOOLS_DIRECTORY=${TOOLS_DIRECTORY:-"tools"} # Get the tools directory (defaults NEO4J_INSTALLATION_NAME="neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" NEO4J_INSTALLATION_DIRECTORY="${TOOLS_DIRECTORY}/${NEO4J_INSTALLATION_NAME}" +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +echo "startNeo4j: SCRIPTS_DIR=$SCRIPTS_DIR" + # Check if environment variable is set if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then echo "Requires environment variable NEO4J_INITIAL_PASSWORD to be set first. Use 'export NEO4J_INITIAL_PASSWORD='." @@ -32,18 +39,24 @@ echo "setupNeo4jInitialPassword: Using ${NEO4J_INSTALLATION_DIRECTORY} as NEO4J_ # Extract the first component of the version number (=major version number) NEO4J_MAJOR_VERSION_NUMBER=$(echo "$NEO4J_VERSION" | cut -d'.' 
-f1) +echo "setupNeo4jInitialPassword: Neo4j version ${NEO4J_MAJOR_VERSION_NUMBER} detected" + +# Include operation system function to for example detect Windows. +source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" + +scriptExtension=$(ifWindows ".bat" "") # Check if the first component is greater than or equal to 5 if [[ "$NEO4J_MAJOR_VERSION_NUMBER" -ge 5 ]]; then - echo "setupNeo4jInitialPassword: Neo4j v5 or higher detected" + echo "setupNeo4jInitialPassword: Neo4j v5 or higher used" # Neo4j version < 5 # Set the initial password using a temporary NEO4J_HOME environment variable pointing to the current setup - NEO4J_HOME="${NEO4J_INSTALLATION_DIRECTORY}" ${NEO4J_INSTALLATION_DIRECTORY}/bin/neo4j-admin dbms set-initial-password "${NEO4J_INITIAL_PASSWORD}" + NEO4J_HOME="${NEO4J_INSTALLATION_DIRECTORY}"; "${NEO4J_INSTALLATION_DIRECTORY}/bin/neo4j-admin${scriptExtension}" dbms set-initial-password "${NEO4J_INITIAL_PASSWORD}" else - echo "setupNeo4jInitialPassword: Neo4j v4 or lower detected" + echo "setupNeo4jInitialPassword: Neo4j v4 or lower used" # Neo4j version >= 5 # Set the initial password using a temporary NEO4J_HOME environment variable pointing to the current setup - NEO4J_HOME="${NEO4J_INSTALLATION_DIRECTORY}" ${NEO4J_INSTALLATION_DIRECTORY}/bin/neo4j-admin set-initial-password "${NEO4J_INITIAL_PASSWORD}" + NEO4J_HOME="${NEO4J_INSTALLATION_DIRECTORY}"; "${NEO4J_INSTALLATION_DIRECTORY}/bin/neo4j-admin${scriptExtension}" set-initial-password "${NEO4J_INITIAL_PASSWORD}" fi -echo "setupNeo4jInitialPassword: Initial password set sucessfully" +echo "setupNeo4jInitialPassword: Initial password set successfully" diff --git a/scripts/startNeo4j.sh b/scripts/startNeo4j.sh index 08b97e6aa..9e6b2fe25 100755 --- a/scripts/startNeo4j.sh +++ b/scripts/startNeo4j.sh @@ -5,7 +5,7 @@ # Note: Does nothing if the database is already running. # Note: It requires Neo4j to be installed in the TOOLS_DIRECTORY. 
-# Requires waitForNeo4jHttp.sh +# Requires waitForNeo4jHttp.sh,operatingSystemFunctions.sh # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -eo pipefail @@ -17,8 +17,9 @@ NEO4J_HTTP_PORT=${NEO4J_HTTP_PORT:-"7474"} # Neo4j's own "Bolt Protocol" port # Internal Constants NEO4J_DIR="${TOOLS_DIRECTORY}/neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" +NEO4J_DIR_WINDOWS="${TOOLS_DIRECTORY}\neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" NEO4J_BIN="${NEO4J_DIR}/bin" -WAIT_TIMES="1 2 4 8 16 32" +NEO4J_BIN_WINDOWS="${NEO4J_DIR_WINDOWS}\bin" ## Get this "scripts" directory if not already set # Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. @@ -41,30 +42,56 @@ else exit 1 fi +# Include operation system function to for example detect Windows. +source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" + +scriptExtension=$(ifWindows ".bat" "") +echo "startNeo4j: Using scriptExtension ${scriptExtension} for Windows." 
+ # Check if Neo4j is stopped (not running) using a temporary NEO4J_HOME environment variable that points to the current installation -neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j status 2>&1 || true)" +neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) if [ -n "${neo4jNotRunning}" ]; then echo "startNeo4j: Starting neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} in ${NEO4J_DIR}" # Check if there is already a process that listens to the Neo4j HTTP port - port_listener_process_id=$( lsof -t -i:"${NEO4J_HTTP_PORT}" -sTCP:LISTEN || true ) - if [ -n "${port_listener_process_id}" ]; then - echo "startNeo4j: There is already a process that listens to port ${NEO4J_HTTP_PORT}" - ps -p "${port_listener_process_id}" - echo "startNeo4j: Use this command to stop it: kill -9 \$( lsof -t -i:${NEO4J_HTTP_PORT} -sTCP:LISTEN )" - exit 1 + if isWindows; then + echo "startNeo4j: Skipping detection of processes listening to port ${NEO4J_HTTP_PORT} on Windows" + else + port_listener_process_id=$( lsof -t -i:"${NEO4J_HTTP_PORT}" -sTCP:LISTEN || true ) + if [ -n "${port_listener_process_id}" ]; then + echo "startNeo4j: There is already a process that listens to port ${NEO4J_HTTP_PORT}" + ps -p "${port_listener_process_id}" + echo "startNeo4j: Use this command to stop it: kill -9 \$( lsof -t -i:${NEO4J_HTTP_PORT} -sTCP:LISTEN )" + exit 1 + fi fi # Start Neo4j using a temporary NEO4J_HOME environment variable that points to the current installation - NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j start - + if isWindows; then + neo4jStartCommand="${NEO4J_BIN_WINDOWS}\neo4j.bat console --verbose" + # On Windows it is necessary to take the absolute full qualified path to Neo4j for the environment variable NEO4J_HOME. + # It also works without any environment variable but this would likely lead to ambiguity problems when there are multiple Neo4j instances installed. 
+ # If the path is wrong content-wise this leads to a ClassNotFoundException. + # If the path is wrong syntactically there is an error while reading the plugins directory. + windowsCommandEnvironment="set NEO4J_HOME=%cd%\\${NEO4J_DIR_WINDOWS}&& echo NEO4J_HOME=!NEO4J_HOME!" + windowsCommand="${windowsCommandEnvironment}&&${neo4jStartCommand}" + + echo "startNeo4j: Starting Neo4j on Windows in a separate console window..." + echo "startNeo4j: The following Windows command is used: ${windowsCommand}" + echo "startNeo4j: IMPORTANT: Only close the console window when the scripts and your work is finished !" + + cmd //c start cmd //v //k "${windowsCommand}" + else + NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j start --verbose + fi + # Wait some time for the start of the database echo "${WAIT_TIMES}" | tr ' ' '\n' | while read waitTime; do echo "startNeo4j: Waiting for ${waitTime} second(s)" sleep ${waitTime} - neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j status 2>&1 || true)" + neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) if [ -z "${neo4jNotRunning}" ]; then echo "startNeo4j: Successfully started neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" @@ -76,11 +103,11 @@ else fi # Check if Neo4j is still not running using a temporary NEO4J_HOME environment variable that points to the current installation -neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j status 2>&1 || true)" +neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) if [ -n "${neo4jNotRunning}" ]; then echo "startNeo4j: neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} still not running. Something went wrong. Details see 'NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j status'." 
exit 1 fi -source ${SCRIPTS_DIR}/waitForNeo4jHttp.sh || exit 1 \ No newline at end of file +source "${SCRIPTS_DIR}/waitForNeo4jHttp.sh" || exit 1 \ No newline at end of file diff --git a/scripts/stopNeo4j.sh b/scripts/stopNeo4j.sh index fe92bc8e6..a17bcc4a7 100755 --- a/scripts/stopNeo4j.sh +++ b/scripts/stopNeo4j.sh @@ -4,6 +4,8 @@ # Note: Does nothing if the database is already stopped. +# Requires waitForNeo4jHttp.sh,operatingSystemFunctions.sh + # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -eo pipefail @@ -24,36 +26,51 @@ fi # Check if Neo4j is installed if [ -d "${NEO4J_BIN}" ] ; then - echo "stopNeo4j: Using Neo4j binary directoy ${NEO4J_BIN}" + echo "stopNeo4j: Using Neo4j binary directory ${NEO4J_BIN}" else echo "stopNeo4j: Directory ${NEO4J_BIN} doesn't exist. Please run setupNeo4j.sh first." exit 1 fi +# Include operation system function to for example detect Windows. +source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" + # Check if Neo4j is stopped (not running) using a temporary NEO4J_HOME environment variable that points to the current installation -if NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j status 2>&1 | grep -q "not running" ; then +neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" +neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) +if [ -n "${neo4jNotRunning}" ]; then echo "stopNeo4j: neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} aleady stopped" exit 0 else - # Stop Neo4j using a temporary NEO4J_HOME environment variable that points to the current installation - NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j stop + if isWindows; then + echo "stopNeo4j: IMPORTANT on Windows: Please close the console window or stop the service manually where neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} is running." 
+ else + # Stop Neo4j using a temporary NEO4J_HOME environment variable that points to the current installation + NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j stop + fi fi # Check if Neo4j is still not running using a temporary NEO4J_HOME environment variable that points to the current installation -neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j status 2>&1 || true)" +neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) if [ -n "${neo4jNotRunning}" ]; then echo "stopNeo4j: Successfully stopped neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" else - echo "stopNeo4j: neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} still running. Something went wrong. Details see 'NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j status'." - exit 1 + if ! isWindows; then + echo "stopNeo4j: neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} still running. Something went wrong. Details see 'NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j${scriptExtension} status'." 
+ exit 1 + fi fi # Check if there are still processes running that listen to the Neo4j HTTP port -port_listener_process_id=$( lsof -t -i:"${NEO4J_HTTP_PORT}" -sTCP:LISTEN || true ) -if [ -n "${port_listener_process_id}" ]; then - echo "stopNeo4j: Terminating the following process that still listens to port ${NEO4J_HTTP_PORT}" - ps -p "${port_listener_process_id}" - # Terminate the process that is listening to the Neo4j HTTP port - kill -9 "${port_listener_process_id}" -fi +if isWindows; then + echo "stopNeo4j: Skipping detection of processes listening to port ${NEO4J_HTTP_PORT} on Windows" +else + port_listener_process_id=$( lsof -t -i:"${NEO4J_HTTP_PORT}" -sTCP:LISTEN || true ) + if [ -n "${port_listener_process_id}" ]; then + echo "stopNeo4j: Terminating the following process that still listens to port ${NEO4J_HTTP_PORT}" + ps -p "${port_listener_process_id}" + # Terminate the process that is listening to the Neo4j HTTP port + kill -9 "${port_listener_process_id}" + fi +fi \ No newline at end of file diff --git a/scripts/waitForNeo4jHttp.sh b/scripts/waitForNeo4jHttp.sh index cefbe6696..f987b5c69 100755 --- a/scripts/waitForNeo4jHttp.sh +++ b/scripts/waitForNeo4jHttp.sh @@ -21,7 +21,7 @@ echo "waitForNeo4jHttp: SCRIPTS_DIR=$SCRIPTS_DIR" CYPHER_DIR=${CYPHER_DIR:-"${SCRIPTS_DIR}/../cypher"} echo "waitForNeo4jHttp: CYPHER_DIR=$CYPHER_DIR" -# Define functions to execute a cypher query from within the given file (first and only argument) +# Define functions to execute a Cypher query from within the given file (first and only argument) source "${SCRIPTS_DIR}/executeQueryFunctions.sh" # List of wait times in seconds per retry @@ -34,19 +34,19 @@ echo "${WAIT_TIMES}" | tr ' ' '\n' | while read -r waitTime; do sleep "${waitTime}" # Queries node and relationship count as a basic validation - if ! cyper_elements_query_result=$(execute_cypher "${CYPHER_DIR}/Count_nodes_and_relationships.cypher"); + if ! 
cypher_elements_query_result=$(execute_cypher "${CYPHER_DIR}/Count_nodes_and_relationships.cypher"); then continue; # query failed -> try again fi - if [[ -n "${cyper_elements_query_result}" ]]; then + if [[ -n "${cypher_elements_query_result}" ]]; then echo "waitForNeo4jHttp: Successfully accessed Neo4j HTTP API." - echo "${cyper_elements_query_result}" + echo "${cypher_elements_query_result}" exit 0 fi done -if ! cyper_elements_query_result=$(execute_cypher "${CYPHER_DIR}/Count_nodes_and_relationships.cypher"); then +if ! cypher_elements_query_result=$(execute_cypher "${CYPHER_DIR}/Count_nodes_and_relationships.cypher"); then # Error: Couldn't access HTTP after all wait iterations echo "waitForNeo4jHttp: Error: Failed to access Neo4j HTTP API." exit 1 From da2534bdefc472fd4a1ae6b01035bc8122756590 Mon Sep 17 00:00:00 2001 From: JohT Date: Sun, 22 Oct 2023 10:36:40 +0200 Subject: [PATCH 3/9] Introduce functions to check and wait for Neo4j --- scripts/executeQuery.sh | 14 ++++-- scripts/executeQueryFunctions.sh | 7 ++- scripts/startNeo4j.sh | 35 +++---------- scripts/stopNeo4j.sh | 22 ++++++--- scripts/waitForNeo4jHttp.sh | 55 --------------------- scripts/waitForNeo4jHttpFunctions.sh | 73 ++++++++++++++++++++++++++++ 6 files changed, 108 insertions(+), 98 deletions(-) delete mode 100755 scripts/waitForNeo4jHttp.sh create mode 100644 scripts/waitForNeo4jHttpFunctions.sh diff --git a/scripts/executeQuery.sh b/scripts/executeQuery.sh index e3210d8ae..5759c0e46 100755 --- a/scripts/executeQuery.sh +++ b/scripts/executeQuery.sh @@ -97,17 +97,23 @@ cypher_query_for_api="{\"statements\":[{\"statement\":${cypher_query},\"paramete #echo "executeQuery: Cypher Query for API: ${cypher_query_for_api}" # Calls the Neo4j HTTP API using cURL ( https://curl.se ) -cypher_query_result=$(curl --silent -S --fail-with-body -H Accept:application/json -H Content-Type:application/json \ +if ! 
cypher_query_result=$(curl --silent -S --fail-with-body -H Accept:application/json -H Content-Type:application/json \ -u neo4j:"${NEO4J_INITIAL_PASSWORD}" \ "http://localhost:${NEO4J_HTTP_PORT}/${NEO4J_HTTP_TRANSACTION_ENDPOINT}" \ - -d "${cypher_query_for_api}") -#echo "executeQuery: Cypher Query Result: ${cypher_query_result}" + -d "${cypher_query_for_api}" 2>&1) ; +then + redColor='\033[0;31m' + noColor='\033[0m' + echo -e "${redColor}${cypher_query_file_name}: ${cypher_query_result}${noColor}" >&2 + exit 1 +fi +#echo "executeQuery: Cypher Query OK Result: ${cypher_query_result}" # If there is a error message print it to syserr >&2 in red color error_message=$( echo "${cypher_query_result}" | jq -r '.errors[0] // empty' ) if [[ -n "${error_message}" ]]; then redColor='\033[0;31m' - noColor='\033[0m' # No Color + noColor='\033[0m' echo -e "${redColor}${cypher_query_file_name}: ${error_message}${noColor}" >&2 exit 1 fi diff --git a/scripts/executeQueryFunctions.sh b/scripts/executeQueryFunctions.sh index ddd157bf4..0e30ea4c5 100644 --- a/scripts/executeQueryFunctions.sh +++ b/scripts/executeQueryFunctions.sh @@ -13,7 +13,7 @@ set -eo pipefail # This way non-standard tools like readlink aren't needed. SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts -# Extact the value of one key out of a "key=value" array e.g. for query parameters. +# Extract the value of one key out of a "key=value" array e.g. for query parameters. # The first argument is the name of the target key. # All following arguments are the "key=value" parameters. 
# Example: `extractQueryParameter "b" "a=1" "b=2" "c=3"` returns `2` @@ -49,7 +49,7 @@ execute_cypher_expect_results() { # Function to execute a cypher query from the given file (first and only argument) using Neo4j's HTTP API execute_cypher_http() { # (Neo4j HTTP API Script) Execute the Cypher query contained in the file and print the results as CSV - source "$SCRIPTS_DIR/executeQuery.sh" "${@}" # "${@}": Get all function arguments and forward them + source "${SCRIPTS_DIR}/executeQuery.sh" "${@}" # "${@}": Get all function arguments and forward them } # Function to execute a cypher query from the given file (first and only argument) with a summarized (console) output using Neo4j's HTTP API @@ -63,8 +63,7 @@ execute_cypher_http_summarized() { execute_cypher_http_expect_results() { # Get the Cypher file name from the first argument cypherFileName="${1}" - - results=$( execute_cypher_http ${cypherFileName} | wc -l ) + results=$( execute_cypher_http "${cypherFileName}" | wc -l ) results=$((results - 1)) if [[ "$results" -lt 1 ]]; then echo "$(basename -- "${cypherFileName}") (via http) Error: Expected at least one entry but was ${results}" >&2 diff --git a/scripts/startNeo4j.sh b/scripts/startNeo4j.sh index 9e6b2fe25..a34acec84 100755 --- a/scripts/startNeo4j.sh +++ b/scripts/startNeo4j.sh @@ -45,13 +45,12 @@ fi # Include operation system function to for example detect Windows. source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" -scriptExtension=$(ifWindows ".bat" "") -echo "startNeo4j: Using scriptExtension ${scriptExtension} for Windows." 
+# Include functions to check or wait for the database to be ready +source "${SCRIPTS_DIR}/waitForNeo4jHttpFunctions.sh" # Check if Neo4j is stopped (not running) using a temporary NEO4J_HOME environment variable that points to the current installation -neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" -neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) -if [ -n "${neo4jNotRunning}" ]; then +isDatabaseReady=$(isDatabaseQueryable) +if [[ ${isDatabaseReady} == "false" ]]; then echo "startNeo4j: Starting neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} in ${NEO4J_DIR}" # Check if there is already a process that listens to the Neo4j HTTP port @@ -86,28 +85,8 @@ if [ -n "${neo4jNotRunning}" ]; then NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j start --verbose fi - # Wait some time for the start of the database - echo "${WAIT_TIMES}" | tr ' ' '\n' | while read waitTime; do - echo "startNeo4j: Waiting for ${waitTime} second(s)" - sleep ${waitTime} - - neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" - neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) - if [ -z "${neo4jNotRunning}" ]; then - echo "startNeo4j: Successfully started neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" - exit 0 - fi - done + waitUntilDatabaseIsQueryable + else echo "startNeo4j: neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} already started" -fi - -# Check if Neo4j is still not running using a temporary NEO4J_HOME environment variable that points to the current installation -neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" -neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) -if [ -n "${neo4jNotRunning}" ]; then - echo "startNeo4j: neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} still not running. Something went wrong. Details see 'NEO4J_HOME=${NEO4J_DIR} ${NEO4J_BIN}/neo4j status'." 
- exit 1 -fi - -source "${SCRIPTS_DIR}/waitForNeo4jHttp.sh" || exit 1 \ No newline at end of file +fi \ No newline at end of file diff --git a/scripts/stopNeo4j.sh b/scripts/stopNeo4j.sh index a17bcc4a7..f44e7b9cc 100755 --- a/scripts/stopNeo4j.sh +++ b/scripts/stopNeo4j.sh @@ -14,6 +14,13 @@ NEO4J_VERSION=${NEO4J_VERSION:-"5.12.0"} TOOLS_DIRECTORY=${TOOLS_DIRECTORY:-"tools"} # Get the tools directory (defaults to "tools") NEO4J_HTTP_PORT=${NEO4J_HTTP_PORT:-"7474"} +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +echo "stopNeo4j: SCRIPTS_DIR=$SCRIPTS_DIR" + # Internal Constants NEO4J_DIR="${TOOLS_DIRECTORY}/neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" NEO4J_BIN="${NEO4J_DIR}/bin" @@ -35,11 +42,13 @@ fi # Include operation system function to for example detect Windows. 
source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" +# Include functions to check or wait for the database to be ready +source "${SCRIPTS_DIR}/waitForNeo4jHttpFunctions.sh" + # Check if Neo4j is stopped (not running) using a temporary NEO4J_HOME environment variable that points to the current installation -neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" -neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) -if [ -n "${neo4jNotRunning}" ]; then - echo "stopNeo4j: neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} aleady stopped" +isDatabaseReady=$(isDatabaseQueryable) +if [[ ${isDatabaseReady} == "false" ]]; then + echo "stopNeo4j: neo4j-${NEO4J_EDITION}-${NEO4J_VERSION} already stopped" exit 0 else if isWindows; then @@ -51,9 +60,8 @@ else fi # Check if Neo4j is still not running using a temporary NEO4J_HOME environment variable that points to the current installation -neo4jStatus="$( NEO4J_HOME=${NEO4J_DIR} | ${NEO4J_BIN}/neo4j${scriptExtension} status 2>&1 || true)" -neo4jNotRunning=$(echo "$neo4jStatus" | grep "not running" || true) -if [ -n "${neo4jNotRunning}" ]; then +isDatabaseReady=$(isDatabaseQueryable) +if [[ ${isDatabaseReady} == "false" ]]; then echo "stopNeo4j: Successfully stopped neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" else if ! isWindows; then diff --git a/scripts/waitForNeo4jHttp.sh b/scripts/waitForNeo4jHttp.sh deleted file mode 100755 index f987b5c69..000000000 --- a/scripts/waitForNeo4jHttp.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env bash - -# Waits until the HTTP Transactions API of Neo4j Graph Database is available. -# It queries the number of nodes and relationships to assert the connection. 
- -# Requires executeQueryFunctions.sh - -# Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) -set -eo pipefail - -NEO4J_HTTP_PORT=${NEO4J_HTTP_PORT:-"7474"} - -## Get this "scripts" directory if not already set -# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. -# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. -# This way non-standard tools like readlink aren't needed. -SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts -echo "waitForNeo4jHttp: SCRIPTS_DIR=$SCRIPTS_DIR" - -# Get the "cypher" directory by taking the path of this script and going one directory up and then to "cypher". -CYPHER_DIR=${CYPHER_DIR:-"${SCRIPTS_DIR}/../cypher"} -echo "waitForNeo4jHttp: CYPHER_DIR=$CYPHER_DIR" - -# Define functions to execute a Cypher query from within the given file (first and only argument) -source "${SCRIPTS_DIR}/executeQueryFunctions.sh" - -# List of wait times in seconds per retry -WAIT_TIMES="1 1 2 4 8 16 32 64" - -# Wait until the HTTP endpoint is up and a cypher query can be executed - -echo "${WAIT_TIMES}" | tr ' ' '\n' | while read -r waitTime; do - echo "waitForNeo4jHttp: Waiting for ${waitTime} second(s)" - sleep "${waitTime}" - - # Queries node and relationship count as a basic validation - if ! cypher_elements_query_result=$(execute_cypher "${CYPHER_DIR}/Count_nodes_and_relationships.cypher"); - then - continue; # query failed -> try again - fi - - if [[ -n "${cypher_elements_query_result}" ]]; then - echo "waitForNeo4jHttp: Successfully accessed Neo4j HTTP API." - echo "${cypher_elements_query_result}" - exit 0 - fi -done - -if ! 
cypher_elements_query_result=$(execute_cypher "${CYPHER_DIR}/Count_nodes_and_relationships.cypher"); then - # Error: Couldn't access HTTP after all wait iterations - echo "waitForNeo4jHttp: Error: Failed to access Neo4j HTTP API." - exit 1 -fi - - diff --git a/scripts/waitForNeo4jHttpFunctions.sh b/scripts/waitForNeo4jHttpFunctions.sh new file mode 100644 index 000000000..962b368c0 --- /dev/null +++ b/scripts/waitForNeo4jHttpFunctions.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +# Waits until the HTTP Transactions API of Neo4j Graph Database is available. +# It queries the number of nodes and relationships to assert the connection. + +# Requires executeQueryFunctions.sh + +# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands) +set -eo pipefail + +NEO4J_HTTP_PORT=${NEO4J_HTTP_PORT:-"7474"} + +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +echo "waitForNeo4jHttp: SCRIPTS_DIR=$SCRIPTS_DIR" + +# Get the "cypher" directory by taking the path of this script and going one directory up and then to "cypher". 
+CYPHER_DIR=${CYPHER_DIR:-"${SCRIPTS_DIR}/../cypher"} +echo "waitForNeo4jHttp: CYPHER_DIR=${CYPHER_DIR}" + +# Define functions to execute a Cypher query from within the given file (first and only argument) +source "${SCRIPTS_DIR}/executeQueryFunctions.sh" + +queryDatabase() { + execute_cypher "${CYPHER_DIR}/Count_nodes_and_relationships.cypher" "--no-source-reference-column" +} + +isDatabaseQueryable() { + local cypher_elements_query_result + if cypher_elements_query_result=$(queryDatabase); + then + echo "true" + else + echo "false" + fi +} + +waitUntilDatabaseIsQueryable() { + # List of wait times in seconds per retry + local WAIT_TIMES="16 1 1 2 4 8 16 32 64" + local retries=0 + local isDatabaseReady="false" + + # Wait until the HTTP endpoint is up and a cypher query can be executed + echo "${WAIT_TIMES}" | tr ' ' '\n' | while read -r waitTime; do + echo "waitForNeo4jHttp: Waiting for ${waitTime} second(s) before retry ${retries}" + sleep "${waitTime}" + + # Queries node and relationship count as a basic validation + isDatabaseReady=$(isDatabaseQueryable) + if [[ ${isDatabaseReady} == "false" ]]; then + retries=$((retries+1)) + continue; # query failed -> try again + fi + + local queryResult=$(queryDatabase || true) + if [[ -n "${queryResult}" ]]; then + echo "waitForNeo4jHttp: Successfully accessed Neo4j HTTP API." 
+ echo "${queryResult}" + exit 0 + fi + done + + isDatabaseReady=$(isDatabaseQueryable) + if [[ ${isDatabaseReady} == "false" ]]; then + # Error: Couldn't access HTTP after all wait iterations + echo "waitForNeo4jHttp: Error: Failed to access Neo4j HTTP API" + exit 1 + fi +} \ No newline at end of file From 1dbc616efd54d513b09044d752c61cba8c95cf26 Mon Sep 17 00:00:00 2001 From: JohT Date: Thu, 26 Oct 2023 14:44:44 +0200 Subject: [PATCH 4/9] Minor typo and other fixes --- scripts/prepareAnalysis.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/prepareAnalysis.sh b/scripts/prepareAnalysis.sh index c3e725108..9188c2c70 100644 --- a/scripts/prepareAnalysis.sh +++ b/scripts/prepareAnalysis.sh @@ -22,7 +22,7 @@ fi # Get the "cypher" directory by taking the path of this script, going one directory up and then into "cypher". CYPHER_DIR=${CYPHER_DIR:-"${SCRIPTS_DIR}/../cypher"} # Repository directory containing the cypher queries -echo "prepareAnalysis: CYPHER_DIR=$CYPHER_DIR" +echo "prepareAnalysis: CYPHER_DIR=${CYPHER_DIR}" # Define functions to execute a cypher query from within the given file (first and only argument) source "${SCRIPTS_DIR}/executeQueryFunctions.sh" @@ -34,7 +34,7 @@ EXTERNAL_DEPENDENCIES_CYPHER_DIR="$CYPHER_DIR/External_Dependencies" ARTIFACT_DEPENDENCIES_CYPHER_DIR="$CYPHER_DIR/Artifact_Dependencies" TYPES_CYPHER_DIR="$CYPHER_DIR/Types" -# Preparation - Create indizes +# Preparation - Create indices execute_cypher "${CYPHER_DIR}/Create_index_for_full_qualified_type_name.cypher" # Preparation - Create DEPENDS_ON for every DEPENDS_ON_PACKAGE relationship From ffa358732d6360535c459008a913dfa8e87b7719 Mon Sep 17 00:00:00 2001 From: JohT Date: Thu, 26 Oct 2023 15:17:54 +0200 Subject: [PATCH 5/9] Migrate to jq v1.7 - Fix jq error "is no valid in a csv row" - Skip algorithm estimation result column "mapView" to overcome jq error - Fix jq error "string and array cannot be added" - jq flatten doesn't work on lists in lists on 
Windows/git bash jq-7 - Cypher plugin apoc provides a flatten function for that --- ..._Detection_3a_WeaklyConnectedComponents_Estimate.cypher | 2 +- ...ommunity_Detection_4a_Label_Propagation_Estimate.cypher | 2 +- ...unity_Detection_5a_K_Core_Decomposition_Estimate.cypher | 2 +- ..._Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher | 2 +- .../External_package_usage_per_artifact_sorted_top.cypher | 7 +++---- ...de_Embeddings_1a_Fast_Random_Projection_Estimate.cypher | 4 ++-- .../Node_Embeddings_2a_Hash_GNN_Estimate.cypher | 4 ++-- .../Node_Embeddings_3a_Node2Vec_Estimate.cypher | 4 ++-- cypher/Similarity/Similarity_1a_Estimate.cypher | 4 ++-- 9 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher b/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher index f860a8087..663364e3e 100644 --- a/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher @@ -23,4 +23,4 @@ RETURN requiredMemory ,heapPercentageMin ,heapPercentageMax ,treeView - ,mapView \ No newline at end of file + //,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher b/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher index d8a2925ac..f505397de 100644 --- a/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher @@ -23,4 +23,4 @@ RETURN requiredMemory ,heapPercentageMin ,heapPercentageMax ,treeView - ,mapView \ No newline at end of file + //,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty \ No newline at end of file 
diff --git a/cypher/Community_Detection/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher b/cypher/Community_Detection/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher index ace1e4652..929d57040 100644 --- a/cypher/Community_Detection/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher @@ -21,4 +21,4 @@ RETURN requiredMemory ,heapPercentageMin ,heapPercentageMax ,treeView - ,mapView \ No newline at end of file + //,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher b/cypher/Community_Detection/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher index 9feb260ae..aab6da538 100644 --- a/cypher/Community_Detection/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher @@ -23,4 +23,4 @@ RETURN requiredMemory ,heapPercentageMin ,heapPercentageMax ,treeView - ,mapView \ No newline at end of file + //,mapView //doesn't work on Windows with git bash jq version jq-1.7-dirty \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact_sorted_top.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact_sorted_top.cypher index 178b24e2c..5438c5be6 100644 --- a/cypher/External_Dependencies/External_package_usage_per_artifact_sorted_top.cypher +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_sorted_top.cypher @@ -46,7 +46,6 @@ RETURN artifactName ,externalTypeRate ,numberOfExternalTypeCaller ,numberOfExternalTypeCalls - ,size(externalPackageNames) AS numberOfExternalPackages - ,externalPackageNames[0..4] AS top5ExternalPackages - ,externalTypeNames[0..1] AS someExternalTypes -LIMIT 40 \ No newline at end of 
file + ,size(externalPackageNames) AS numberOfExternalPackages + ,externalPackageNames[0..4] AS top5ExternalPackages + ,apoc.coll.flatten(externalTypeNames)[0..9] AS someExternalTypes \ No newline at end of file diff --git a/cypher/Node_Embeddings/Node_Embeddings_1a_Fast_Random_Projection_Estimate.cypher b/cypher/Node_Embeddings/Node_Embeddings_1a_Fast_Random_Projection_Estimate.cypher index aae3e80be..366ae357c 100644 --- a/cypher/Node_Embeddings/Node_Embeddings_1a_Fast_Random_Projection_Estimate.cypher +++ b/cypher/Node_Embeddings/Node_Embeddings_1a_Fast_Random_Projection_Estimate.cypher @@ -6,5 +6,5 @@ CALL gds.fastRP.stream.estimate( ,relationshipWeightProperty: $dependencies_projection_weight_property } ) - YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView -RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView \ No newline at end of file + YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView +RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView \ No newline at end of file diff --git a/cypher/Node_Embeddings/Node_Embeddings_2a_Hash_GNN_Estimate.cypher b/cypher/Node_Embeddings/Node_Embeddings_2a_Hash_GNN_Estimate.cypher index 051362022..6dc7adc6b 100644 --- a/cypher/Node_Embeddings/Node_Embeddings_2a_Hash_GNN_Estimate.cypher +++ b/cypher/Node_Embeddings/Node_Embeddings_2a_Hash_GNN_Estimate.cypher @@ -11,5 +11,5 @@ CALL gds.beta.hashgnn.stream.estimate( ,outputDimension: toInteger($dependencies_projection_embedding_dimension) } ) - YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView -RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, 
mapView \ No newline at end of file + YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView +RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView \ No newline at end of file diff --git a/cypher/Node_Embeddings/Node_Embeddings_3a_Node2Vec_Estimate.cypher b/cypher/Node_Embeddings/Node_Embeddings_3a_Node2Vec_Estimate.cypher index 01882b134..7583d039e 100644 --- a/cypher/Node_Embeddings/Node_Embeddings_3a_Node2Vec_Estimate.cypher +++ b/cypher/Node_Embeddings/Node_Embeddings_3a_Node2Vec_Estimate.cypher @@ -8,5 +8,5 @@ CALL gds.node2vec.write.estimate( ,writeProperty: $dependencies_projection_write_property } ) - YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView -RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView \ No newline at end of file + YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView +RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1a_Estimate.cypher b/cypher/Similarity/Similarity_1a_Estimate.cypher index 6e54ace9b..bbaa8f55a 100644 --- a/cypher/Similarity/Similarity_1a_Estimate.cypher +++ b/cypher/Similarity/Similarity_1a_Estimate.cypher @@ -8,5 +8,5 @@ CALL gds.nodeSimilarity.write.estimate( ,writeProperty: 'score' ,topK: 3 }) - YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView -RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView \ No newline at end of file + YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, 
bytesMax, heapPercentageMin, heapPercentageMax, treeView +RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView \ No newline at end of file From 9fe313c4c73439a39e7f5985f5e390fa8e4533fd Mon Sep 17 00:00:00 2001 From: JohT Date: Thu, 26 Oct 2023 21:00:15 +0200 Subject: [PATCH 6/9] Support executing Jupyter Notebooks on Windows --- COMMANDS.md | 46 ++--------- GETTING_STARTED.md | 73 +++++++++++++++++ README.md | 27 +++++-- scripts/executeJupyterNotebook.sh | 80 ++++++++++++------- .../reports/ArtifactDependenciesJupyter.sh | 2 +- .../reports/ExternalDependenciesJupyter.sh | 2 +- .../reports/InternalDependenciesJupyter.sh | 2 +- scripts/reports/MethodMetricsJupyter.sh | 2 +- scripts/reports/NodeEmbeddingsJupyter.sh | 2 +- .../ObjectOrientedDesignMetricsJupyter.sh | 2 +- scripts/reports/OverviewJupyter.sh | 2 +- scripts/reports/VisibilityMetricsJupyter.sh | 2 +- scripts/reports/WordcloudJupyter.sh | 2 +- 13 files changed, 161 insertions(+), 83 deletions(-) create mode 100644 GETTING_STARTED.md diff --git a/COMMANDS.md b/COMMANDS.md index 27aa1b54a..7f2ee91e0 100644 --- a/COMMANDS.md +++ b/COMMANDS.md @@ -2,47 +2,11 @@ ## Start an analysis -1. Create a directory for all analysis projects - - ```shell - mkdir temp - cd temp - ``` - -1. Create a working directory for your specific analysis - - ```shell - mkdir MyFirstAnalysis - cd MyFirstAnalysis - ``` - -1. Choose an initial password for Neo4j - - ```shell - export NEO4J_INITIAL_PASSWORD=theinitialpasswordthatihavechosenforneo4j - ``` - -1. Create the `artifacts` directory for the code to be analyzed (without `cd` afterwards) - - ```shell - mkdir artifacts - ``` - -1. Move the artifacts you want to analyze into the `artifacts` directory - -1. Optionally run a predefined script to download artifacts - - ```shell - ./../../scripts/downloader/downloadAxonFramework.sh - ``` - -1. 
Optionally use a script to download artifacts from Maven ([details](#download-maven-artifacts-to-analyze)) - -1. Start the analysis - - ```shell - ./../../scripts/analysis/analyze.sh - ``` +An analysis is started with the script [analyze.sh](./scripts/analysis/analyze.sh). +To run all analysis steps simply execute the following command: +```shell +./../../scripts/analysis/analyze.sh +``` 👉 See [scripts/examples/analyzeAxonFramework.sh](./scripts/examples/analyzeAxonFramework.sh) as an example script that combines all the above steps. 👉 See [Code Structure Analysis Pipeline](./.github/workflows/code-structure-analysis.yml) on how to do this within a GitHub Actions Workflow. diff --git a/GETTING_STARTED.md b/GETTING_STARTED.md new file mode 100644 index 000000000..4cb9fda65 --- /dev/null +++ b/GETTING_STARTED.md @@ -0,0 +1,73 @@ +# Code Graph Analysis Pipeline - Getting started guide + +This document describes the steps to get started as quickly as possible. +For more details on what you can do with this pipeline see [README](./README.md). +For more details on how the commands work in detail see [COMMANDS](./COMMANDS.md). + +## 🛠 Prerequisites + +Please read through the [Prerequisites](./README.md#🛠-prerequisites) in the [README](./README.md) file for what is required to run the scripts. + +## Start an analysis + +1. Create a directory for all analysis projects + + ```shell + mkdir temp + cd temp + ``` + +1. Create a working directory for your specific analysis + + ```shell + mkdir MyFirstAnalysis + cd MyFirstAnalysis + ``` + +1. Choose an initial password for Neo4j + + ```shell + export NEO4J_INITIAL_PASSWORD=theinitialpasswordthatihavechosenforneo4j + ``` + +1. Create the `artifacts` directory for the code to be analyzed (without `cd` afterwards) + + ```shell + mkdir artifacts + ``` + +1. Move the artifacts you want to analyze into the `artifacts` directory + +1. 
Optionally run a predefined script to download artifacts + + ```shell + ./../../scripts/downloader/downloadAxonFramework.sh + ``` + +1. Optionally use a script to download artifacts from Maven ([details](./COMMANDS.md#download-maven-artifacts-to-analyze)) + +1. Start the analysis + + - Without any additional dependencies: + ```shell + ./../../scripts/analysis/analyze.sh --report Csv + ``` + - Jupyter notebook reports when Python and Conda are installed: + ```shell + ./../../scripts/analysis/analyze.sh --report Jupyter + ``` + - Graph visualizations when Node.js and npm are installed: + ```shell + ./../../scripts/analysis/analyze.sh --report Jupyter + ``` + - All reports with Python, Conda, Node.js and npm installed: + ```shell + ./../../scripts/analysis/analyze.sh + ``` + - To explore the database yourself without any automatically generated reports and no additional requirements: + ```shell + ./../../scripts/analysis/analyze.sh + ``` + +👉 See [scripts/examples/analyzeAxonFramework.sh](./scripts/examples/analyzeAxonFramework.sh) as an example script that combines all the above steps. +👉 See [Code Structure Analysis Pipeline](./.github/workflows/code-structure-analysis.yml) on how to do this within a GitHub Actions Workflow. 
\ No newline at end of file diff --git a/README.md b/README.md index 024fe4da2..ef65c78a0 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Contained within this repository is a comprehensive and automated code graph ana --- -## 🚀 Features +## ✨ Features - Analyze static code structure as a graph - Fully automated [pipeline](./.github/workflows/code-structure-analysis.yml) from tool installation to report generation @@ -37,14 +37,29 @@ Here are some reports that utilize Neo4j's [Graph Data Science Library](https:// ## 🛠 Prerequisites -- Java 17 is [required for Neo4j](https://neo4j.com/docs/operations-manual/current/installation/requirements/#deployment-requirements-software) (5.x requirement) +- Java 17 is [required for Neo4j](https://neo4j.com/docs/operations-manual/current/installation/requirements/#deployment-requirements-software) (Neo4j 5.x requirement). +- On Windows it is recommended to use the git bash provided by [git for windows](https://gitforwindows.org). - [jq](https://github.com/jqlang/jq) the "lightweight and flexible command-line JSON processor" needs to be installed. Latest releases: https://github.com/jqlang/jq/releases/latest. Check using `jq --version`. -- Python and a conda package manager are required for Jupyter Notebook reports -- Chromium will automatically be downloaded if needed for Jupyter Notebook reports in PDF format +- Set environment variable `NEO4J_INITIAL_PASSWORD` to a password of your choice. For example: + ```shell + export NEO4J_INITIAL_PASSWORD=neo4j_password_of_my_choice + ``` + To run Jupyter notebooks, create an `.env` file in the folder from where you open the notebook containing for example: `NEO4J_INITIAL_PASSWORD=neo4j_password_of_my_choice` + +### Further Prerequisites for Python Jupyter Notebooks + +- Python is required for Jupyter Notebook reports. 
+- A conda package manager like [Miniconda](https://docs.conda.io/projects/miniconda/en/latest) or [Anaconda](https://www.anaconda.com/download)(Recommended for Windows) is required for Jupyter Notebook reports. +- Chromium will automatically be downloaded if needed for Jupyter Notebook PDF reports generation. + +### Hints for Windows + +- Add this line to your `~/.bashrc` file if you are using Anaconda3: `/c/ProgramData/Anaconda3/etc/profile.d/conda.sh`. Try to find a similar script for other conda package managers or versions. +- Run `conda init` in the git bash opened as administrator. Running it in normal mode usually leads to an error message. -## Getting Started +## 🚀 Getting Started -See [Start an analysis](./COMMANDS.md#start-an-analysis) in the [Commands Reference](./COMMANDS.md) on how to start an analysis on your local machine. +See [GETTING_STARTED.md](./GETTING_STARTED.md) on how to get started on your local machine. ## 🏗 Pipeline and Tools diff --git a/scripts/executeJupyterNotebook.sh b/scripts/executeJupyterNotebook.sh index 6b65bd357..ec9ac2e7a 100755 --- a/scripts/executeJupyterNotebook.sh +++ b/scripts/executeJupyterNotebook.sh @@ -15,17 +15,20 @@ # in the same directory as the given jupyter notebook ipynb file # to create the environment. -# Note: This script uses conda to activate the environment defined in CODEGRAPH_CONDA_ENVIRONMENT (defaults to "codegraph"). -# It it isn't activated, it will save the currently activated environment, change to codegraph, and restore the original one at the end again. -# In cases of an error it might be, that the original conda environment isn't set back. Typically this shouldn't be the case though. 
- -# Requires juypter nbconvert +# Requires jupyter nbconvert, operatingSystemFunctions.sh # Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands) set -eo pipefail ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION=${ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION:-""} # Enable PDF generation for Jupyter Notebooks if set to any non empty value e.g. "true" +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +echo "executeJupyterNotebook: SCRIPTS_DIR=$SCRIPTS_DIR" + # Check if environment variable is set if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then echo "executeJupyterNotebook: Requires environment variable NEO4J_INITIAL_PASSWORD to be set first. Use 'export NEO4J_INITIAL_PASSWORD='." @@ -81,36 +84,59 @@ fi # Define conda environment to use for code structure analysis. Default "codegraph" CODEGRAPH_CONDA_ENVIRONMENT=${CODEGRAPH_CONDA_ENVIRONMENT:-"codegraph"} # Name of the conda environment to use for code graph analysis -echo "executeJupyterNotebook: CODEGRAPH_CONDA_ENVIRONMENT=$CODEGRAPH_CONDA_ENVIRONMENT" +backupCondaEnvironment=$CONDA_DEFAULT_ENV +echo "executeJupyterNotebook: CODEGRAPH_CONDA_ENVIRONMENT=${CODEGRAPH_CONDA_ENVIRONMENT}" +echo "executeJupyterNotebook: CONDA_PREFIX=${CONDA_PREFIX}" +echo "executeJupyterNotebook: Current conda environment=${CONDA_DEFAULT_ENV}" + +# Include operation system function to for example detect Windows. 
+source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" # Determine the path to "conda" if [ -n "${CONDA}" ]; then - pathToConda="$CONDA/bin/" # the trailing slash character is required + if isWindows; then + pathToConda="${CONDA}\\Scripts\\" # the trailing backslash character is required + else + pathToConda="${CONDA}/bin/" # the trailing slash character is required + fi else pathToConda="" fi + echo "executeJupyterNotebook: pathToConda=${pathToConda}" -# Activate conda shell hook -eval "$(${pathToConda}conda shell.bash hook)" +scriptExtension=$(ifWindows ".bat" "") +echo "executeJupyterNotebook: scriptExtension=${scriptExtension}" -# Create (if missing) and activate Conda environment for code structure graph analysis -backupCondaEnvironment=$CONDA_DEFAULT_ENV -if [ ! "$backupCondaEnvironment" = "$CODEGRAPH_CONDA_ENVIRONMENT" ] ; then - backupCondaEnvironment=$CONDA_DEFAULT_ENV +# Activate conda shell hook. Also resets CONDA_DEFAULT_ENV to base. +# That's why CONDA_DEFAULT_ENV (base) is never equal to CODEGRAPH_CONDA_ENVIRONMENT (codegraph). +eval "$(${pathToConda}conda${scriptExtension} shell.bash hook)" +echo "executeJupyterNotebook: Current conda environment after shell hook=${CONDA_DEFAULT_ENV}" - if { ${pathToConda}conda env list | grep "$CODEGRAPH_CONDA_ENVIRONMENT "; } >/dev/null 2>&1; then - echo "executeJupyterNotebook: Conda environment $CODEGRAPH_CONDA_ENVIRONMENT already created" - else - if [ ! -f "${jupyter_notebook_file_path}/environment.yml" ] ; then - echo "executeJupyterNotebook: Couldn't find environment file ${jupyter_notebook_file_path}/environment.yml." 
- exit 2 - fi - echo "executeJupyterNotebook: Creating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}" - ${pathToConda}conda env create --file ${jupyter_notebook_file_path}/environment.yml --name "${CODEGRAPH_CONDA_ENVIRONMENT}" +# Create (if missing) and activate Conda environment for code structure graph analysis +if { "${pathToConda}conda" env list | grep "$CODEGRAPH_CONDA_ENVIRONMENT "; } >/dev/null 2>&1; then + echo "executeJupyterNotebook: Conda environment $CODEGRAPH_CONDA_ENVIRONMENT already created" +else + if [ ! -f "${jupyter_notebook_file_path}/environment.yml" ] ; then + echo "executeJupyterNotebook: Couldn't find environment file ${jupyter_notebook_file_path}/environment.yml." + exit 2 fi - ${pathToConda}conda activate "${CODEGRAPH_CONDA_ENVIRONMENT}" - echo "executeJupyterNotebook: Activated Conda environment: $CODEGRAPH_CONDA_ENVIRONMENT " + echo "executeJupyterNotebook: Creating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}" + "${pathToConda}conda" env create --file "${jupyter_notebook_file_path}/environment.yml" --name "${CODEGRAPH_CONDA_ENVIRONMENT}" +fi + +echo "executeJupyterNotebook: Activating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}" +if isWindows; then + "${pathToConda}conda" activate ${CODEGRAPH_CONDA_ENVIRONMENT} +else + conda activate "${CODEGRAPH_CONDA_ENVIRONMENT}" +fi + +if [ "${CONDA_DEFAULT_ENV}" = "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then + echo "executeJupyterNotebook: Activated Conda environment: ${CONDA_DEFAULT_ENV}" +else + echo "executeJupyterNotebook: Error: Failed to activate Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}. ${CONDA_DEFAULT_ENV} still active." + exit 1 fi # Execute the Jupyter Notebook and write it to the output file name @@ -135,7 +161,7 @@ if [ -n "${ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION}" ]; then fi # Restore Conda environment -if [ ! 
"$backupCondaEnvironment" = "$CODEGRAPH_CONDA_ENVIRONMENT" ] ; then - ${pathToConda}conda activate "${backupCondaEnvironment}" - echo "executeJupyterNotebook: Restored Conda Environment: ${backupCondaEnvironment}" +if [ ! "${backupCondaEnvironment}" = "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then + "${pathToConda}conda" activate "${backupCondaEnvironment}" + echo "executeJupyterNotebook: Restored Conda Environment to ${backupCondaEnvironment}: ${CONDA_DEFAULT_ENV}" fi \ No newline at end of file diff --git a/scripts/reports/ArtifactDependenciesJupyter.sh b/scripts/reports/ArtifactDependenciesJupyter.sh index 6ac1f3ea1..414eecec0 100755 --- a/scripts/reports/ArtifactDependenciesJupyter.sh +++ b/scripts/reports/ArtifactDependenciesJupyter.sh @@ -36,4 +36,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" # Execute and convert the Jupyter Notebook "ArtifactDependencies.ipynb" within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/ArtifactDependencies.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec "${SCRIPTS_DIR}/executeJupyterNotebook.sh" "${JUPYTER_NOTEBOOK_DIRECTORY}/ArtifactDependencies.ipynb") \ No newline at end of file diff --git a/scripts/reports/ExternalDependenciesJupyter.sh b/scripts/reports/ExternalDependenciesJupyter.sh index fea028d51..102e8f613 100755 --- a/scripts/reports/ExternalDependenciesJupyter.sh +++ b/scripts/reports/ExternalDependenciesJupyter.sh @@ -34,4 +34,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" # Execute and convert the following Jupyter Notebook within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/ExternalDependencies.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec 
"${SCRIPTS_DIR}/executeJupyterNotebook.sh" "${JUPYTER_NOTEBOOK_DIRECTORY}/ExternalDependencies.ipynb") \ No newline at end of file diff --git a/scripts/reports/InternalDependenciesJupyter.sh b/scripts/reports/InternalDependenciesJupyter.sh index c5fadb757..3e817c28f 100755 --- a/scripts/reports/InternalDependenciesJupyter.sh +++ b/scripts/reports/InternalDependenciesJupyter.sh @@ -33,4 +33,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" # Execute and convert the Jupyter Notebook "InternalDependencies.ipynb" within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/InternalDependencies.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec "${SCRIPTS_DIR}/executeJupyterNotebook.sh" "${JUPYTER_NOTEBOOK_DIRECTORY}/InternalDependencies.ipynb") \ No newline at end of file diff --git a/scripts/reports/MethodMetricsJupyter.sh b/scripts/reports/MethodMetricsJupyter.sh index ff21a844e..59f37e057 100755 --- a/scripts/reports/MethodMetricsJupyter.sh +++ b/scripts/reports/MethodMetricsJupyter.sh @@ -33,4 +33,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" # Execute and convert the following Jupyter Notebook within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/MethodMetrics.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec "${SCRIPTS_DIR}/executeJupyterNotebook.sh" "${JUPYTER_NOTEBOOK_DIRECTORY}/MethodMetrics.ipynb") \ No newline at end of file diff --git a/scripts/reports/NodeEmbeddingsJupyter.sh b/scripts/reports/NodeEmbeddingsJupyter.sh index 4ff212796..fd03873ed 100755 --- a/scripts/reports/NodeEmbeddingsJupyter.sh +++ b/scripts/reports/NodeEmbeddingsJupyter.sh @@ -34,4 +34,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p 
"${FULL_REPORT_DIRECTORY}" # Execute and convert the Jupyter Notebook "InternalDependencies.ipynb" within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/NodeEmbeddings.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec "${SCRIPTS_DIR}/executeJupyterNotebook.sh" "${JUPYTER_NOTEBOOK_DIRECTORY}/NodeEmbeddings.ipynb") \ No newline at end of file diff --git a/scripts/reports/ObjectOrientedDesignMetricsJupyter.sh b/scripts/reports/ObjectOrientedDesignMetricsJupyter.sh index 63e9ca4b7..dad4805e5 100755 --- a/scripts/reports/ObjectOrientedDesignMetricsJupyter.sh +++ b/scripts/reports/ObjectOrientedDesignMetricsJupyter.sh @@ -33,4 +33,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" # Execute and convert the Jupyter Notebook "ObjectOrientedDesignMetrics.ipynb" within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/ObjectOrientedDesignMetrics.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec "${SCRIPTS_DIR}/executeJupyterNotebook.sh" "${JUPYTER_NOTEBOOK_DIRECTORY}/ObjectOrientedDesignMetrics.ipynb") \ No newline at end of file diff --git a/scripts/reports/OverviewJupyter.sh b/scripts/reports/OverviewJupyter.sh index 7489d8401..2741299c2 100755 --- a/scripts/reports/OverviewJupyter.sh +++ b/scripts/reports/OverviewJupyter.sh @@ -33,4 +33,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" # Execute and convert the following Jupyter Notebook within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/Overview.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec "${SCRIPTS_DIR}/executeJupyterNotebook.sh" 
"${JUPYTER_NOTEBOOK_DIRECTORY}/Overview.ipynb") \ No newline at end of file diff --git a/scripts/reports/VisibilityMetricsJupyter.sh b/scripts/reports/VisibilityMetricsJupyter.sh index 93a28cd77..28c5efd20 100755 --- a/scripts/reports/VisibilityMetricsJupyter.sh +++ b/scripts/reports/VisibilityMetricsJupyter.sh @@ -32,4 +32,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" # Execute and convert the Jupyter Notebook "VisibilityMetrics.ipynb" within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/VisibilityMetrics.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec "${SCRIPTS_DIR}/executeJupyterNotebook.sh" "${JUPYTER_NOTEBOOK_DIRECTORY}/VisibilityMetrics.ipynb") \ No newline at end of file diff --git a/scripts/reports/WordcloudJupyter.sh b/scripts/reports/WordcloudJupyter.sh index a63e5877e..817dcc34c 100755 --- a/scripts/reports/WordcloudJupyter.sh +++ b/scripts/reports/WordcloudJupyter.sh @@ -34,4 +34,4 @@ FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" # Execute and convert the following Jupyter Notebook within the given reports directory -(cd "${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/Wordcloud.ipynb) \ No newline at end of file +(cd "${FULL_REPORT_DIRECTORY}" && exec "${SCRIPTS_DIR}/executeJupyterNotebook.sh" "${JUPYTER_NOTEBOOK_DIRECTORY}/Wordcloud.ipynb") \ No newline at end of file From 211399bf02f59f2e749fb368918dd4e79476903c Mon Sep 17 00:00:00 2001 From: JohT Date: Sun, 29 Oct 2023 17:01:52 +0100 Subject: [PATCH 7/9] Support graph visualizations on Windows --- README.md | 9 ++++++++- graph-visualization/renderVisualizations.js | 8 ++++++-- scripts/reports/GraphVisualization.sh | 10 ++++++++-- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 
ef65c78a0..041b53ed8 100644 --- a/README.md +++ b/README.md @@ -46,12 +46,19 @@ Here are some reports that utilize Neo4j's [Graph Data Science Library](https:// ``` To run Jupyter notebooks, create an `.env` file in the folder from where you open the notebook containing for example: `NEO4J_INITIAL_PASSWORD=neo4j_password_of_my_choice` -### Further Prerequisites for Python Jupyter Notebooks +### Further Prerequisites for Python and Jupyter Notebooks - Python is required for Jupyter Notebook reports. - A conda package manager like [Miniconda](https://docs.conda.io/projects/miniconda/en/latest) or [Anaconda](https://www.anaconda.com/download)(Recommended for Windows) is required for Jupyter Notebook reports. - Chromium will automatically be downloaded if needed for Jupyter Notebook PDF reports generation. +### Further Prerequisites for Graph Visualization + +These tools are needed to run the graph visualization scripts of directory [graph-visualization](./graph-visualization): + +- [Node.js](https://nodejs.org/en) +- [npm](https://www.npmjs.com) + ### Hints for Windows - Add this line to your `~/.bashrc` file if you are using Anaconda3: `/c/ProgramData/Anaconda3/etc/profile.d/conda.sh`. Try to find a similar script for other conda package managers or versions. diff --git a/graph-visualization/renderVisualizations.js b/graph-visualization/renderVisualizations.js index c069609bd..1c3850890 100644 --- a/graph-visualization/renderVisualizations.js +++ b/graph-visualization/renderVisualizations.js @@ -9,6 +9,7 @@ import jimp from "jimp"; const indexOfScriptFilePathArgument = 1; const __filename = process.argv[indexOfScriptFilePathArgument]; const __dirname = dirname(__filename); +console.log(`renderVisualizations.js: dirname=${__dirname}`); /** * Crops the image in the buffer so that there is no empty frame around it. @@ -88,14 +89,17 @@ let browser; * and takes a screenshot of the canvas elements using {@link takeCanvasScreenshots}. 
*/ (async () => { + console.log('renderVisualizations.js: Starting headless browser...'); browser = await puppeteer.launch({ headless: "new" }); // { headless: false } for testing // Get all *.html files in this (script) directory and its subdirectories - const htmlFiles = globSync(`${__dirname}/**/*.html`, { ignore: `${__dirname}/node_modules/**` }); + // The separate filter is needed to ignore the "node_modules" directory. + // Glob's built-in filter doesn't seem to work on Windows. + const htmlFiles = globSync(`${__dirname}/**/*.html`, { absolute: true }).filter(file => !file.includes('node_modules')); for (const htmlFile of htmlFiles) { await takeCanvasScreenshots(browser, htmlFile); } - console.log(`Successfully rendered ${htmlFiles.length} html file(s)`); + console.log(`renderVisualizations.js: Successfully rendered ${htmlFiles.length} html file(s)`); })() .catch((err) => console.error(err)) .finally(() => browser?.close()); diff --git a/scripts/reports/GraphVisualization.sh b/scripts/reports/GraphVisualization.sh index 0ece0c08d..dbc0b12d8 100755 --- a/scripts/reports/GraphVisualization.sh +++ b/scripts/reports/GraphVisualization.sh @@ -24,7 +24,13 @@ echo "GraphVisualization: SCRIPTS_DIR=${SCRIPTS_DIR}" # Get the "graph-visualization" directory by taking the path of this script and going two directory up and then to "visualization". GRAPH_VISUALIZATION_DIRECTORY=${GRAPH_VISUALIZATION_DIRECTORY:-"${SCRIPTS_DIR}/../graph-visualization"} # Repository directory containing the Jupyter Notebooks -echo "GraphVisualization: GRAPH_VISUALIZATION_DIRECTORY=$GRAPH_VISUALIZATION_DIRECTORY" +echo "GraphVisualization: GRAPH_VISUALIZATION_DIRECTORY=${GRAPH_VISUALIZATION_DIRECTORY}" + +# Execute "npm ci" to get all required node modules from npm package manager +if [ ! -d "${GRAPH_VISUALIZATION_DIRECTORY}/node_modules" ] ; then + echo "GraphVisualization: Resolving node_modules..." 
+ (cd "${GRAPH_VISUALIZATION_DIRECTORY}" && exec npm ci) +fi # Execute the node.js script to render the graph visualizations as image files -(cd "${REPORTS_DIRECTORY}" && exec node ${GRAPH_VISUALIZATION_DIRECTORY}/renderVisualizations.js) \ No newline at end of file +(cd "${REPORTS_DIRECTORY}" && exec node "${GRAPH_VISUALIZATION_DIRECTORY}/renderVisualizations.js") \ No newline at end of file From 5860d84dbc64f57a2532b281e3315e16ef237a95 Mon Sep 17 00:00:00 2001 From: JohT Date: Mon, 30 Oct 2023 18:14:13 +0100 Subject: [PATCH 8/9] Add info about conda environment in pipeline --- .github/workflows/code-structure-analysis.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/code-structure-analysis.yml b/.github/workflows/code-structure-analysis.yml index 06cd7ee16..c9c77d670 100644 --- a/.github/workflows/code-structure-analysis.yml +++ b/.github/workflows/code-structure-analysis.yml @@ -97,6 +97,10 @@ jobs: auto-activate-base: false use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly! + - name: Conda environment info + shell: bash -el {0} + run: conda info + - name: Setup temp directory if missing run: mkdir -p ./temp From 831fb8df1014fc2f241fe5482bd6f96c81201f4e Mon Sep 17 00:00:00 2001 From: JohT Date: Tue, 31 Oct 2023 08:34:05 +0100 Subject: [PATCH 9/9] Move conda environment activation to own script --- scripts/activateCondaEnvironment.sh | 85 +++++++++++++++++++++++++++++ scripts/executeJupyterNotebook.sh | 64 +--------------------- 2 files changed, 87 insertions(+), 62 deletions(-) create mode 100755 scripts/activateCondaEnvironment.sh diff --git a/scripts/activateCondaEnvironment.sh b/scripts/activateCondaEnvironment.sh new file mode 100755 index 000000000..5eaf662f7 --- /dev/null +++ b/scripts/activateCondaEnvironment.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash + +# Activates the Conda (Python package manager) environment "codegraph" with all packages needed to execute the Jupyter Notebooks. 
+ +# Note: This script uses the conda environment defined in CODEGRAPH_CONDA_ENVIRONMENT (defaults to "codegraph"). +# If the environment hasn't been created yet, it will be created using "environment.yml" +# in the directory JUPYTER_NOTEBOOK_DIRECTORY (defaults to the "jupyter" directory next to this "scripts" directory). +# This script is intended to be included using "source" so that the activation applies to the calling script. + +# Requires operatingSystemFunctions.sh + +# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands) +set -eo pipefail + +## Get this "scripts" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +echo "activateCondaEnvironment: SCRIPTS_DIR=$SCRIPTS_DIR" + +# Get the "jupyter" directory by taking the path of this script and going one directory up and then to "jupyter". +JUPYTER_NOTEBOOK_DIRECTORY=${JUPYTER_NOTEBOOK_DIRECTORY:-"${SCRIPTS_DIR}/../jupyter"} # Repository directory containing the Jupyter Notebooks +echo "activateCondaEnvironment: JUPYTER_NOTEBOOK_DIRECTORY=$JUPYTER_NOTEBOOK_DIRECTORY" + +# Define conda environment to use for code structure analysis. Default "codegraph" +CODEGRAPH_CONDA_ENVIRONMENT=${CODEGRAPH_CONDA_ENVIRONMENT:-"codegraph"} # Name of the conda environment to use for code graph analysis +echo "activateCondaEnvironment: CONDA_PREFIX=${CONDA_PREFIX}" +echo "activateCondaEnvironment: Current conda environment=${CONDA_DEFAULT_ENV}" +echo "activateCondaEnvironment: Target conda environment=${CODEGRAPH_CONDA_ENVIRONMENT}" + +if [ "${CONDA_DEFAULT_ENV}" = "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then + echo "activateCondaEnvironment: Skipping activation. 
Target conda environment ${CODEGRAPH_CONDA_ENVIRONMENT} is already activated." + return 0 2>/dev/null || exit 0 # "return" when included using "source" ("exit" would also end the calling script), "exit" when executed directly +fi + +# Include operating system functions, for example to detect Windows. +source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" + +# Determine the path to "conda" +if [ -n "${CONDA}" ]; then + if isWindows; then + pathToConda="${CONDA}\\Scripts\\" # the trailing backslash character is required + else + pathToConda="${CONDA}/bin/" # the trailing slash character is required + fi +else + pathToConda="" +fi +echo "activateCondaEnvironment: pathToConda=${pathToConda}" + +scriptExtension=$(ifWindows ".bat" "") +echo "activateCondaEnvironment: scriptExtension=${scriptExtension}" + +# Activate conda shell hook. Also resets CONDA_DEFAULT_ENV to base. +# That's why CONDA_DEFAULT_ENV (base) is never equal to CODEGRAPH_CONDA_ENVIRONMENT (codegraph). +eval "$(${pathToConda}conda${scriptExtension} shell.bash hook)" +echo "activateCondaEnvironment: Current conda environment after shell hook=${CONDA_DEFAULT_ENV}" + +# Create (if missing) and activate Conda environment for code structure graph analysis +if { "${pathToConda}conda" env list | grep "$CODEGRAPH_CONDA_ENVIRONMENT "; } >/dev/null 2>&1; then + echo "activateCondaEnvironment: Conda environment $CODEGRAPH_CONDA_ENVIRONMENT already created" +else + if [ ! -f "${JUPYTER_NOTEBOOK_DIRECTORY}/environment.yml" ] ; then + echo "activateCondaEnvironment: Couldn't find environment file ${JUPYTER_NOTEBOOK_DIRECTORY}/environment.yml." 
+ exit 2 + fi + echo "activateCondaEnvironment: Creating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}" + "${pathToConda}conda" env create --file "${JUPYTER_NOTEBOOK_DIRECTORY}/environment.yml" --name "${CODEGRAPH_CONDA_ENVIRONMENT}" +fi + +echo "activateCondaEnvironment: Activating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}" +"${pathToConda}conda" activate "${CODEGRAPH_CONDA_ENVIRONMENT}" || true # don't abort here ("set -e"): the retry and the final check below handle a failed activation + +if [ "${CONDA_DEFAULT_ENV}" != "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then + echo "activateCondaEnvironment: Retry activating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT} with plain 'conda' command" + conda activate "${CODEGRAPH_CONDA_ENVIRONMENT}" +fi + +if [ "${CONDA_DEFAULT_ENV}" = "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then + echo "activateCondaEnvironment: Activated Conda environment: ${CONDA_DEFAULT_ENV}" +else + echo "activateCondaEnvironment: Error: Failed to activate Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}. ${CONDA_DEFAULT_ENV} still active." + exit 1 +fi \ No newline at end of file diff --git a/scripts/executeJupyterNotebook.sh b/scripts/executeJupyterNotebook.sh index ec9ac2e7a..44b335af5 100755 --- a/scripts/executeJupyterNotebook.sh +++ b/scripts/executeJupyterNotebook.sh @@ -82,62 +82,8 @@ if [ ! -f "${jupyter_notebook_file_path}/.env" ] ; then echo "NEO4J_INITIAL_PASSWORD=${NEO4J_INITIAL_PASSWORD}" > "${jupyter_notebook_file_path}/.env" fi -# Define conda environment to use for code structure analysis. Default "codegraph" -CODEGRAPH_CONDA_ENVIRONMENT=${CODEGRAPH_CONDA_ENVIRONMENT:-"codegraph"} # Name of the conda environment to use for code graph analysis -backupCondaEnvironment=$CONDA_DEFAULT_ENV -echo "executeJupyterNotebook: CODEGRAPH_CONDA_ENVIRONMENT=${CODEGRAPH_CONDA_ENVIRONMENT}" -echo "executeJupyterNotebook: CONDA_PREFIX=${CONDA_PREFIX}" -echo "executeJupyterNotebook: Current conda environment=${CONDA_DEFAULT_ENV}" - -# Include operation system function to for example detect Windows. 
-source "${SCRIPTS_DIR}/operatingSystemFunctions.sh" - -# Determine the path to "conda" -if [ -n "${CONDA}" ]; then - if isWindows; then - pathToConda="${CONDA}\\Scripts\\" # the trailing backslash character is required - else - pathToConda="${CONDA}/bin/" # the trailing slash character is required - fi -else - pathToConda="" -fi - -echo "executeJupyterNotebook: pathToConda=${pathToConda}" - -scriptExtension=$(ifWindows ".bat" "") -echo "executeJupyterNotebook: scriptExtension=${scriptExtension}" - -# Activate conda shell hook. Also resets CONDA_DEFAULT_ENV to base. -# Thats why CONDA_DEFAULT_ENV (base) is never equal to CODEGRAPH_CONDA_ENVIRONMENT (codegraph). -eval "$(${pathToConda}conda${scriptExtension} shell.bash hook)" -echo "executeJupyterNotebook: Current conda environment after shell hook=${CONDA_DEFAULT_ENV}" - -# Create (if missing) and activate Conda environment for code structure graph analysis -if { "${pathToConda}conda" env list | grep "$CODEGRAPH_CONDA_ENVIRONMENT "; } >/dev/null 2>&1; then - echo "executeJupyterNotebook: Conda environment $CODEGRAPH_CONDA_ENVIRONMENT already created" -else - if [ ! -f "${jupyter_notebook_file_path}/environment.yml" ] ; then - echo "executeJupyterNotebook: Couldn't find environment file ${jupyter_notebook_file_path}/environment.yml." 
- exit 2 - fi - echo "executeJupyterNotebook: Creating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}" - "${pathToConda}conda" env create --file "${jupyter_notebook_file_path}/environment.yml" --name "${CODEGRAPH_CONDA_ENVIRONMENT}" -fi - -echo "executeJupyterNotebook: Activating Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}" -if isWindows; then - "${pathToConda}conda" activate ${CODEGRAPH_CONDA_ENVIRONMENT} -else - conda activate "${CODEGRAPH_CONDA_ENVIRONMENT}" -fi - -if [ "${CONDA_DEFAULT_ENV}" = "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then - echo "executeJupyterNotebook: Activated Conda environment: ${CONDA_DEFAULT_ENV}" -else - echo "executeJupyterNotebook: Error: Failed to activate Conda environment ${CODEGRAPH_CONDA_ENVIRONMENT}. ${CONDA_DEFAULT_ENV} still active." - exit 1 -fi +# Create and activate (if necessary) Conda environment as defined in environment variable CODEGRAPH_CONDA_ENVIRONMENT (default "codegraph") +source "${SCRIPTS_DIR}/activateCondaEnvironment.sh" # Execute the Jupyter Notebook and write it to the output file name jupyter nbconvert --to notebook \ @@ -158,10 +104,4 @@ mv -f "${jupyter_notebook_markdown_file}.nostyle" "${jupyter_notebook_markdown_f # Convert the Jupyter Notebook to PDF if [ -n "${ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION}" ]; then jupyter nbconvert --to webpdf --no-input --allow-chromium-download --disable-chromium-sandbox "$jupyter_notebook_output_file" -fi - -# Restore Conda environment -if [ ! "${backupCondaEnvironment}" = "${CODEGRAPH_CONDA_ENVIRONMENT}" ] ; then - "${pathToConda}conda" activate "${backupCondaEnvironment}" - echo "executeJupyterNotebook: Restored Conda Environment to ${backupCondaEnvironment}: ${CONDA_DEFAULT_ENV}" fi \ No newline at end of file