Skip to content

Commit 3be846a

Browse files
committed
Add comma separated paths option to change detection
1 parent 064d92b commit 3be846a

File tree

5 files changed

+139
-46
lines changed

5 files changed

+139
-46
lines changed

scripts/analysis/analyze.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ REPORTS_SCRIPTS_DIRECTORY=${REPORTS_SCRIPTS_DIRECTORY:-"reports"} # Working dire
3838
REPORT_COMPILATIONS_SCRIPTS_DIRECTORY=${REPORT_COMPILATIONS_SCRIPTS_DIRECTORY:-"compilations"} # Repository directory that contains scripts that execute selected report generation scripts
3939
SETTINGS_PROFILE_SCRIPTS_DIRECTORY=${SETTINGS_PROFILE_SCRIPTS_DIRECTORY:-"profiles"} # Repository directory that contains scripts containing settings
4040
ARTIFACTS_DIRECTORY=${ARTIFACTS_DIRECTORY:-"artifacts"} # Working directory containing the artifacts to be analyzed
41+
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"}
4142

4243
# Function to display script usage
4344
usage() {
@@ -86,9 +87,9 @@ if ! [[ ${settingsProfile} =~ ^[[:alnum:]]+$ ]]; then
8687
exit 1
8788
fi
8889

89-
# Check if Neo4j is installed
90-
if [ ! -d "${ARTIFACTS_DIRECTORY}" ] ; then
91-
echo "analyze: The ${ARTIFACTS_DIRECTORY} directory doesn't exist. Please download artifacts first."
90+
# Check if there is something to scan and analyze
91+
if [ ! -d "${ARTIFACTS_DIRECTORY}" ] && [ ! -d "${SOURCE_DIRECTORY}" ] ; then
92+
echo "analyze: Neither ${ARTIFACTS_DIRECTORY} nor the ${SOURCE_DIRECTORY} directory exist. Please download artifacts/sources first."
9293
exit 1
9394
fi
9495

scripts/detectChangedArtifacts.sh

Lines changed: 69 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,45 @@
11
#!/usr/bin/env bash
22

3-
# Detect changed files in the artifacts directory with a text file containing the last hash code of the contents.
3+
# Detect changed files in the artifacts directory or in a given list of paths
4+
# using a text file containing the last hash code of the contents.
45
# The hash value is generated based on all files (their names and properties) within the artifacts directory.
56
# A change is detected when the current hash and the stored differ.
67
#
78
# Command line options:
89
# --readonly Detect changes without creating or updating the change detection file (stateless).
910
# A second call without this option will be needed for the change detection to work.
1011
# This is helpful to decide if an operation should be done based on changes while waiting for its success to finally save the change state.
12+
# --paths Comma-separated list of file- and directory-names that are used for calculating the hash based on their name and size.
1113

1214
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
1315
set -o errexit -o pipefail
1416

17+
# Overrideable defaults
18+
ARTIFACTS_DIRECTORY=${ARTIFACTS_DIRECTORY:-"artifacts"}
19+
ARTIFACTS_CHANGE_DETECTION_HASH_FILE=${ARTIFACTS_CHANGE_DETECTION_HASH_FILE:-"artifactsChangeDetectionHash.txt"} # Name of the file that contains the hash code of the file list for change detection
20+
ARTIFACTS_CHANGE_DETECTION_HASH_FILE_PATH="./${ARTIFACTS_DIRECTORY}/$ARTIFACTS_CHANGE_DETECTION_HASH_FILE"
21+
1522
# Function to display script usage
1623
usage() {
17-
echo "Usage: $0 [--readonly]"
24+
echo "Usage: $0 [--readonly] [--paths <comma separated list of file and directory names>]"
1825
exit 1
1926
}
2027

2128
# Default values
2229
readonlyMode=false
30+
paths="./${ARTIFACTS_DIRECTORY}"
2331

2432
# Parse command line arguments
2533
while [[ $# -gt 0 ]]; do
2634
key="$1"
27-
case $key in
35+
value="${2}"
36+
37+
case ${key} in
2838
--readonly)
2939
readonlyMode=true
40+
;;
41+
--paths)
42+
paths="${value}"
3043
shift
3144
;;
3245
*)
@@ -41,42 +54,84 @@ if ${readonlyMode}; then
4154
echo "detectChangedArtifacts: Readonly mode activated. Change detection file won't be created." >&2
4255
fi
4356

44-
ARTIFACTS_DIRECTORY=${ARTIFACTS_DIRECTORY:-"artifacts"}
45-
ARTIFACTS_CHANGE_DETECTION_HASH_FILE=${ARTIFACTS_CHANGE_DETECTION_HASH_FILE:-"artifactsChangeDetectionHash.txt"} # Name of the file that contains the hash code of the file list for change detection
46-
ARTIFACTS_CHANGE_DETECTION_HASH_FILE_PATH="./${ARTIFACTS_DIRECTORY}/$ARTIFACTS_CHANGE_DETECTION_HASH_FILE"
47-
4857
# Check if there is anything to compare (the list of paths may be empty)
49-
if [ ! -d "./${ARTIFACTS_DIRECTORY}" ] ; then
50-
echo 0 # The artifact directory doesn't exist. There is nothing to compare. Therefore assume that there are no changes.
58+
if [ -z "${paths}" ] ; then
59+
echo 0 # 0=No change detected. The path list is empty. There is nothing to compare. Therefore assume that there are no changes.
5160
exit 0
5261
fi
5362

63+
# Prints the size in bytes of the given regular file.
# Prints 0 when the argument is not a regular file (missing, a directory, ...).
# Arguments: $1 - path of the file to measure
# Outputs:   the byte count (without any spaces) on stdout
get_file_size() {
  local candidate="$1"

  if [ ! -f "${candidate}" ]; then
    echo 0
    return
  fi

  # Strip any spaces from the "wc" output so that only the digits remain.
  wc -c < "${candidate}" | tr -d ' '
}
71+
72+
# Lists the files that belong to a single path, one per line.
#   - directory:    all regular files below it (sorted), skipping everything inside
#                   "node_modules", "target" and "temp" directories
#                   as well as the change detection file itself
#   - regular file: the path itself
#   - anything else: nothing is printed
# Globals:   ARTIFACTS_CHANGE_DETECTION_HASH_FILE (read) - file name excluded from the listing
# Arguments: $1 - file or directory path
# Outputs:   the file paths on stdout
unwind_directories() {
  if [ -d "$1" ]; then
    # The explicit "-print" is essential: without it the implicit print applies to the
    # whole expression, so the pruned directories themselves would be listed as well.
    find "$1" \
      -type d -name "node_modules" -prune -o \
      -type d -name "target" -prune -o \
      -type d -name "temp" -prune -o \
      -not -name "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE}" \
      -type f \
      -print \
      | sort
  elif [ -f "$1" ]; then
    # If it's a file, just echo the file path
    echo "$1"
  fi
}
89+
90+
# Calculates one md5 checksum over the names and sizes of all files
# that belong to a comma-separated list of file and directory names.
# Every list entry is expanded to its files by "unwind_directories" and
# every file is sized by "get_file_size".
# Entries for which "unwind_directories" prints nothing contribute nothing to the checksum.
# Arguments: $1 - comma-separated list of file and directory names
#                 (whitespace around an entry is ignored)
# Outputs:   the md5 checksum on stdout
get_md5_checksum_of_all_file_names_and_sizes() {
  local paths="${1}"
  local files_and_sizes=""
  local path_list=()
  local path files file size

  # Split on commas only, so that names containing spaces stay intact
  # and no glob characters get expanded.
  IFS=',' read -r -a path_list <<< "${paths}"

  for path in "${path_list[@]}"; do
    # Trim leading and trailing whitespace so that lists like "a, b" keep working.
    path="${path#"${path%%[![:space:]]*}"}"
    path="${path%"${path##*[![:space:]]}"}"

    files=$(unwind_directories "${path}")

    # Read line by line instead of word-splitting: a file name with a space
    # must be treated as one file, otherwise its size is never taken into account.
    while IFS= read -r file; do
      [ -n "${file}" ] || continue
      size=$(get_file_size "${file}")
      files_and_sizes="${files_and_sizes}${file}${size}"
    done <<< "${files}"
  done

  echo "${files_and_sizes}" | openssl md5 | awk '{print $2}'
}
107+
54108
# Use find to list all files in the directory with their properties,
55109
# sort the output, and pipe it to md5 to create a hash
56110
# Use openssl md5 that is at least available on Mac and Linux.
57111
# See: https://github.com/TRON-US/go-btfs/issues/90#issuecomment-517409369
58-
CURRENT_ARTIFACTS_HASH="$( find "./$ARTIFACTS_DIRECTORY" -type f -not -name "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE}" -exec openssl md5 -binary {} + | openssl md5 | awk '{print $2}' )"
112+
CURRENT_ARTIFACTS_HASH=$(get_md5_checksum_of_all_file_names_and_sizes "${paths}")
59113

60114
# Assume that the files were changed if the file containing the hash of the file list does not exist yet.
61115
if [ ! -f "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE_PATH}" ] ; then
62-
if ! ${readonlyMode}; then
116+
if [ "${readonlyMode}" = false ] ; then
63117
# Create the file containing the hash of the files list to a new file for the next call
118+
mkdir -p "${ARTIFACTS_DIRECTORY}"
64119
echo "${CURRENT_ARTIFACTS_HASH}" > "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE_PATH}"
65120
echo "detectChangedArtifacts: Change detection file created" >&2
66121
fi
67-
echo 1
122+
echo 1 # 1=Change detected and change detection file created
68123
exit 0
69124
fi
70125

71126
# Assume that there is no change if the saved hash is equal to the current one.
72127
# Otherwise assume that the files were changed and overwrite the hash with the current one for the next call
73128
if [[ $(< "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE_PATH}") == "$CURRENT_ARTIFACTS_HASH" ]] ; then
74-
echo 0
129+
echo 0 # 0=No change detected
75130
else
76131
if ! ${readonlyMode}; then
77132
# Write the updated hash into the file containing the hash of the files list for the next call
78133
echo "$CURRENT_ARTIFACTS_HASH" > "${ARTIFACTS_CHANGE_DETECTION_HASH_FILE_PATH}"
79134
echo "detectChangedArtifacts: Change detection file updated" >&2
80135
fi
81-
echo 2
136+
echo 2 # 2=Change detected and change detection file updated
82137
fi

scripts/findPathsToScan.sh

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env bash
2+
3+
# Finds all files and directories to scan and analyze and provides them as comma-separated list.
4+
5+
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
6+
set -o errexit -o pipefail
7+
8+
ARTIFACTS_DIRECTORY=${ARTIFACTS_DIRECTORY:-"artifacts"}
9+
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"}
10+
11+
# Echoes the given argument followed by a comma (the list separator) when it is non-empty
# and just an empty line when it is empty or missing.
# Arguments: $1 - text that should be terminated with a separator
appendNonEmpty() {
  local text="${1}"

  # "${text:+...}" expands to nothing for an empty value and to "<text>," otherwise.
  echo "${text:+${text},}"
}
20+
21+
# Collect all files and directories to scan as one comma-separated list
directoriesAndFilesToScan=""

if [ -d "${ARTIFACTS_DIRECTORY}" ] ; then
  # Scan all files in the artifacts directory (e.g. *.ear, *.war, *.jar for Java)
  directoriesAndFilesToScan="$(appendNonEmpty "${directoriesAndFilesToScan}")./${ARTIFACTS_DIRECTORY}"
else
  echo "findPathsToScan: Artifacts directory ${ARTIFACTS_DIRECTORY} doesn't exist and will therefore be skipped." >&2
fi

if [ -d "${SOURCE_DIRECTORY}" ] ; then
  # Scan Typescript analysis json data files in the source directory.
  # Every hit gets the prefix "typescript:project::" and the hits are joined with commas.
  # "sed 's/,$//'" removes the trailing comma in a way that works with GNU and BSD sed.
  typescriptAnalysisFiles="$(find "${SOURCE_DIRECTORY}" -type f -path "*/.reports/jqa/ts-output.json" -exec echo typescript:project::{} \; | tr '\n' ',' | sed 's/,$//')"
  if [ -n "${typescriptAnalysisFiles}" ]; then
    directoriesAndFilesToScan="$(appendNonEmpty "${directoriesAndFilesToScan}")${typescriptAnalysisFiles}"
  fi

  # Scan package.json files for npm (nodes package manager) in the source directory.
  # "node_modules" directories are pruned. The paths are printed by find directly:
  # piping them through "xargs -I {}" without a command runs a bare "echo" per item
  # and therefore loses the paths.
  npmPackageJsonFiles="$(find "${SOURCE_DIRECTORY}" -type d -name node_modules -prune -o -name 'package.json' -print | tr '\n' ',' | sed 's/,$//')"
  if [ -n "${npmPackageJsonFiles}" ]; then
    directoriesAndFilesToScan="$(appendNonEmpty "${directoriesAndFilesToScan}")${npmPackageJsonFiles}"
  fi
else
  echo "findPathsToScan: Source directory ${SOURCE_DIRECTORY} doesn't exist and will therefore be skipped." >&2
fi

# Print the list without a trailing newline
printf '%s' "${directoriesAndFilesToScan}"

scripts/resetAndScan.sh

Lines changed: 13 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
# CAUTION: This script deletes all relationships and nodes in the Neo4j Graph Database.
66
# Note: The environment variable NEO4J_INITIAL_PASSWORD is required to login to Neo4j.
77

8+
# Command line options:
9+
# This script takes one parameter that contains the comma-separated list of paths to scan
10+
811
# Requires importGit.sh
912

1013
# Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands)
@@ -17,7 +20,6 @@ JQASSISTANT_CONFIG_TEMPLATE=${JQASSISTANT_CONFIG_TEMPLATE:-"template-neo4jv5-jqa
1720
NEO4J_INITIAL_PASSWORD=${NEO4J_INITIAL_PASSWORD:-""} # Neo4j login password that was set to replace the temporary initial password
1821
ARTIFACTS_DIRECTORY=${ARTIFACTS_DIRECTORY:-"artifacts"} # Directory with the Java artifacts to scan and analyze
1922
TOOLS_DIRECTORY=${TOOLS_DIRECTORY:-"tools"} # Get the tools directory (defaults to "tools")
20-
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-"source"} # Get the source repository directory (defaults to "source")
2123

2224
## Get this "scripts" directory if not already set
2325
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
@@ -31,6 +33,15 @@ JQASSISTANT_DIRECTORY="${TOOLS_DIRECTORY}/${JQASSISTANT_CLI_ARTIFACT}-${JQASSIST
3133
JQASSISTANT_BIN="${JQASSISTANT_DIRECTORY}/bin"
3234
JQASSISTANT_CONFIG_TEMPLATE_PATH="${SCRIPTS_DIR}/configuration/${JQASSISTANT_CONFIG_TEMPLATE}"
3335

36+
# Parse the single parameter that contains the comma-separated file and directory names to scan.
37+
if [ "$#" -eq 0 ]; then
38+
echo "resetAndScan: Skipping reset and scan since no paths to scan were passed."
39+
return 0
40+
else
41+
directoriesAndFilesToScan="$1"
42+
shift
43+
fi
44+
3445
# Check if environment variable is set
3546
if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then
3647
echo "resetAndScan: Error: Requires environment variable NEO4J_INITIAL_PASSWORD to be set first. Use 'export NEO4J_INITIAL_PASSWORD=<your-own-password>'."
@@ -61,30 +72,7 @@ else
6172
echo "resetAndScan: jQAssistant configuration won't be changed since it already exists."
6273
fi
6374

64-
# -- Collect all files and directories to scan ---------------------
65-
directoriesAndFilesToScan=""
66-
67-
# Scan all files in the artifacts directory (e.g. *.ear, *.war, *.jar for Java)
68-
if [ -d "${ARTIFACTS_DIRECTORY}" ] ; then
69-
directoriesAndFilesToScan="${directoriesAndFilesToScan},./${ARTIFACTS_DIRECTORY}"
70-
fi
71-
72-
if [ -d "${SOURCE_DIRECTORY}" ] ; then
73-
# Scan Typescript analysis json data files in the source directory
74-
typescriptAnalysisFiles="$(find "${SOURCE_DIRECTORY}" -type f -path "*/.reports/jqa/ts-output.json" -exec echo typescript:project::{} \; | tr '\n' ',' | sed 's/,$/\n/')"
75-
if [ -n "${typescriptAnalysisFiles}" ]; then
76-
directoriesAndFilesToScan="${directoriesAndFilesToScan},${typescriptAnalysisFiles}"
77-
fi
78-
# Scan package.json files for npm (nodes package manager) in the source directory
79-
npmPackageJsonFiles="$(find "${SOURCE_DIRECTORY}" -type d -name node_modules -prune -o -name 'package.json' -print0 | xargs -0 -r -I {} | tr '\n' ',' | sed 's/,$/\n/')"
80-
if [ -n "${npmPackageJsonFiles}" ]; then
81-
directoriesAndFilesToScan="${directoriesAndFilesToScan},${npmPackageJsonFiles}"
82-
fi
83-
fi
84-
85-
# ------------------------------------------------------------------
86-
87-
# Use jQAssistant to scan the downloaded artifacts and write the results into the separate, local Neo4j Graph Database
75+
# Use jQAssistant to scan the downloaded artifacts and stores the results into the local Neo4j Graph Database
8876
echo "resetAndScan: Using jQAssistant CLI version ${JQASSISTANT_CLI_VERSION} to scan the following files and directories:"
8977
for directoryOrFileToScan in ${directoriesAndFilesToScan//,/ }; do
9078
echo " - ${directoryOrFileToScan}"

scripts/resetAndScanChanged.sh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Note: "resetAndScan" expects jQAssistant to be installed in the "tools" directory.
66

7-
# Requires resetAndScan.sh
7+
# Requires resetAndScan.sh, detectChangedArtifacts.sh, findPathsToScan.sh
88

99
# Fail on any error ("-e" = exit on first error, "-o pipefail" exist on errors within piped commands)
1010
set -o errexit -o pipefail
@@ -16,12 +16,14 @@ set -o errexit -o pipefail
1616
SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts
1717
echo "resetAndScanChanged SCRIPTS_DIR=${SCRIPTS_DIR}"
1818

19+
filesAndDirectoriesToScan=$( source "${SCRIPTS_DIR}/findPathsToScan.sh" )
20+
1921
# Scan and analyze Artifacts when they were changed
20-
changeDetectionReturnCode=$( source "${SCRIPTS_DIR}/detectChangedArtifacts.sh" --readonly)
22+
changeDetectionReturnCode=$( source "${SCRIPTS_DIR}/detectChangedArtifacts.sh" --readonly --paths "${filesAndDirectoriesToScan}")
2123
if [[ "${changeDetectionReturnCode}" == "0" ]] ; then
2224
echo "resetAndScanChanged: Artifacts unchanged. Scan skipped."
2325
else
2426
echo "resetAndScanChanged: Detected change (${changeDetectionReturnCode}). Resetting database and scanning artifacts."
25-
source "${SCRIPTS_DIR}/resetAndScan.sh"
26-
changeDetectionReturnCode=$( source "${SCRIPTS_DIR}/detectChangedArtifacts.sh")
27+
source "${SCRIPTS_DIR}/resetAndScan.sh" "${filesAndDirectoriesToScan}"
28+
changeDetectionReturnCode=$( source "${SCRIPTS_DIR}/detectChangedArtifacts.sh" --paths "${filesAndDirectoriesToScan}")
2729
fi

0 commit comments

Comments
 (0)