#!/usr/bin/env bash

# Pipeline that coordinates anomaly detection using the Graph Data Science Library of Neo4j.
# It requires an already running Neo4j graph database with already scanned and analyzed artifacts.
# The results will be written into the sub directory reports/anomaly-detection.

# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.

# Requires executeQueryFunctions.sh, projectionFunctions.sh, cleanupAfterReportGeneration.sh

# Fail on any error ("-o errexit" = exit on first error, "-o pipefail" = exits on errors within piped commands)
set -o errexit -o pipefail

# Overrideable Constants (defaults also defined in sub scripts)
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}

## Get the directory of this script if not already set.
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
# This way non-standard tools like readlink aren't needed.
# The output of cd is discarded: cd prints the directory it changed to when it was found through a
# non-empty CDPATH entry, which would otherwise end up in the variable in addition to the output of pwd.
ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd -P)}
echo "anomalyDetectionPipeline: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
# Get the "scripts" directory by taking the path of this script and going two directories up.
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/../../scripts"} # Repository directory containing the shell scripts
# Get the "cypher" query directory for gathering features.
ANOMALY_DETECTION_FEATURE_CYPHER_DIR=${ANOMALY_DETECTION_FEATURE_CYPHER_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/features"}

# Function to display script usage.
# Writes the usage line to stderr, wrapped in the COLOR_ERROR / COLOR_DEFAULT
# escape sequences (empty when those variables are not set), and exits with status 1.
usage() {
    echo -e "${COLOR_ERROR}" >&2
    echo "Usage: $0 [--verbose]" >&2
    echo -e "${COLOR_DEFAULT}" >&2
    exit 1
}

# Default values
verboseMode="" # either "" or "--verbose"

# Parse command line arguments.
# Only "--verbose" is supported. Any other argument prints an error and the usage, which exits with status 1.
while [[ $# -gt 0 ]]; do
    key="$1"

    case "${key}" in
        --verbose)
            verboseMode="--verbose"
            ;;
        *)
            echo -e "${COLOR_ERROR}anomalyDetectionPipeline: Error: Unknown option: ${key}${COLOR_DEFAULT}" >&2
            usage
            ;;
    esac
    shift || true # ignore error when there are no more arguments
done

# Define functions to execute a cypher query from within a given file (first and only argument) like "execute_cypher"
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"

# Define functions to create and delete Graph Projections like "createUndirectedDependencyProjection"
source "${SCRIPTS_DIR}/projectionFunctions.sh"

# Create report directory (including missing parent directories)
REPORT_NAME="anomaly-detection"
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
mkdir -p "${FULL_REPORT_DIRECTORY}"

# Query Parameter key pairs for projection and algorithm side.
# The PROJECTION_* keys are the parameter names passed to the projection functions,
# the ALGORITHM_* keys are the parameter names passed to "anomaly_detection_pipeline".
PROJECTION_NAME="dependencies_projection"
ALGORITHM_PROJECTION="projection_name"

PROJECTION_NODE="dependencies_projection_node"
ALGORITHM_NODE="projection_node_label"

PROJECTION_WEIGHT="dependencies_projection_weight_property"
ALGORITHM_WEIGHT="projection_weight_property"

# Code independent algorithm parameters
COMMUNITY_PROPERTY="community_property=communityLeidenIdTuned"

# Query (or recalculate) features.
#
# For every feature, the "...-Exists.cypher" and the "...-Write.cypher" query files are passed
# (in that order, followed by all given parameters) to "execute_cypher_queries_until_results",
# so that a feature is only determined if not already done.
#
# Features (in order of execution):
# - Betweenness: Betweenness centrality (with the directed graph projection)
# - LocalClusteringCoefficient: Local clustering coefficient
# - PageRank: Page rank
# - ArticleRank: Article rank
#
# Required Parameters:
# - projection_name=...
#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
anomaly_detection_features() {
    local feature
    for feature in Betweenness LocalClusteringCoefficient PageRank ArticleRank; do
        execute_cypher_queries_until_results \
            "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-${feature}-Exists.cypher" \
            "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-${feature}-Write.cypher" \
            "${@}"
    done
}

# Run the anomaly detection pipeline.
#
# Steps (each of the first three is timed with "time"):
# 1. Query (or recalculate) the features with "anomaly_detection_features".
# 2. Run Python: Get tuned Leiden communities as a reference to tune clustering.
# 3. Run Python: Tuned Fast Random Projection and tuned HDBSCAN clustering.
# 4. Query Results: Output all collected features into the CSV file
#    "${FULL_REPORT_DIRECTORY}/<projection_node_label>AnomalyDetection.csv".
#
# All given parameters are passed on unchanged to every step.
# The Python scripts additionally get "--verbose" when verboseMode is set.
#
# Required Parameters:
# - projection_name=...
#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
anomaly_detection_pipeline() {
    time anomaly_detection_features "${@}"
    # ${verboseMode:+...} expands to nothing when verboseMode is empty, so no empty argument is passed.
    time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedLeidenCommunityDetection.py" "${@}" ${verboseMode:+"${verboseMode}"}
    time "${ANOMALY_DETECTION_SCRIPT_DIR}/tunedNodeEmbeddingClustering.py" "${@}" ${verboseMode:+"${verboseMode}"}

    # Declaration and assignment are separated so that a failing extraction isn't masked by "local".
    local nodeLabel
    nodeLabel=$(extractQueryParameter "projection_node_label" "${@}")
    execute_cypher "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeatures.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection.csv"
}

# -- Java Artifact Node Embeddings -------------------------------

# The pipeline only runs when the undirected projection could be created.
# The directed projection is created in addition under the same name with the suffix "-directed".
if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection-directed" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"
    anomaly_detection_pipeline "${ALGORITHM_PROJECTION}=artifact-anomaly-detection" "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}"
fi

# -- Java Package Node Embeddings --------------------------------

if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection-directed" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"
    anomaly_detection_pipeline "${ALGORITHM_PROJECTION}=package-anomaly-detection" "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces" "${COMMUNITY_PROPERTY}"
fi

# TODO: Reactivate the following (currently disabled) sections for Java Types and Typescript Modules.
# # -- Java Type Node Embeddings -----------------------------------

# if createUndirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection"; then
#     createDirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection-directed"
#     anomaly_detection_pipeline "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}"
# fi

# # -- Typescript Module Node Embeddings ---------------------------

# if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"; then
#     createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"
#     anomaly_detection_pipeline "${ALGORITHM_PROJECTION}=typescript-module-embedding" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${COMMUNITY_PROPERTY}"
# fi

# ---------------------------------------------------------------

# Clean-up after report generation. Empty reports will be deleted.
# The report directory is passed as the first argument to the sourced script.
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"

# Log the completion with a timestamp including the UTC offset (e.g. 2024-01-31T12:34:56+0100).
echo "anomalyDetectionPipeline: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."