#!/usr/bin/env bash

# Pipeline that coordinates anomaly detection using the Graph Data Science Library of Neo4j.
# It requires an already running Neo4j graph database with already scanned and analyzed artifacts.
# The results will be written into the sub directory reports/anomaly-detection.

# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.

# Requires executeQueryFunctions.sh, projectionFunctions.sh, cleanupAfterReportGeneration.sh

# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
set -o errexit -o pipefail

# Overrideable Constants (defaults also defined in sub scripts)
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}

## Get the directory this script is located in, if not already set.
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
# This way non-standard tools like readlink aren't needed.
ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)}
echo "anomalyDetectionCsv: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
# Get the "scripts" directory by taking the path of this script, going two directories up and into "scripts".
SCRIPTS_DIR=${SCRIPTS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/../../scripts"} # Repository directory containing the shell scripts
# Get the "cypher" query directories for gathering features and for running the anomaly queries.
ANOMALY_DETECTION_FEATURE_CYPHER_DIR=${ANOMALY_DETECTION_FEATURE_CYPHER_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/features"}
ANOMALY_DETECTION_QUERY_CYPHER_DIR=${ANOMALY_DETECTION_QUERY_CYPHER_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/queries"}

# Define functions to execute a cypher query from within a given file (first and only argument) like "execute_cypher"
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"

# Define functions to create and delete Graph Projections like "createUndirectedDependencyProjection"
source "${SCRIPTS_DIR}/projectionFunctions.sh"
34+
# Query or recalculate features.
#
# For every feature an "Exists" query is passed together with a "Write" query to
# "execute_cypher_queries_until_results" (defined in a sourced script), followed
# by all query parameters given to this function.
#
# Required Parameters:
# - projection_name=...
#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
anomaly_detection_features() {
    local nodeLabel
    nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )

    echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Collecting features for ${nodeLabel} nodes..."

    # Determine the Betweenness centrality (with the directed graph projection) if not already done
    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Betweenness-Exists.cypher" \
                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-Betweenness-Write.cypher" "${@}"
    # Determine the local clustering coefficient if not already done
    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-LocalClusteringCoefficient-Exists.cypher" \
                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-LocalClusteringCoefficient-Write.cypher" "${@}"
    # Determine the page rank if not already done
    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageRank-Exists.cypher" \
                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageRank-Write.cypher" "${@}"
    # Determine the article rank if not already done.
    # The write query needs to be the ArticleRank one (not PageRank), otherwise the article rank would never be written.
    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Write.cypher" "${@}"
}
# Run queries to find anomalies in the graph.
#
# Every query file "AnomalyDetection<Name>.cypher" within ANOMALY_DETECTION_QUERY_CYPHER_DIR
# is executed with all given query parameters and its standard output is written to
# "${FULL_REPORT_DIRECTORY}/<node label>AnomalyDetection_<Name>.csv".
#
# Globals read: ANOMALY_DETECTION_QUERY_CYPHER_DIR, FULL_REPORT_DIRECTORY
#
# Required Parameters:
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
anomaly_detection_queries() {
    local nodeLabel
    nodeLabel=$( extractQueryParameter "projection_node_label" "${@}" )

    echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Executing Queries for ${nodeLabel} nodes..."

    # The query names are listed in their execution order.
    local queryName
    for queryName in \
        "PotentialImbalancedRoles" \
        "PotentialOverEngineerOrIsolated" \
        "HiddenBridgeNodes" \
        "PopularBottlenecks" \
        "SilentCoordinators" \
        "OverReferencesUtilities" \
        "FragileStructuralBridges" \
        "DependencyHungryOrchestrators" \
        "UnexpectedCentralNodes"
    do
        execute_cypher "${ANOMALY_DETECTION_QUERY_CYPHER_DIR}/AnomalyDetection${queryName}.cypher" "${@}" \
            > "${FULL_REPORT_DIRECTORY}/${nodeLabel}AnomalyDetection_${queryName}.csv"
    done
}
84+
# Run the anomaly detection pipeline.
#
# First collects the features (anomaly_detection_features), then runs the
# anomaly queries (anomaly_detection_queries). Both steps get all given
# query parameters and are measured with "time".
#
# Required Parameters:
# - projection_name=...
#   Name prefix for the in-memory projection name. Example: "package-anomaly-detection"
# - projection_node_label=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
anomaly_detection_csv_reports() {
    time anomaly_detection_features "${@}"
    time anomaly_detection_queries "${@}"
}
98+
# Create report directory
REPORT_NAME="anomaly-detection"
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
mkdir -p "${FULL_REPORT_DIRECTORY}"

# Query Parameter key pairs for projection and algorithm side.
# The projection functions and the algorithm queries use different parameter
# names, so each value is passed once under its "PROJECTION_*" key and once
# under its "ALGORITHM_*" key.
PROJECTION_NAME="dependencies_projection"
ALGORITHM_PROJECTION="projection_name"

PROJECTION_NODE="dependencies_projection_node"
ALGORITHM_NODE="projection_node_label"

PROJECTION_WEIGHT="dependencies_projection_weight_property"
ALGORITHM_WEIGHT="projection_weight_property"

# Code independent algorithm parameters
COMMUNITY_PROPERTY="community_property=communityLeidenIdTuned"
EMBEDDING_PROPERTY="embedding_property=embeddingsFastRandomProjectionTunedForClustering"

# Each section below only runs the reports when the undirected projection
# could be created (the projection function returned successfully).

# -- Java Artifact Anomaly Detection -----------------------------

if createUndirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=artifact-anomaly-detection-directed" "${PROJECTION_NODE}=Artifact" "${PROJECTION_WEIGHT}=weight"
    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=artifact-anomaly-detection" "${ALGORITHM_NODE}=Artifact" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
fi

# -- Java Package Anomaly Detection ------------------------------

if createUndirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=package-anomaly-detection-directed" "${PROJECTION_NODE}=Package" "${PROJECTION_WEIGHT}=weight25PercentInterfaces"
    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=package-anomaly-detection" "${ALGORITHM_NODE}=Package" "${ALGORITHM_WEIGHT}=weight25PercentInterfaces" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
fi

# -- Java Type Anomaly Detection ---------------------------------

if createUndirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection"; then
    createDirectedJavaTypeDependencyProjection "${PROJECTION_NAME}=type-anomaly-detection-directed"
    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=type-anomaly-detection" "${ALGORITHM_NODE}=Type" "${ALGORITHM_WEIGHT}=weight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
fi

# -- Typescript Module Anomaly Detection -------------------------

# NOTE(review): The projection name "typescript-module-embedding" doesn't follow the
# "*-anomaly-detection" naming of the other sections. It looks like a leftover from
# another script. Confirm before renaming since other queries might reference it.
if createUndirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"; then
    createDirectedDependencyProjection "${PROJECTION_NAME}=typescript-module-embedding-directed" "${PROJECTION_NODE}=Module" "${PROJECTION_WEIGHT}=lowCouplingElement25PercentWeight"
    anomaly_detection_csv_reports "${ALGORITHM_PROJECTION}=typescript-module-embedding" "${ALGORITHM_NODE}=Module" "${ALGORITHM_WEIGHT}=lowCouplingElement25PercentWeight" "${COMMUNITY_PROPERTY}" "${EMBEDDING_PROPERTY}"
fi

# ---------------------------------------------------------------

# Clean-up after report generation. Empty reports will be deleted.
source "${SCRIPTS_DIR}/cleanupAfterReportGeneration.sh" "${FULL_REPORT_DIRECTORY}"

echo "anomalyDetectionCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished."
0 commit comments