Skip to content

Commit 2224766

Browse files
committed
Add graph visualizations to anomaly detection
1 parent 8f86f7a commit 2224766

File tree

10 files changed

+745
-19
lines changed

10 files changed

+745
-19
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Entry point for the anomaly detection graph visualizations.
#
# "VisualizationReports.sh" triggers this script dynamically when report "All" or "Visualization" is enabled.
# The script itself only resolves directories and then hands over to the dedicated
# "anomalyDetectionGraphVisualization.sh" script inside the "graphs" directory that does the "heavy lifting".
#
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
#
# Requires anomalyDetectionGraphVisualization.sh

# Abort on the first failing command ("-e") and when a command inside a pipeline fails ("-o pipefail").
set -eo pipefail

# Overrideable constants (defaults also defined in sub scripts)
: "${REPORTS_DIRECTORY:=reports}"

# Resolve the directory this script is located in, unless the caller already provided it.
# BASH_SOURCE is used to locate the script. CDPATH is pinned for the "cd" call to prevent
# unintended directory changes, and "pwd -P" makes non-standard tools like readlink unnecessary.
if [ -z "${ANOMALY_DETECTION_SCRIPT_DIR:-}" ]; then
    ANOMALY_DETECTION_SCRIPT_DIR=$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)
fi

# The "graphs" directory next to this script is used as the location of the delegate script sourced below.
: "${ANOMALY_DETECTION_GRAPHS_DIR:=${ANOMALY_DETECTION_SCRIPT_DIR}/graphs}"

# Delegate the execution to the responsible script (sourced, so it runs in this shell).
. "${ANOMALY_DETECTION_GRAPHS_DIR}/anomalyDetectionGraphVisualization.sh"
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
// Anomaly Detection Graphs: Find top nodes marked as "Authority" including their incoming and outgoing dependencies, sizes based on PageRank and thick outline for nodes with high Page Rank to Article Rank difference in Graphviz format.
//
// Parameters:
//   $projection_node_label  label that the central node and all shown dependency nodes need to have
//   $projection_node_rank   rank of the central node, compared (as integer) to "anomalyAuthorityRank"
//   $projection_language    only used as part of the graph title
// Returns:
//   graphVizOutputLine      one Graphviz statement per row (graph title, central node, nodes and edges)
//
// Steps 3 to 5 use OPTIONAL MATCH on purpose: a central node without incoming or without outgoing
// dependencies, or without dependencies in between its neighbours, still needs to produce the
// statements gathered so far. Missing matches lead to null strings that "collect" ignores.

// Step 1: Query overall statistics, e.g. max weight for later normalization.
//         The fallback weight 1 matches the fallback used for the single edges below,
//         so that "maxWeight" is never null when there is at least one dependency.
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
WHERE $projection_node_label IN labels(sourceForStatistics)
  AND $projection_node_label IN labels(targetForStatistics)
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight, 1)) AS maxWeight
    ,percentileDisc(sourceForStatistics.centralityPageRankToArticleRankDifference, 0.80) AS pageToArticleRankThreshold
    ,percentileDisc(targetForStatistics.centralityPageRankNormalized, 0.80) AS pageRankThreshold
// Step 2: Query selected central node
MATCH (central)
WHERE $projection_node_label IN labels(central)
  AND central.anomalyAuthorityRank = toInteger($projection_node_rank)
WITH maxWeight
    ,pageToArticleRankThreshold
    ,pageRankThreshold
    ,central
    ,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Authority\\n" AS graphLabel
    ,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
    ,[] AS graphVizOutput
WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
WITH *, "🏛️ authority #" + central.anomalyAuthorityRank + "\\n" + central.name AS centralNodeLabel
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
// Step 3: Query direct incoming dependencies to the central node (at most 40, highest weight first)
OPTIONAL MATCH (source)-[dependency:DEPENDS_ON]->(central)
WHERE $projection_node_label IN labels(source)
  AND source.outgoingDependencies > 0
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
ORDER BY dependency.weight DESC, source.name ASC
LIMIT 40
// Edge thickness is the weight relative to the overall maximum, scaled to the range 0.4 to 2.9
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
// Thick outline for nodes with a high PageRank to ArticleRank difference
WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold
             THEN 5 ELSE 2 END AS scaledNodeBorder
// Bigger circle shape for nodes with a high PageRank
WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold
             THEN "shape = \"circle\"; height=2; " ELSE "" END AS nodeEmphasis
WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%" AS labelValue
// Add the last part of the element id to the node name to make it unique.
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directInBorder
// Split long names like inner classes identified by a dollar sign ($)
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directInLabel
WITH *, " [" + nodeEmphasis + directInLabel + directInBorder + "]; " AS directInNodeProperties
WITH *, "\"" + sourceId + "\" " + directInNodeProperties AS directInNode
WITH maxWeight
    ,pageToArticleRankThreshold
    ,pageRankThreshold
    ,central
    ,graphVizOutput
    ,collect(source) AS incomingDependencyNodes
    ,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
WITH *, graphVizOutput + directInEdges AS graphVizOutput
// Step 4: Query direct outgoing dependencies from the central node (at most 40, highest weight first)
OPTIONAL MATCH (source)<-[dependency:DEPENDS_ON]-(central)
WHERE $projection_node_label IN labels(source)
  AND source.incomingDependencies > 0
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
ORDER BY dependency.weight DESC, source.name ASC
LIMIT 40
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
// Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
WITH *, "color = 5; fillcolor = 1; " AS directOutColor
WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold
             THEN 5 ELSE 2 END AS scaledNodeBorder
WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold
             THEN "shape = \"circle\"; height=2; " ELSE "" END AS nodeEmphasis
WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%" AS labelValue
// Add the last part of the element id to the node name to make it unique.
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directOutBorder
// Split long names like inner classes identified by a dollar sign ($)
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directOutLabel
WITH *, " [" + nodeEmphasis + directOutLabel + directOutBorder + directOutColor + "]; " AS directOutNodeProperties
WITH *, "\"" + sourceId + "\" " + directOutNodeProperties AS directOutNode
WITH maxWeight
    ,central
    ,graphVizOutput
    ,incomingDependencyNodes
    ,collect(source) AS outgoingDependencyNodes
    ,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
WITH *, graphVizOutput + directOutEdges AS graphVizOutput
WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
// Step 5: Query dependencies between direct dependencies outside the central node.
//         An empty list is replaced by [null] since UNWIND of an empty list would discard the only row.
UNWIND (CASE WHEN size(directDependentNodes) = 0 THEN [null] ELSE directDependentNodes END) AS directDependentNode
OPTIONAL MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
WHERE anotherDirectDependentNode IN directDependentNodes
  AND anotherDirectDependentNode <> directDependentNode
// A relationship has exactly one target node, so no aggregation is needed to pick it.
// DISTINCT removes duplicates of nodes that are incoming and outgoing dependencies at the same time.
// Sorting and limiting happens in the same WITH so that the limit reliably keeps the first 140 sorted rows.
// Rows without a dependency (from OPTIONAL MATCH) are sorted last to not use up the limit.
WITH DISTINCT graphVizOutput
    ,directDependentNode
    ,dependency
    ,anotherDirectDependentNode AS firstLinkedDependentNode
ORDER BY dependency IS NULL ASC, dependency.weight DESC, directDependentNode.name ASC
LIMIT 140
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
// and an even lighter color for secondary dependency edges
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3" + "; color = 3" AS edgeAttributes
// Add the last part of the element id to the node name to make it unique.
WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
WITH *, firstLinkedDependentNode.name + "_" + split(elementId(firstLinkedDependentNode), ':')[-1] AS firstLinkedDependentNodeId
WITH *, "\"" + directDependentNodeId + "\" -> \"" + firstLinkedDependentNodeId + "\"" AS directDependenciesEdge
// Group by the (single) output list so that all secondary edges end up in one list. Null edges are ignored by collect.
WITH graphVizOutput
    ,collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
UNWIND graphVizOutput + directDependenciesEdges AS graphVizOutputLine
RETURN DISTINCT graphVizOutputLine
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
// Anomaly Detection Graphs: Find top nodes marked as "Bottleneck" including their incoming and outgoing dependencies and output them in Graphviz format.
//
// Parameters:
//   $projection_node_label  label that the central node and all shown dependency nodes need to have
//   $projection_node_rank   rank of the central node, compared (as integer) to "anomalyBottleneckRank"
//   $projection_language    only used as part of the graph title
// Returns:
//   graphVizOutputLine      one Graphviz statement per row (graph title, central node, nodes and edges)
//
// Steps 3 to 5 use OPTIONAL MATCH on purpose: a central node without incoming or without outgoing
// dependencies, or without dependencies in between its neighbours, still needs to produce the
// statements gathered so far. Missing matches lead to null strings that "collect" ignores.

// Step 1: Query overall statistics, e.g. max weight for later normalization.
//         The fallback weight 1 matches the fallback used for the single edges below,
//         so that "maxWeight" is never null when there is at least one dependency.
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
WHERE $projection_node_label IN labels(sourceForStatistics)
  AND $projection_node_label IN labels(targetForStatistics)
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight, 1)) AS maxWeight
    ,percentileDisc(sourceForStatistics.centralityBetweenness, 0.90) AS betweennessThreshold
// Step 2: Query selected central node
MATCH (central)
WHERE $projection_node_label IN labels(central)
  AND central.anomalyBottleneckRank = toInteger($projection_node_rank)
WITH maxWeight
    ,betweennessThreshold
    ,central
    ,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Bottleneck\\n" AS graphLabel
    ,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
    ,[] AS graphVizOutput
WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
WITH *, "🔒 bottleneck #" + central.anomalyBottleneckRank + "\\n" + central.name AS centralNodeLabel
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
// Step 3: Query direct incoming dependencies to the central node (at most 40, highest weight first)
OPTIONAL MATCH (source)-[dependency:DEPENDS_ON]->(central)
WHERE $projection_node_label IN labels(source)
  AND source.outgoingDependencies > 0
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
ORDER BY dependency.weight DESC, source.name ASC
LIMIT 40
// Edge thickness is the weight relative to the overall maximum, scaled to the range 0.4 to 2.9
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
// Thick outline for nodes with a high betweenness centrality
WITH *, CASE WHEN source.centralityBetweenness >= betweennessThreshold
             THEN 5 ELSE 2 END AS scaledNodeBorder
WITH *, round(source.centralityBetweenness, 2) AS labelValue
// Add the last part of the element id to the node name to make it unique.
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directInBorder
// Split long names like inner classes identified by a dollar sign ($)
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directInLabel
WITH *, " [" + directInLabel + directInBorder + "]; " AS directInNodeProperties
WITH *, "\"" + sourceId + "\" " + directInNodeProperties AS directInNode
WITH maxWeight
    ,betweennessThreshold
    ,central
    ,graphVizOutput
    ,collect(source) AS incomingDependencyNodes
    ,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
WITH *, graphVizOutput + directInEdges AS graphVizOutput
// Step 4: Query direct outgoing dependencies from the central node (at most 40, highest weight first)
OPTIONAL MATCH (source)<-[dependency:DEPENDS_ON]-(central)
WHERE $projection_node_label IN labels(source)
  AND source.incomingDependencies > 0
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
ORDER BY dependency.weight DESC, source.name ASC
LIMIT 40
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
// Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
WITH *, "color = 5; fillcolor = 1; " AS directOutColor
WITH *, CASE WHEN source.centralityBetweenness >= betweennessThreshold
             THEN 5 ELSE 2 END AS scaledNodeBorder
WITH *, round(source.centralityBetweenness, 2) AS labelValue
// Add the last part of the element id to the node name to make it unique.
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directOutBorder
// Split long names like inner classes identified by a dollar sign ($)
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directOutLabel
WITH *, " [" + directOutLabel + directOutBorder + directOutColor + "]; " AS directOutNodeProperties
WITH *, "\"" + sourceId + "\" " + directOutNodeProperties AS directOutNode
// The betweenness threshold is not needed anymore after this step and is therefore not carried on.
WITH maxWeight
    ,central
    ,graphVizOutput
    ,incomingDependencyNodes
    ,collect(source) AS outgoingDependencyNodes
    ,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
WITH *, graphVizOutput + directOutEdges AS graphVizOutput
WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
// Step 5: Query dependencies between direct dependencies outside the central node.
//         An empty list is replaced by [null] since UNWIND of an empty list would discard the only row.
UNWIND (CASE WHEN size(directDependentNodes) = 0 THEN [null] ELSE directDependentNodes END) AS directDependentNode
OPTIONAL MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
WHERE anotherDirectDependentNode IN directDependentNodes
  AND anotherDirectDependentNode <> directDependentNode
// A relationship has exactly one target node, so no aggregation is needed to pick it.
// DISTINCT removes duplicates of nodes that are incoming and outgoing dependencies at the same time.
// Sorting and limiting happens in the same WITH so that the limit reliably keeps the first 140 sorted rows.
// Rows without a dependency (from OPTIONAL MATCH) are sorted last to not use up the limit.
WITH DISTINCT graphVizOutput
    ,directDependentNode
    ,dependency
    ,anotherDirectDependentNode AS firstLinkedDependentNode
ORDER BY dependency IS NULL ASC, dependency.weight DESC, directDependentNode.name ASC
LIMIT 140
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
// and an even lighter color for secondary dependency edges
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3" + "; color = 3" AS edgeAttributes
// Add the last part of the element id to the node name to make it unique.
WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
WITH *, firstLinkedDependentNode.name + "_" + split(elementId(firstLinkedDependentNode), ':')[-1] AS firstLinkedDependentNodeId
WITH *, "\"" + directDependentNodeId + "\" -> \"" + firstLinkedDependentNodeId + "\"" AS directDependenciesEdge
// Group by the (single) output list so that all secondary edges end up in one list. Null edges are ignored by collect.
WITH graphVizOutput
    ,collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
UNWIND graphVizOutput + directDependenciesEdges AS graphVizOutputLine
RETURN DISTINCT graphVizOutputLine

0 commit comments

Comments
 (0)