Skip to content

Commit 9fb59c7

Browse files
committed
Add graph visualizations to anomaly detection
1 parent 8f86f7a commit 9fb59c7

File tree

10 files changed

+755
-19
lines changed

10 files changed

+755
-19
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
2+
3+
# This script is dynamically triggered by "VisualizationReports.sh" when report "All" or "Visualization" is enabled.
4+
# It is designed as an entry point and delegates the execution to the dedicated "anomalyDetectionGraphVisualization.sh" script that does the "heavy lifting".
5+
6+
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
7+
8+
# Requires anomalyDetectionGraphVisualization.sh
9+
10+
# Fail on any error ("-o errexit" = exit on first error, "-o pipefail" = exit on errors within piped commands)
11+
set -o errexit -o pipefail
12+
13+
# Overrideable Constants (defaults also defined in sub scripts)
14+
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
15+
16+
## Get the directory this script is located in if not already set
17+
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
18+
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
19+
# This way non-standard tools like readlink aren't needed.
20+
ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)}
21+
# echo "anomalyDetectionCsv: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
22+
23+
# Get the "graphs" directory by taking the path of this script and selecting "graphs".
24+
ANOMALY_DETECTION_GRAPHS_DIR=${ANOMALY_DETECTION_GRAPHS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/graphs"} # Contains the "anomalyDetectionGraphVisualization.sh" script that is sourced below
25+
26+
# Delegate the execution to the responsible script.
27+
source "${ANOMALY_DETECTION_GRAPHS_DIR}/anomalyDetectionGraphVisualization.sh"
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "Authority" including their incoming and outgoing dependencies, sizes based on PageRank and thick outline for nodes with high Page Rank to Article Rank difference in Graphviz format.
2+
3+
// Step 1: Query overall statistics across all dependencies between nodes with the given label: the maximum weight (for later pen width normalization) and the 80% percentile thresholds used to emphasize nodes. NOTE(review): unlike the later steps, maxWeight has no fallback of 1 when both weight properties are missing - confirm that at least one dependency carries a weight.
4+
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
5+
WHERE $projection_node_label IN labels(sourceForStatistics)
6+
AND $projection_node_label IN labels(targetForStatistics)
7+
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
8+
,percentileDisc(sourceForStatistics.centralityPageRankToArticleRankDifference, 0.80) AS pageToArticleRankThreshold
9+
,percentileDisc(targetForStatistics.centralityPageRankNormalized, 0.80) AS pageRankThreshold
10+
// Step 2: Query the selected central node (the node with the given label whose "anomalyAuthorityRank" equals the given rank) and start the Graphviz output with the graph label, the legend and the central node definition
11+
MATCH (central)
12+
WHERE $projection_node_label IN labels(central)
13+
AND central.anomalyAuthorityRank = toInteger($projection_node_rank)
14+
WITH maxWeight
15+
,pageToArticleRankThreshold
16+
,pageRankThreshold
17+
,central
18+
,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Authority\\n" AS graphLabel
19+
,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
20+
WITH *, "\\n\\ndark nodes: incoming dependencies (limited max. 40)\\n" AS graphLegend
21+
WITH *, graphLegend + "bright nodes: outgoing dependencies (limited max. 40)\\n" AS graphLegend
22+
WITH *, graphLegend + "node value: Page Rank (normalized)\\n" AS graphLegend
23+
WITH *, graphLegend + "large circle: > 80% percentile of Page Rank\\n" AS graphLegend
24+
WITH *, graphLegend + "thick outline: > 80% percentile of Page Rank to Article Rank Difference\\n" AS graphLegend
25+
WITH *, ["graph [label=\"" + graphLabel + targetName + graphLegend + "\\n\"];"] AS graphVizOutput
26+
WITH *, "🏛️ authority #" + central.anomalyAuthorityRank + "\\n" + central.name AS centralNodeLabel
27+
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
28+
// Step 3: Query direct incoming dependencies to the central node (at most 40, highest "weight" first) and append one node definition plus one edge line for each of them to the Graphviz output. NOTE(review): a plain MATCH is used, so the whole query returns no rows if nothing matches here - confirm that this is intended. NOTE(review): rows are sorted by "weight" while the edge label prefers "weight25PercentInterfaces" - confirm.
29+
MATCH (source)-[dependency:DEPENDS_ON]->(central)
30+
WHERE $projection_node_label IN labels(source)
31+
AND source.outgoingDependencies > 0
32+
ORDER BY dependency.weight DESC, source.name ASC
33+
LIMIT 40
34+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
35+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
36+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
37+
WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold
38+
THEN 5 ELSE 2 END AS scaledNodeBorder
39+
WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold
40+
THEN "shape = \"circle\"; height=2; " ELSE "" END AS nodeEmphasis
41+
WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%" AS labelValue
42+
// Add the last part of the element id to the node name to make it unique.
43+
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
44+
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directInBorder
45+
// Split long names like inner classes identified by a dollar sign ($) by inserting a Graphviz line break after each dollar sign
46+
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
47+
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directInLabel
48+
WITH *, " [" + nodeEmphasis + directInLabel + directInBorder + "]; " AS directInNodeProperties
49+
WITH *, "\"" + sourceId + "\" " + directInNodeProperties AS directInNode
50+
WITH maxWeight
51+
,pageToArticleRankThreshold
52+
,pageRankThreshold
53+
,central
54+
,graphVizOutput
55+
,collect(source) AS incomingDependencyNodes
56+
,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
57+
WITH *, graphVizOutput + directInEdges AS graphVizOutput
58+
// Step 4: Query direct outgoing dependencies from the central node (at most 40, highest "weight" first) and append one node definition plus one edge line for each of them to the Graphviz output. Here "source" denotes the target of the outgoing dependency. NOTE(review): a plain MATCH is used, so the whole query returns no rows if the central node has no matching outgoing dependency - confirm that this is intended.
59+
MATCH (source)<-[dependency:DEPENDS_ON]-(central)
60+
WHERE $projection_node_label IN labels(source)
61+
AND source.incomingDependencies > 0
62+
ORDER BY dependency.weight DESC, source.name ASC
63+
LIMIT 40
64+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
65+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
66+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
67+
// Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
68+
WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
69+
WITH *, "color = 5; fillcolor = 1; " AS directOutColor
70+
WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold
71+
THEN 5 ELSE 2 END AS scaledNodeBorder
72+
WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold
73+
THEN "shape = \"circle\"; height=2; " ELSE "" END AS nodeEmphasis
74+
WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%" AS labelValue
75+
// Add the last part of the element id to the node name to make it unique.
76+
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
77+
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directOutBorder
78+
// Split long names like inner classes identified by a dollar sign ($) by inserting a Graphviz line break after each dollar sign
79+
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
80+
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directOutLabel
81+
WITH *, " [" + nodeEmphasis + directOutLabel + directOutBorder + directOutColor + "]; " AS directOutNodeProperties
82+
WITH *, "\"" + sourceId + "\" " + directOutNodeProperties AS directOutNode
83+
WITH maxWeight
84+
,central
85+
,graphVizOutput
86+
,incomingDependencyNodes
87+
,collect(source) AS outgoingDependencyNodes
88+
,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
89+
WITH *, graphVizOutput + directOutEdges AS graphVizOutput
90+
WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
91+
// Step 5: Query dependencies between the collected direct dependency nodes themselves (the central node is not part of that list), limit them to 140, append their edges to the Graphviz output and return every distinct output line. NOTE(review): a plain MATCH is used, so the whole query returns no rows if the direct dependency nodes have no dependencies among each other - confirm that this is intended.
92+
UNWIND directDependentNodes AS directDependentNode
93+
MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
94+
WHERE anotherDirectDependentNode IN directDependentNodes
95+
AND anotherDirectDependentNode <> directDependentNode
96+
ORDER BY dependency.weight DESC, directDependentNode.name ASC
97+
WITH graphVizOutput
98+
,directDependentNode
99+
,dependency
100+
,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
101+
LIMIT 140
102+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
103+
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
104+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3" AS edgeAttributes
105+
// Use an even lighter color for secondary dependency edges
106+
WITH *, edgeAttributes + "; color = 3" AS edgeAttributes
107+
// Add the last part of the element id to the node name to make it unique.
108+
WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
109+
WITH *, firstLinkedDependentNode.name + "_" + split(elementId(firstLinkedDependentNode), ':')[-1] AS firstLinkedDependentNodeId
110+
WITH *, "\"" + directDependentNodeId + "\" -> \"" + firstLinkedDependentNodeId + "\"" AS directDependenciesEdge
111+
WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
112+
WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
113+
UNWIND graphVizOutput AS graphVizOutputLine
114+
RETURN DISTINCT graphVizOutputLine
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "Bottleneck" including their incoming and outgoing dependencies and output them in Graphviz format.
2+
3+
// Step 1: Query overall statistics across all dependencies between nodes with the given label: the maximum weight (for later pen width normalization) and the 90% percentile threshold of the Betweenness centrality used to emphasize nodes. NOTE(review): unlike the later steps, maxWeight has no fallback of 1 when both weight properties are missing - confirm that at least one dependency carries a weight.
4+
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
5+
WHERE $projection_node_label IN labels(sourceForStatistics)
6+
AND $projection_node_label IN labels(targetForStatistics)
7+
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
8+
,percentileDisc(sourceForStatistics.centralityBetweenness, 0.90) AS betweennessThreshold
9+
// Step 2: Query the selected central node (the node with the given label whose "anomalyBottleneckRank" equals the given rank) and start the Graphviz output with the graph label, the legend and the central node definition
10+
MATCH (central)
11+
WHERE $projection_node_label IN labels(central)
12+
AND central.anomalyBottleneckRank = toInteger($projection_node_rank)
13+
WITH maxWeight
14+
,betweennessThreshold
15+
,central
16+
,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Bottleneck\\n" AS graphLabel
17+
,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
18+
WITH *, "\\n\\ndark nodes: incoming dependencies (limited max. 40)\\n" AS graphLegend
19+
WITH *, graphLegend + "bright nodes: outgoing dependencies (limited max. 40)\\n" AS graphLegend
20+
WITH *, graphLegend + "node value: Betweenness centrality\\n" AS graphLegend
21+
WITH *, graphLegend + "thick outline: > 90% percentile of Betweenness centrality\\n" AS graphLegend
22+
WITH *, ["graph [label=\"" + graphLabel + targetName + graphLegend + "\\n\"];"] AS graphVizOutput
23+
WITH *, "🔒 bottleneck #" + central.anomalyBottleneckRank + "\\n" + central.name AS centralNodeLabel
24+
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
25+
// Step 3: Query direct incoming dependencies to the central node (at most 40, highest "weight" first) and append one node definition plus one edge line for each of them to the Graphviz output. NOTE(review): a plain MATCH is used, so the whole query returns no rows if nothing matches here - confirm that this is intended. NOTE(review): rows are sorted by "weight" while the edge label prefers "weight25PercentInterfaces" - confirm.
26+
MATCH (source)-[dependency:DEPENDS_ON]->(central)
27+
WHERE $projection_node_label IN labels(source)
28+
AND source.outgoingDependencies > 0
29+
ORDER BY dependency.weight DESC, source.name ASC
30+
LIMIT 40
31+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
32+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
33+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
34+
WITH *, CASE WHEN source.centralityBetweenness >= betweennessThreshold
35+
THEN 5 ELSE 2 END AS scaledNodeBorder
36+
WITH *, round(source.centralityBetweenness, 2) AS labelValue
37+
// Add the last part of the element id to the node name to make it unique.
38+
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
39+
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directInBorder
40+
// Split long names like inner classes identified by a dollar sign ($) by inserting a Graphviz line break after each dollar sign
41+
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
42+
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directInLabel
43+
WITH *, " [" + directInLabel + directInBorder + "]; " AS directInNodeProperties
44+
WITH *, "\"" + sourceId + "\" " + directInNodeProperties AS directInNode
45+
WITH maxWeight
46+
,betweennessThreshold
47+
,central
48+
,graphVizOutput
49+
,collect(source) AS incomingDependencyNodes
50+
,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
51+
WITH *, graphVizOutput + directInEdges AS graphVizOutput
52+
// Step 4: Query direct outgoing dependencies from the central node (at most 40, highest "weight" first) and append one node definition plus one edge line for each of them to the Graphviz output. Here "source" denotes the target of the outgoing dependency. NOTE(review): a plain MATCH is used, so the whole query returns no rows if the central node has no matching outgoing dependency - confirm that this is intended.
53+
MATCH (source)<-[dependency:DEPENDS_ON]-(central)
54+
WHERE $projection_node_label IN labels(source)
55+
AND source.incomingDependencies > 0
56+
ORDER BY dependency.weight DESC, source.name ASC
57+
LIMIT 40
58+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
59+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
60+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
61+
// Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
62+
WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
63+
WITH *, "color = 5; fillcolor = 1; " AS directOutColor
64+
WITH *, CASE WHEN source.centralityBetweenness >= betweennessThreshold
65+
THEN 5 ELSE 2 END AS scaledNodeBorder
66+
WITH *, round(source.centralityBetweenness, 2) AS labelValue
67+
// Add the last part of the element id to the node name to make it unique.
68+
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
69+
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directOutBorder
70+
// Split long names like inner classes identified by a dollar sign ($) by inserting a Graphviz line break after each dollar sign
71+
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
72+
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directOutLabel
73+
WITH *, " [" + directOutLabel + directOutBorder + directOutColor + "]; " AS directOutNodeProperties
74+
WITH *, "\"" + sourceId + "\" " + directOutNodeProperties AS directOutNode
75+
WITH maxWeight
76+
,betweennessThreshold
77+
,central
78+
,graphVizOutput
79+
,incomingDependencyNodes
80+
,collect(source) AS outgoingDependencyNodes
81+
,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
82+
WITH *, graphVizOutput + directOutEdges AS graphVizOutput
83+
WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
84+
// Step 5: Query dependencies between the collected direct dependency nodes themselves (the central node is not part of that list), limit them to 140, append their edges to the Graphviz output and return every distinct output line. NOTE(review): a plain MATCH is used, so the whole query returns no rows if the direct dependency nodes have no dependencies among each other - confirm that this is intended.
85+
UNWIND directDependentNodes AS directDependentNode
86+
MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
87+
WHERE anotherDirectDependentNode IN directDependentNodes
88+
AND anotherDirectDependentNode <> directDependentNode
89+
ORDER BY dependency.weight DESC, directDependentNode.name ASC
90+
WITH graphVizOutput
91+
,directDependentNode
92+
,dependency
93+
,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
94+
LIMIT 140
95+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
96+
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
97+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3" AS edgeAttributes
98+
// Use an even lighter color for secondary dependency edges
99+
WITH *, edgeAttributes + "; color = 3" AS edgeAttributes
100+
// Add the last part of the element id to the node name to make it unique.
101+
WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
102+
WITH *, firstLinkedDependentNode.name + "_" + split(elementId(firstLinkedDependentNode), ':')[-1] AS firstLinkedDependentNodeId
103+
WITH *, "\"" + directDependentNodeId + "\" -> \"" + firstLinkedDependentNodeId + "\"" AS directDependenciesEdge
104+
WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
105+
WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
106+
UNWIND graphVizOutput AS graphVizOutputLine
107+
RETURN DISTINCT graphVizOutputLine

0 commit comments

Comments
 (0)