Skip to content

Commit a6fbec1

Browse files
committed
Add graph visualizations to anomaly detection
1 parent 525f615 commit a6fbec1

File tree

9 files changed

+511
-19
lines changed

9 files changed

+511
-19
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
2+
3+
# This script is dynamically triggered by "VisualizationReports.sh" when report "All" or "Visualization" is enabled.
4+
# It is designed as an entry point and delegates the execution to the dedicated "anomalyDetectionGraphVisualization.sh" script that does the "heavy lifting".
5+
6+
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
7+
8+
# Requires anomalyDetectionGraphVisualization.sh
9+
10+
# Fail on any error ("-e" = exit on first error, "-o pipefail" exits on errors within piped commands)
11+
set -o errexit -o pipefail
12+
13+
# Overrideable Constants (defaults also defined in sub scripts)
14+
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
15+
16+
## Get this "scripts/reports" directory if not already set
17+
# Even though $BASH_SOURCE is made for Bourne-like shells, it is also supported by others and is therefore the preferred solution here.
18+
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
19+
# This way non-standard tools like readlink aren't needed.
20+
ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)}
21+
# echo "anomalyDetectionCsv: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
22+
23+
# Get the "graphs" directory by taking the path of this script and selecting "graphs".
24+
ANOMALY_DETECTION_GRAPHS_DIR=${ANOMALY_DETECTION_GRAPHS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/graphs"} # Contains everything (scripts, queries, templates) to create the graph visualizations for anomaly detection
25+
26+
# Delegate the execution to the responsible script.
27+
source "${ANOMALY_DETECTION_GRAPHS_DIR}/anomalyDetectionGraphVisualization.sh"
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "Authority" including their incoming and outgoing dependencies and output them in Graphviz format.
2+
3+
// Step 1: Query overall statistics, e.g. the max weight for later normalization and the 80th percentile of the PageRank to ArticleRank difference as threshold
4+
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
5+
WHERE $projection_node_label IN labels(sourceForStatistics)
6+
AND $projection_node_label IN labels(targetForStatistics)
7+
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
8+
,percentileDisc(sourceForStatistics.centralityPageRankToArticleRankDifference, 0.80) AS pageToArticleRankThreshold
9+
// Step 2: Query selected central node
10+
MATCH (central)
11+
WHERE $projection_node_label IN labels(central)
12+
AND central.anomalyAuthorityRank = toInteger($projection_node_rank)
13+
WITH maxWeight
14+
,pageToArticleRankThreshold
15+
,central
16+
,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Authority: " AS graphLabel
17+
,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
18+
,[] AS graphVizOutput
19+
WITH *, replace(replace(targetName, '.', '.\\n'), '/', '/\\n') AS targetNameSplit
20+
WITH *, targetNameSplit + "\\n(authority #" + central.anomalyAuthorityRank + ")" AS centralNodeLabel
21+
WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
22+
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
23+
// Step 3: Query direct incoming dependencies to the central node
24+
MATCH (source)-[dependency:DEPENDS_ON]->(central)
25+
WHERE $projection_node_label IN labels(source)
26+
AND source.outgoingDependencies > 0
27+
ORDER BY dependency.weight DESC, source.name ASC
28+
LIMIT 40
29+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
30+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
31+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
32+
WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference > pageToArticleRankThreshold THEN 5 ELSE 2 END AS scaledNodeBorder
33+
WITH *, round(source.centralityPageRankNormalized * 0.66 + 0.2, 3) AS scaledNodeSize
34+
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directInBorder
35+
WITH *, "height = " + scaledNodeSize + "; " AS directInSize
36+
WITH *, "\"" + source.name + "\" [" + directInBorder + directInSize + "]; " AS directInNode
37+
WITH maxWeight
38+
,pageToArticleRankThreshold
39+
,central
40+
,graphVizOutput
41+
,collect(source) AS incomingDependencyNodes
42+
,collect(directInNode + "\"" + source.name + "\" -> central [" + edgeAttributes + "];") AS directInEdges
43+
WITH *, graphVizOutput + directInEdges AS graphVizOutput
44+
// Step 4: Query direct outgoing dependencies from the central node
45+
MATCH (source)<-[dependency:DEPENDS_ON]-(central)
46+
WHERE $projection_node_label IN labels(source)
47+
AND source.incomingDependencies > 0
48+
ORDER BY dependency.weight DESC, source.name ASC
49+
LIMIT 40
50+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
51+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
52+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
53+
// Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
54+
WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
55+
WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference > pageToArticleRankThreshold THEN 5 ELSE 2 END AS scaledNodeBorder
56+
WITH *, round(source.centralityPageRankNormalized * 0.66 + 0.2, 3) AS scaledNodeSize
57+
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directOutBorder
58+
WITH *, "height = " + scaledNodeSize + "; " AS directOutSize
59+
WITH *, "color = 5; fillcolor = 1; " AS directOutColors
60+
WITH *, "\"" + source.name + "\" [" + directOutBorder + directOutSize + directOutColors + "]; " AS directOutNode
61+
WITH maxWeight
62+
,pageToArticleRankThreshold
63+
,central
64+
,graphVizOutput
65+
,incomingDependencyNodes
66+
,collect(source) AS outgoingDependencyNodes
67+
,collect(directOutNode + "central -> \"" + source.name + "\" [" + edgeAttributes + "];") AS directOutEdges
68+
WITH *, graphVizOutput + directOutEdges AS graphVizOutput
69+
WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
70+
// Step 5: Query dependencies between direct dependencies outside the central node
71+
UNWIND directDependentNodes AS directDependentNode
72+
MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
73+
WHERE anotherDirectDependentNode IN directDependentNodes
74+
AND anotherDirectDependentNode <> directDependentNode
75+
ORDER BY dependency.weight DESC, directDependentNode.name ASC
76+
WITH maxWeight
77+
,pageToArticleRankThreshold
78+
,central
79+
,graphVizOutput
80+
,directDependentNode
81+
,dependency
82+
,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
83+
LIMIT 80
84+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
85+
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
86+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3" AS edgeAttributes
87+
// Use an even lighter color for secondary dependency edges
88+
WITH *, edgeAttributes + "; color = 3" AS edgeAttributes
89+
WITH *, "\"" + directDependentNode.name + "\" -> \"" + firstLinkedDependentNode.name + "\"" AS directDependenciesEdge
90+
WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
91+
WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
92+
UNWIND graphVizOutput AS graphVizOutputLine
93+
RETURN DISTINCT graphVizOutputLine
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "Bottleneck" including their incoming and outgoing dependencies and output them in Graphviz format.
2+
3+
// Step 1: Query overall statistics, e.g. the max weight for later normalization
4+
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
5+
WHERE $projection_node_label IN labels(sourceForStatistics)
6+
AND $projection_node_label IN labels(targetForStatistics)
7+
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
8+
// Step 2: Query selected central node
9+
MATCH (central)
10+
WHERE $projection_node_label IN labels(central)
11+
AND central.anomalyBottleneckRank = toInteger($projection_node_rank)
12+
WITH maxWeight
13+
,central
14+
,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Bottleneck: " AS graphLabel
15+
,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
16+
,[] AS graphVizOutput
17+
WITH *, replace(replace(targetName, '.', '.\\n'), '/', '/\\n') AS targetNameSplit
18+
WITH *, targetNameSplit + "\\n(bottleneck #" + central.anomalyBottleneckRank + ")" AS centralNodeLabel
19+
WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
20+
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
21+
// Step 3: Query direct incoming dependencies to the central node
22+
MATCH (source)-[dependency:DEPENDS_ON]->(central)
23+
WHERE $projection_node_label IN labels(source)
24+
AND source.outgoingDependencies > 0
25+
ORDER BY dependency.weight DESC, source.name ASC
26+
LIMIT 30
27+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
28+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
29+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
30+
WITH maxWeight
31+
,central
32+
,graphVizOutput
33+
,collect(source) AS incomingDependencyNodes
34+
,collect("\"" + source.name + "\" -> central [" + edgeAttributes + "];") AS directInEdges
35+
WITH *, graphVizOutput + directInEdges AS graphVizOutput
36+
// Step 4: Query direct outgoing dependencies from the central node
37+
MATCH (source)<-[dependency:DEPENDS_ON]-(central)
38+
WHERE $projection_node_label IN labels(source)
39+
AND source.incomingDependencies > 0
40+
ORDER BY dependency.weight DESC, source.name ASC
41+
LIMIT 30
42+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
43+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
44+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
45+
// Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
46+
WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
47+
WITH *, "\"" + source.name + "\" [color = 5; fillcolor = 1;]; " AS directOutNode
48+
WITH maxWeight
49+
,central
50+
,graphVizOutput
51+
,incomingDependencyNodes
52+
,collect(source) AS outgoingDependencyNodes
53+
,collect(directOutNode + "central -> \"" + source.name + "\" [" + edgeAttributes + "];") AS directOutEdges
54+
WITH *, graphVizOutput + directOutEdges AS graphVizOutput
55+
WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
56+
// Step 5: Query dependencies between direct dependencies outside the central node
57+
UNWIND directDependentNodes AS directDependentNode
58+
MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
59+
WHERE anotherDirectDependentNode IN directDependentNodes
60+
AND anotherDirectDependentNode <> directDependentNode
61+
ORDER BY dependency.weight DESC, directDependentNode.name ASC
62+
WITH maxWeight
63+
,central
64+
,graphVizOutput
65+
,directDependentNode
66+
,dependency
67+
,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
68+
LIMIT 60
69+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
70+
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
71+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3" AS edgeAttributes
72+
// Use a light color for secondary dependency edges
73+
WITH *, edgeAttributes + "; color = 3" AS edgeAttributes
74+
WITH *, "\"" + directDependentNode.name + "\" -> \"" + firstLinkedDependentNode.name + "\"" AS directDependenciesEdge
75+
WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
76+
WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
77+
UNWIND graphVizOutput AS graphVizOutputLine
78+
RETURN DISTINCT graphVizOutputLine
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// This is a GraphViz dot template file for the visualization of anomaly archetype graphs with a selected central node.
2+
// The main part of the template is marked by the comments "Begin-Template" and "End-Template".
3+
// It also contains a simple example graph.
4+
//
5+
strict digraph top_central_template {
6+
//Begin-Template
7+
graph [layout = "fdp"; start = "7"; splines = "spline"; beautify = true;];
8+
graph [fontname = "Helvetica,Arial,sans-serif"; labelloc = "t";];
9+
node [colorscheme = "bugn9"; color = 6; fillcolor = 3;]; # Alternative: color = "0.58 0.75 0.75"; fillcolor = "0.58 0.15 0.99"
10+
edge [colorscheme = "bugn9"; color = 7; ]; # Alternative: color = "0.58 0.75 0.85";
11+
node [fontsize = 8; style = "filled"; margin = "0.05,0.05"];
12+
edge [fontsize = 4; arrowsize = "0.4";];
13+
14+
central [shape = "doublecircle"; margin = "0.00001,0.00001";];
15+
central [fontsize = 10;];
16+
central [color = 7; fillcolor = 5; penwidth = 3;]; # color = "0.52 0.7 0.7"; fillcolor = "0.52 0.4 0.9"
17+
18+
limit_hint [color = 7; fillcolor = 5; penwidth = 2;] # color = "0.52 0.7 0.7"; fillcolor = "0.52 0.4 0.9"
19+
limit_hint [shape = "note"; fontsize = 10]
20+
limit_hint [label = "limited\nnode count";]
21+
limit_hint -> central [dir = "back"; arrowtail = "inv"]; // Signals that the number of edges might have been limited
22+
23+
//End-Template
24+
"A" -> "central" [penwidth = 1.0; label = 1;];
25+
"A" -> "B" [penwidth = 3.0; label = 4;];
26+
"B" -> "central" [penwidth = 2.0; label = 2;];
27+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Anomaly Detection Graphs: Find top nodes marked as "Hub" including their incoming dependencies and output them in Graphviz format.
2+
3+
// Step 1: Query overall statistics, e.g. the max weight for later normalization
4+
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
5+
WHERE $projection_node_label IN labels(sourceForStatistics)
6+
AND $projection_node_label IN labels(targetForStatistics)
7+
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
8+
// Step 2: Query selected central node
9+
MATCH (central)
10+
WHERE $projection_node_label IN labels(central)
11+
AND central.anomalyHubRank = toInteger($projection_node_rank)
12+
WITH maxWeight
13+
,central
14+
,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Hub: " AS graphLabel
15+
,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
16+
,[] AS graphVizOutput
17+
WITH *, replace(replace(targetName, '.', '.\\n'), '/', '/\\n') AS targetNameSplit
18+
WITH *, targetNameSplit + "\\n(hub #" + central.anomalyHubRank + ")" AS centralNodeLabel
19+
WITH *, graphVizOutput + ["graph [label=\"" + graphLabel + targetName + "\\n\\n\"];"] AS graphVizOutput
20+
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
21+
// Step 3: Query direct incoming dependencies to the central node
22+
MATCH (source)-[dependency:DEPENDS_ON]->(central)
23+
WHERE $projection_node_label IN labels(source)
24+
AND source.outgoingDependencies > 0
25+
ORDER BY dependency.weight DESC, source.name ASC
26+
LIMIT 70
27+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
28+
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
29+
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
30+
WITH maxWeight
31+
,central
32+
,graphVizOutput
33+
,collect(source) AS directDependentNodes
34+
,collect("\"" + source.name + "\" -> central [" + edgeAttributes + "];") AS directInEdges
35+
WITH *, graphVizOutput + directInEdges AS graphVizOutput
36+
// Step 4: Query dependencies between direct dependencies outside the central node
37+
UNWIND directDependentNodes AS directDependentNode
38+
MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
39+
WHERE anotherDirectDependentNode IN directDependentNodes
40+
AND anotherDirectDependentNode <> directDependentNode
41+
ORDER BY dependency.weight DESC, directDependentNode.name ASC
42+
WITH maxWeight
43+
,central
44+
,graphVizOutput
45+
,directDependentNode
46+
,dependency
47+
,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
48+
LIMIT 140
49+
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
50+
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
51+
WITH *, "weight=" + weight + "; penwidth=0.2" AS edgeAttributes
52+
WITH *, "\"" + directDependentNode.name + "\" -> \"" + firstLinkedDependentNode.name + "\"" AS directDependenciesEdge
53+
WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
54+
WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
55+
UNWIND graphVizOutput AS graphVizOutputLine
56+
RETURN DISTINCT graphVizOutputLine
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// This is a GraphViz dot template file for the visualization of anomaly archetype graphs with a selected central node.
2+
// The main part of the template is marked by the comments "Begin-Template" and "End-Template".
3+
// It also contains a simple example graph.
4+
//
5+
strict digraph top_central_template {
6+
//Begin-Template
7+
graph [layout = "fdp"; start = "7"; splines = "spline"; beautify = true;];
8+
graph [fontname = "Helvetica,Arial,sans-serif"; labelloc = "t";];
9+
node [colorscheme = "bugn9"; color = 6; fillcolor = 3;]; # Alternative: color = "0.58 0.75 0.75"; fillcolor = "0.58 0.15 0.99"
10+
edge [colorscheme = "bugn9"; color = 7; ]; # Alternative: color = "0.58 0.75 0.85";
11+
node [shape = "circle";]
12+
node [fontsize = 6; style = "filled"; margin = "0.03,0.03"];
13+
edge [fontsize = 4; arrowsize = "0.4";];
14+
15+
central [shape = "doublecircle"; margin = "0.00001,0.00001";];
16+
central [fontsize = 10;];
17+
central [color = 7; fillcolor = 5; penwidth = 3;]; # color = "0.52 0.7 0.7"; fillcolor = "0.52 0.4 0.9"
18+
19+
limit_hint [color = 7; fillcolor = 5; penwidth = 2;] # color = "0.52 0.7 0.7"; fillcolor = "0.52 0.4 0.9"
20+
limit_hint [shape = "note"; fontsize = 10;]
21+
limit_hint [label = "limited\nnode count";]
22+
limit_hint -> central [dir = "back"; arrowtail = "inv"]; // Signals that the number of edges might have been limited
23+
24+
//End-Template
25+
"A" -> "central" [penwidth = 1.0; label = 1;];
26+
"A" -> "B" [penwidth = 3.0; label = 4;];
27+
"B" -> "central" [penwidth = 2.0; label = 2;];
28+
}

0 commit comments

Comments
 (0)