JohT · JohT · Oct 25, 2025 · Oct 12, 2025 · Oct 14, 2025 · Oct 15, 2025
diff --git a/.github/workflows/public-analyze-code-graph.yml b/.github/workflows/public-analyze-code-graph.yml
@@ -100,6 +100,7 @@ jobs:
           repository: JohT/code-graph-analysis-pipeline
           ref: ${{ inputs.ref }}
           persist-credentials: false
+          fetch-tags: true
 
       - name: (Java Setup) Java Development Kit (JDK) ${{ matrix.java }}
         uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5

diff --git a/domains/anomaly-detection/anomalyDetectionCsv.sh b/domains/anomaly-detection/anomalyDetectionCsv.sh
@@ -61,6 +61,9 @@ anomaly_detection_features() {
     # Determine the article rank if not already done
     execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
                                          "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Write.cypher" "${@}"
+    # Determine the normalized difference between Page Rank and Article Rank if not already done
+    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Exists.cypher" \
+                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Write.cypher" "${@}"
 }
 
 # Run queries to find anomalies in the graph.

diff --git a/domains/anomaly-detection/anomalyDetectionPython.sh b/domains/anomaly-detection/anomalyDetectionPython.sh
@@ -106,6 +106,9 @@ anomaly_detection_features() {
     # Determine the article rank if not already done
     execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
                                          "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Write.cypher" "${@}"
+    # Determine the normalized difference between Page Rank and Article Rank if not already done
+    execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Exists.cypher" \
+                                         "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Write.cypher" "${@}"
 }
 
 # Execute the Python scripts for anomaly detection.

diff --git a/domains/anomaly-detection/anomalyDetectionVisualization.sh b/domains/anomaly-detection/anomalyDetectionVisualization.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+# This script is dynamically triggered by "VisualizationReports.sh" when report "All" or "Visualization" is enabled.
+# It is designed as an entry point and delegates the execution to the dedicated "anomalyDetectionGraphVisualization.sh" script that does the "heavy lifting".
+
+# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.
+
+# Requires anomalyDetectionGraphVisualization.sh
+
+# Fail on any error ("errexit" = exit on first error, "pipefail" = exit on errors within piped commands)
+set -o errexit -o pipefail
+
+# Overrideable Constants (defaults also defined in sub scripts)
+# Not read in this script itself. Since the delegate script is sourced below, it sees this default.
+REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}
+
+## Get the directory of this script ("domains/anomaly-detection") if not already set
+# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
+# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
+# This way non-standard tools like readlink aren't needed.
+ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR:-$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)}
+# echo "anomalyDetectionVisualization: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"
+
+# Get the "graphs" directory by taking the path of this script and selecting "graphs".
+ANOMALY_DETECTION_GRAPHS_DIR=${ANOMALY_DETECTION_GRAPHS_DIR:-"${ANOMALY_DETECTION_SCRIPT_DIR}/graphs"} # Contains the scripts and queries (e.g. "anomalyDetectionGraphVisualization.sh") to create the graph visualizations for anomaly detection
+
+# Delegate the execution to the responsible script.
+# It is sourced (not executed), so it runs in this shell and shares the variables and shell options set above.
+source "${ANOMALY_DETECTION_GRAPHS_DIR}/anomalyDetectionGraphVisualization.sh"
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-PageToArticleRank-Exists.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-PageToArticleRank-Exists.cypher
@@ -0,0 +1,11 @@
+// Return the first node with (amongst others) a "centralityPageRankToArticleRankDifference" property if it exists.
+// All three properties written by "AnomalyDetectionFeature-PageToArticleRank-Write.cypher" need to be present.
+// Requires the parameter "projection_node_label" that contains the label of the nodes to check.
+
+   MATCH (codeUnit)
+   WHERE $projection_node_label IN labels(codeUnit)
+     AND codeUnit.centralityPageRankToArticleRankDifference IS NOT NULL
+     AND codeUnit.centralityPageRankNormalized              IS NOT NULL
+     AND codeUnit.centralityArticleRankNormalized           IS NOT NULL
+  RETURN codeUnit.name                                      AS shortCodeUnitName
+        ,elementId(codeUnit)                                AS nodeElementId
+        ,codeUnit.centralityPageRankToArticleRankDifference AS pageToArticleRankDifference
+   LIMIT 1
diff --git a/domains/anomaly-detection/features/AnomalyDetectionFeature-PageToArticleRank-Write.cypher b/domains/anomaly-detection/features/AnomalyDetectionFeature-PageToArticleRank-Write.cypher
@@ -0,0 +1,21 @@
+// Calculates and writes the (amongst others) "centralityPageRankToArticleRankDifference" property.
+// Page Rank and Article Rank are min-max normalized over all nodes with the given label that have both values.
+// Writes "centralityPageRankNormalized", "centralityArticleRankNormalized" and their difference (Page Rank minus Article Rank).
+// Requires the parameter "projection_node_label" that contains the label of the nodes to process.
+
+   MATCH (codeUnit)
+   WHERE $projection_node_label IN labels(codeUnit)
+     AND codeUnit.centralityPageRank    IS NOT NULL
+     AND codeUnit.centralityArticleRank IS NOT NULL
+    WITH collect(codeUnit)                                 AS codeUnits
+        ,min(codeUnit.centralityPageRank)                  AS minPageRank
+        ,max(codeUnit.centralityPageRank)                  AS maxPageRank
+        ,min(codeUnit.centralityArticleRank)               AS minArticleRank
+        ,max(codeUnit.centralityArticleRank)               AS maxArticleRank
+    WITH *
+        ,maxPageRank    - minPageRank                      AS pageRankRange
+        ,maxArticleRank - minArticleRank                   AS articleRankRange
+  UNWIND codeUnits AS codeUnit
+    // Guard against a zero value range (e.g. a single node or all nodes with the same rank).
+    // Without it, the floating point division by zero would write NaN into the node properties.
+    WITH *
+        ,CASE WHEN pageRankRange = 0 THEN 0.0
+              ELSE (codeUnit.centralityPageRank - minPageRank) / pageRankRange
+          END                                              AS normalizedPageRank
+        ,CASE WHEN articleRankRange = 0 THEN 0.0
+              ELSE (codeUnit.centralityArticleRank - minArticleRank) / articleRankRange
+          END                                              AS normalizedArticleRank
+    WITH *
+        ,normalizedPageRank - normalizedArticleRank        AS normalizedPageRankToArticleRankDifference
+     SET codeUnit.centralityPageRankToArticleRankDifference =  normalizedPageRankToArticleRankDifference
+        ,codeUnit.centralityPageRankNormalized              =  normalizedPageRank
+        ,codeUnit.centralityArticleRankNormalized           =  normalizedArticleRank
+RETURN count(*) AS nodePropertiesWritten
diff --git a/domains/anomaly-detection/graphs/TopAuthority.cypher b/domains/anomaly-detection/graphs/TopAuthority.cypher
@@ -0,0 +1,114 @@
+// Anomaly Detection Graphs: Find top nodes marked as "Authority" including their incoming and outgoing dependencies, sizes based on PageRank and thick outline for nodes with high Page Rank to Article Rank difference in Graphviz format.
+// Requires the parameters "projection_node_label", "projection_node_rank" and "projection_language".
+// Returns one row per distinct GraphViz line ("graphVizOutputLine"): graph label, central node, direct dependency nodes with their edges and edges between direct dependencies.
+
+// Step 1: Query overall statistics, e.g. min/max weight for later normalization
+// The statistics are aggregated over all DEPENDS_ON relationships between nodes with the given label,
+// so the percentile thresholds are taken per relationship end (a node counts once per matching dependency).
+ MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
+ WHERE $projection_node_label IN labels(sourceForStatistics)
+   AND $projection_node_label IN labels(targetForStatistics)
+  WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
+      ,percentileDisc(sourceForStatistics.centralityPageRankToArticleRankDifference, 0.80) AS pageToArticleRankThreshold
+      ,percentileDisc(targetForStatistics.centralityPageRankNormalized, 0.80)              AS pageRankThreshold
+// Step 2: Query selected central node
+// The central node is the one whose "anomalyAuthorityRank" equals the given rank parameter (converted to an integer).
+ MATCH (central)
+ WHERE $projection_node_label IN labels(central)
+   AND central.anomalyAuthorityRank = toInteger($projection_node_rank)
+  WITH maxWeight
+      ,pageToArticleRankThreshold
+      ,pageRankThreshold
+      ,central
+      ,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Authority\\n" AS graphLabel
+      ,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name)   AS targetName
+  // The legend is assembled step by step by re-binding "graphLegend" with one more line appended each time.
+  WITH *, "\\n\\ndark nodes: incoming dependencies (limited max. 40)\\n"                             AS graphLegend
+  WITH *, graphLegend + "bright nodes: outgoing dependencies (limited max. 40)\\n"                   AS graphLegend
+  WITH *, graphLegend + "node value: Page Rank (normalized)\\n"                                      AS graphLegend
+  WITH *, graphLegend + "large circle: > 80% percentile of Page Rank\\n"                             AS graphLegend
+  WITH *, graphLegend + "thick outline: > 80% percentile of Page Rank to Article Rank Difference\\n" AS graphLegend
+  // "graphVizOutput" is the list of GraphViz lines that gets extended in every following step.
+  WITH *, ["graph   [label=\"" + graphLabel + targetName + graphLegend + "\\n\"];"]       AS graphVizOutput
+  WITH *, "🏛️ authority #" + central.anomalyAuthorityRank + "\\n" + central.name          AS centralNodeLabel
+  WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"]              AS graphVizOutput
+// Step 3: Query direct incoming dependencies to the central node
+// NOTE(review): This is a plain MATCH (not OPTIONAL MATCH). If the central node has no matching incoming dependency,
+// no rows remain and the whole query returns nothing. Confirm that this is intended.
+ MATCH (source)-[dependency:DEPENDS_ON]->(central)
+  WHERE $projection_node_label IN labels(source)
+    AND source.outgoingDependencies > 0
+  ORDER BY dependency.weight DESC, source.name ASC
+  LIMIT 40
+   WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1)  AS weight
+   // Scale the edge pen width relative to the maximum weight into the range 0.4 to 2.9
+   WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0)        AS penWidth
+   WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth   AS edgeAttributes
+   // Thick node outline (5 instead of 2) when the rank difference reaches the threshold from step 1
+   WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold 
+                THEN 5 ELSE 2 END                                                AS scaledNodeBorder
+   // Large circle shape when the normalized Page Rank reaches the threshold from step 1
+   WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold 
+                THEN "shape = \"circle\"; height=2; " ELSE "" END                AS nodeEmphasis
+   WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%"           AS labelValue
+   // Add the last part of the element id to the node name to make it unique.
+   WITH *, source.name + "_" + split(elementId(source), ':')[-1]                 AS sourceId
+   WITH *, "penwidth = " + scaledNodeBorder + "; "                               AS directInBorder
+   // Split long names like inner classes identified by a dollar sign ($)
+   WITH *, replace(source.name, '$', '$\\n')                                     AS sourceNameSplit
+   WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; "        AS directInLabel
+   WITH *, " [" + nodeEmphasis + directInLabel + directInBorder + "]; "          AS directInNodeProperties
+   WITH *, "\"" + sourceId + "\" " + directInNodeProperties                      AS directInNode
+   // Aggregate all incoming dependencies into one row. Each collected entry contains the node definition followed by its edge to the central node.
+   WITH maxWeight
+       ,pageToArticleRankThreshold
+       ,pageRankThreshold
+       ,central
+       ,graphVizOutput 
+       ,collect(source) AS incomingDependencyNodes
+       ,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
+   WITH *, graphVizOutput + directInEdges AS graphVizOutput
+// Step 4: Query direct outgoing dependencies from the central node
+// Here, the variable "source" denotes the node the central node depends on (the target of the outgoing dependency).
+// NOTE(review): As in step 3, a plain MATCH is used. Without any matching outgoing dependency the query returns nothing.
+ MATCH (source)<-[dependency:DEPENDS_ON]-(central)
+  WHERE $projection_node_label IN labels(source)
+    AND source.incomingDependencies > 0
+  ORDER BY dependency.weight DESC, source.name ASC
+  LIMIT 40
+   WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1)  AS weight
+   WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0)        AS penWidth
+   WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth   AS edgeAttributes
+   // Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
+   WITH *, edgeAttributes + "; color = 5"                                        AS edgeAttributes
+   WITH *, "color = 5; fillcolor = 1; "                                          AS directOutColor
+   WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold 
+                THEN 5 ELSE 2 END                                                AS scaledNodeBorder
+   WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold 
+                THEN "shape = \"circle\"; height=2; " ELSE "" END                AS nodeEmphasis
+   WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%"           AS labelValue
+   // Add the last part of the element id to the node name to make it unique.
+   WITH *, source.name + "_" + split(elementId(source), ':')[-1]                 AS sourceId
+   WITH *, "penwidth = " + scaledNodeBorder + "; "                               AS directOutBorder
+   // Split long names like inner classes identified by a dollar sign ($)
+   WITH *, replace(source.name, '$', '$\\n')                                     AS sourceNameSplit
+   WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; "        AS directOutLabel
+   WITH *, " [" + nodeEmphasis + directOutLabel + directOutBorder + directOutColor + "]; " AS directOutNodeProperties
+   WITH *, "\"" + sourceId + "\" " + directOutNodeProperties                     AS directOutNode
+   // The thresholds are not needed after this point and are therefore no longer carried along.
+   WITH maxWeight
+       ,central
+       ,graphVizOutput
+       ,incomingDependencyNodes
+       ,collect(source) AS outgoingDependencyNodes
+       ,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
+   WITH *, graphVizOutput + directOutEdges                   AS graphVizOutput
+   WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
+// Step 5: Query dependencies between direct dependencies outside the central node
+// NOTE(review): Also a plain MATCH. Without any dependency between the direct dependencies the query returns nothing.
+ UNWIND directDependentNodes AS directDependentNode
+  MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
+  WHERE anotherDirectDependentNode IN directDependentNodes
+    AND anotherDirectDependentNode <> directDependentNode
+  ORDER BY dependency.weight DESC, directDependentNode.name ASC
+   // Grouped by output, start node and dependency. The first collected entry is the node at the other end of the dependency.
+   WITH graphVizOutput 
+       ,directDependentNode
+       ,dependency
+       ,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
+  LIMIT 140
+   WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
+   // Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency 
+   WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3"          AS edgeAttributes
+   // Use an even lighter color for secondary dependency edges
+   WITH *, edgeAttributes + "; color = 3"                                       AS edgeAttributes
+   // Add the last part of the element id to the node name to make it unique.
+   WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
+   WITH *, firstLinkedDependentNode.name + "_" + split(elementId(firstLinkedDependentNode), ':')[-1] AS firstLinkedDependentNodeId
+   WITH *, "\"" + directDependentNodeId + "\" -> \"" + firstLinkedDependentNodeId + "\""   AS directDependenciesEdge
+   // Because of "WITH *" all variables in scope act as grouping keys here, so presumably every row collects only its own edge.
+   // The final UNWIND with RETURN DISTINCT flattens the lists and removes the lines repeated in every row.
+   WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]")                   AS directDependenciesEdges
+   WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
+UNWIND graphVizOutput AS graphVizOutputLine
+RETURN DISTINCT graphVizOutputLine