Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/public-analyze-code-graph.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ jobs:
repository: JohT/code-graph-analysis-pipeline
ref: ${{ inputs.ref }}
persist-credentials: false
fetch-tags: true

- name: (Java Setup) Java Development Kit (JDK) ${{ matrix.java }}
uses: actions/setup-java@dded0888837ed1f317902acf8a20df0ad188d165 # v5
Expand Down
3 changes: 3 additions & 0 deletions domains/anomaly-detection/anomalyDetectionCsv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ anomaly_detection_features() {
# Determine the article rank if not already done
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Write.cypher" "${@}"
# Determine the normalized difference between Page Rank and Article Rank if not already done
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Write.cypher" "${@}"
}

# Run queries to find anomalies in the graph.
Expand Down
3 changes: 3 additions & 0 deletions domains/anomaly-detection/anomalyDetectionPython.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ anomaly_detection_features() {
# Determine the article rank if not already done
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-ArticleRank-Write.cypher" "${@}"
# Determine the normalized difference between Page Rank and Article Rank if not already done
execute_cypher_queries_until_results "${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Exists.cypher" \
"${ANOMALY_DETECTION_FEATURE_CYPHER_DIR}/AnomalyDetectionFeature-PageToArticleRank-Write.cypher" "${@}"
}

# Execute the Python scripts for anomaly detection.
Expand Down
27 changes: 27 additions & 0 deletions domains/anomaly-detection/anomalyDetectionVisualization.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# Entry point for the anomaly detection graph visualization reports.
# "VisualizationReports.sh" triggers this script dynamically when the report "All" or "Visualization" is enabled.
# The "heavy lifting" is delegated to the dedicated "anomalyDetectionGraphVisualization.sh" script.

# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.

# Requires anomalyDetectionGraphVisualization.sh

# Fail on any error ("errexit" = exit on first error, "pipefail" = exit on errors within piped commands)
set -o errexit
set -o pipefail

# Overrideable Constants (defaults also defined in sub scripts)
if [ -z "${REPORTS_DIRECTORY:-}" ]; then
  REPORTS_DIRECTORY="reports"
fi

## Get the directory this script is located in if not already set
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
# This way non-standard tools like readlink aren't needed.
if [ -z "${ANOMALY_DETECTION_SCRIPT_DIR:-}" ]; then
  ANOMALY_DETECTION_SCRIPT_DIR=$(CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)
fi
# echo "anomalyDetectionVisualization: ANOMALY_DETECTION_SCRIPT_DIR=${ANOMALY_DETECTION_SCRIPT_DIR}"

# Get the "graphs" directory by taking the path of this script and appending "graphs" if not already set.
# It contains the script "anomalyDetectionGraphVisualization.sh" that is executed below.
if [ -z "${ANOMALY_DETECTION_GRAPHS_DIR:-}" ]; then
  ANOMALY_DETECTION_GRAPHS_DIR="${ANOMALY_DETECTION_SCRIPT_DIR}/graphs"
fi

# Delegate the execution to the responsible script.
source "${ANOMALY_DETECTION_GRAPHS_DIR}/anomalyDetectionGraphVisualization.sh"
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Return the first node with a "centralityPageRankToArticleRankDifference" property if it exists.
// Besides the difference, the node also needs to have both normalized values
// "centralityPageRankNormalized" and "centralityArticleRankNormalized".
// Returns at most one row. Returns no row if no node with the given label has all three properties.
// Requires the parameter "projection_node_label".

 MATCH (codeUnit)
 WHERE $projection_node_label IN labels(codeUnit)
   AND codeUnit.centralityPageRankToArticleRankDifference IS NOT NULL
   AND codeUnit.centralityPageRankNormalized IS NOT NULL
   // The property name needs to match the one written by "AnomalyDetectionFeature-PageToArticleRank-Write.cypher".
   // A misspelled name would never match so that the properties would be recalculated every time.
   AND codeUnit.centralityArticleRankNormalized IS NOT NULL
RETURN codeUnit.name AS shortCodeUnitName
      ,elementId(codeUnit) AS nodeElementId
      ,codeUnit.centralityPageRankToArticleRankDifference AS pageToArticleRankDifference
 LIMIT 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Calculates and writes the (amongst others) "centralityPageRankToArticleRankDifference" property.
// Written node properties:
// - "centralityPageRankNormalized": min-max normalized "centralityPageRank"
// - "centralityArticleRankNormalized": min-max normalized "centralityArticleRank"
// - "centralityPageRankToArticleRankDifference": normalized Page Rank minus normalized Article Rank
// Only nodes with the given label that have both "centralityPageRank" and "centralityArticleRank" are considered.
// Requires the parameter "projection_node_label". Returns the number of updated nodes as "nodePropertiesWritten".

 MATCH (codeUnit)
 WHERE $projection_node_label IN labels(codeUnit)
   AND codeUnit.centralityPageRank IS NOT NULL
   AND codeUnit.centralityArticleRank IS NOT NULL
// Aggregate the overall minimum and maximum and keep all nodes in a list to iterate over them again afterwards
  WITH collect(codeUnit) AS codeUnits
      ,min(codeUnit.centralityPageRank) AS minPageRank
      ,max(codeUnit.centralityPageRank) AS maxPageRank
      ,min(codeUnit.centralityArticleRank) AS minArticleRank
      ,max(codeUnit.centralityArticleRank) AS maxArticleRank
UNWIND codeUnits AS codeUnit
// Min-max normalization: (value - min) / (max - min).
// If all values are identical (e.g. only one node) the value range is zero.
// Then, the normalized value falls back to 0.0 to avoid a division by zero (invalid NaN result or error).
  WITH *
      ,CASE WHEN maxPageRank = minPageRank THEN 0.0
            ELSE (codeUnit.centralityPageRank - minPageRank) / (maxPageRank - minPageRank)
        END AS normalizedPageRank
      ,CASE WHEN maxArticleRank = minArticleRank THEN 0.0
            ELSE (codeUnit.centralityArticleRank - minArticleRank) / (maxArticleRank - minArticleRank)
        END AS normalizedArticleRank
  WITH *
      ,normalizedPageRank - normalizedArticleRank AS normalizedPageRankToArticleRankDifference
   SET codeUnit.centralityPageRankToArticleRankDifference = normalizedPageRankToArticleRankDifference
      ,codeUnit.centralityPageRankNormalized = normalizedPageRank
      ,codeUnit.centralityArticleRankNormalized = normalizedArticleRank
RETURN count(*) AS nodePropertiesWritten
114 changes: 114 additions & 0 deletions domains/anomaly-detection/graphs/TopAuthority.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Anomaly Detection Graphs: Find top nodes marked as "Authority" including their incoming and outgoing dependencies, sizes based on PageRank and thick outline for nodes with high Page Rank to Article Rank difference in Graphviz format.
// Parameters used: "projection_node_label", "projection_node_rank" and "projection_language".
// Returns one row per distinct Graphviz output line in the column "graphVizOutputLine".

// Step 1: Query overall statistics, e.g. min/max weight for later normalization
// The statistics are aggregated over all DEPENDS_ON relationships between nodes with the given label.
// "weight25PercentInterfaces" is preferred over "weight" if it is present.
MATCH (sourceForStatistics)-[dependencyForStatistics:DEPENDS_ON]->(targetForStatistics)
WHERE $projection_node_label IN labels(sourceForStatistics)
AND $projection_node_label IN labels(targetForStatistics)
WITH max(coalesce(dependencyForStatistics.weight25PercentInterfaces, dependencyForStatistics.weight)) AS maxWeight
,percentileDisc(sourceForStatistics.centralityPageRankToArticleRankDifference, 0.80) AS pageToArticleRankThreshold
,percentileDisc(targetForStatistics.centralityPageRankNormalized, 0.80) AS pageRankThreshold
// Step 2: Query selected central node
// The central node is the one with the given label whose "anomalyAuthorityRank" equals the given rank.
MATCH (central)
WHERE $projection_node_label IN labels(central)
AND central.anomalyAuthorityRank = toInteger($projection_node_rank)
WITH maxWeight
,pageToArticleRankThreshold
,pageRankThreshold
,central
,"Top Rank #" + $projection_node_rank + " " + $projection_language + " " + $projection_node_label + " Authority\\n" AS graphLabel
,coalesce(central.fqn, central.globalFqn, central.fileName, central.signature, central.name) AS targetName
// The legend is built up line by line using string concatenation and becomes part of the graph label.
WITH *, "\\n\\ndark nodes: incoming dependencies (limited max. 40)\\n" AS graphLegend
WITH *, graphLegend + "bright nodes: outgoing dependencies (limited max. 40)\\n" AS graphLegend
WITH *, graphLegend + "node value: Page Rank (normalized)\\n" AS graphLegend
WITH *, graphLegend + "large circle: > 80% percentile of Page Rank\\n" AS graphLegend
WITH *, graphLegend + "thick outline: > 80% percentile of Page Rank to Article Rank Difference\\n" AS graphLegend
// "graphVizOutput" is a list of Graphviz lines that gets extended in every following step.
WITH *, ["graph [label=\"" + graphLabel + targetName + graphLegend + "\\n\"];"] AS graphVizOutput
WITH *, "🏛️ authority #" + central.anomalyAuthorityRank + "\\n" + central.name AS centralNodeLabel
WITH *, graphVizOutput + ["central [label=\"" + centralNodeLabel + "\"];"] AS graphVizOutput
// Step 3: Query direct incoming dependencies to the central node
// NOTE(review): This is a plain MATCH (not OPTIONAL MATCH). If the central node has no matching incoming dependency,
// no row remains and the whole query returns no lines - confirm that this is intended.
// NOTE(review): The sort order uses "dependency.weight" whereas the edge label below prefers "weight25PercentInterfaces" - confirm.
MATCH (source)-[dependency:DEPENDS_ON]->(central)
WHERE $projection_node_label IN labels(source)
AND source.outgoingDependencies > 0
ORDER BY dependency.weight DESC, source.name ASC
LIMIT 40
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
// Scale the edge pen width relative to the maximum weight: 0.4 plus up to 2.5 for weights up to the maximum weight
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
// Thick outline (5 instead of 2) for nodes at or above the Page Rank to Article Rank difference threshold
WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold
THEN 5 ELSE 2 END AS scaledNodeBorder
// Large circle for nodes at or above the normalized Page Rank threshold
WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold
THEN "shape = \"circle\"; height=2; " ELSE "" END AS nodeEmphasis
// Show the normalized Page Rank as percentage in the node label
WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%" AS labelValue
// Add the last part of the element id to the node name to make it unique.
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directInBorder
// Split long names like inner classes identified by a dollar sign ($)
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directInLabel
WITH *, " [" + nodeEmphasis + directInLabel + directInBorder + "]; " AS directInNodeProperties
WITH *, "\"" + sourceId + "\" " + directInNodeProperties AS directInNode
// Collect the incoming nodes and their Graphviz lines (node definition followed by the edge to the central node).
// Only the explicitly listed variables are carried over to the next step.
WITH maxWeight
,pageToArticleRankThreshold
,pageRankThreshold
,central
,graphVizOutput
,collect(source) AS incomingDependencyNodes
,collect(directInNode + "\"" + sourceId + "\" -> central [" + edgeAttributes + "];") AS directInEdges
WITH *, graphVizOutput + directInEdges AS graphVizOutput
// Step 4: Query direct outgoing dependencies from the central node
// Here, the variable "source" refers to the target node of the outgoing dependency.
// NOTE(review): This is a plain MATCH (not OPTIONAL MATCH). If the central node has no matching outgoing dependency,
// no row remains and the whole query returns no lines - confirm that this is intended.
MATCH (source)<-[dependency:DEPENDS_ON]-(central)
WHERE $projection_node_label IN labels(source)
AND source.incomingDependencies > 0
ORDER BY dependency.weight DESC, source.name ASC
LIMIT 40
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
WITH *, round((toFloat(weight) / toFloat(maxWeight) * 2.5) + 0.4, 1.0) AS penWidth
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=" + penWidth AS edgeAttributes
// Use a lighter color for the target nodes of outgoing dependencies from the central node and their edges
WITH *, edgeAttributes + "; color = 5" AS edgeAttributes
WITH *, "color = 5; fillcolor = 1; " AS directOutColor
WITH *, CASE WHEN source.centralityPageRankToArticleRankDifference >= pageToArticleRankThreshold
THEN 5 ELSE 2 END AS scaledNodeBorder
WITH *, CASE WHEN source.centralityPageRankNormalized >= pageRankThreshold
THEN "shape = \"circle\"; height=2; " ELSE "" END AS nodeEmphasis
WITH *, round(source.centralityPageRankNormalized * 100.0, 2) + "%" AS labelValue
// Add the last part of the element id to the node name to make it unique.
WITH *, source.name + "_" + split(elementId(source), ':')[-1] AS sourceId
WITH *, "penwidth = " + scaledNodeBorder + "; " AS directOutBorder
// Split long names like inner classes identified by a dollar sign ($)
WITH *, replace(source.name, '$', '$\\n') AS sourceNameSplit
WITH *, "label = \"" + sourceNameSplit + "\\n(" + labelValue + ")\"; " AS directOutLabel
WITH *, " [" + nodeEmphasis + directOutLabel + directOutBorder + directOutColor + "]; " AS directOutNodeProperties
WITH *, "\"" + sourceId + "\" " + directOutNodeProperties AS directOutNode
// Collect the outgoing nodes and their Graphviz lines. The thresholds aren't needed anymore and are dropped here.
WITH maxWeight
,central
,graphVizOutput
,incomingDependencyNodes
,collect(source) AS outgoingDependencyNodes
,collect(directOutNode + "central -> \"" + sourceId + "\" [" + edgeAttributes + "];") AS directOutEdges
WITH *, graphVizOutput + directOutEdges AS graphVizOutput
WITH *, incomingDependencyNodes + outgoingDependencyNodes AS directDependentNodes
// Step 5: Query dependencies between direct dependencies outside the central node
// NOTE(review): This is a plain MATCH as well. If there is no dependency between the direct dependencies,
// no row remains and the whole query returns no lines - confirm that this is intended.
UNWIND directDependentNodes AS directDependentNode
MATCH (directDependentNode)-[dependency:DEPENDS_ON]->(anotherDirectDependentNode)
WHERE anotherDirectDependentNode IN directDependentNodes
AND anotherDirectDependentNode <> directDependentNode
ORDER BY dependency.weight DESC, directDependentNode.name ASC
WITH graphVizOutput
,directDependentNode
,dependency
,collect(anotherDirectDependentNode)[0] AS firstLinkedDependentNode
LIMIT 140
WITH *, coalesce(dependency.weight25PercentInterfaces, dependency.weight, 1) AS weight
// Use a fixed small pen width for secondary dependencies for better visibility of the more important direct dependency
WITH *, "label=" + weight + "; weight=" + weight + "; penwidth=0.3" AS edgeAttributes
// Use an even lighter color for secondary dependency edges
WITH *, edgeAttributes + "; color = 3" AS edgeAttributes
// Add the last part of the element id to the node name to make it unique.
WITH *, directDependentNode.name + "_" + split(elementId(directDependentNode), ':')[-1] AS directDependentNodeId
WITH *, firstLinkedDependentNode.name + "_" + split(elementId(firstLinkedDependentNode), ':')[-1] AS firstLinkedDependentNodeId
WITH *, "\"" + directDependentNodeId + "\" -> \"" + firstLinkedDependentNodeId + "\"" AS directDependenciesEdge
WITH *, collect(directDependenciesEdge + " [" + edgeAttributes + "]") AS directDependenciesEdges
WITH *, graphVizOutput + directDependenciesEdges AS graphVizOutput
// Flatten the collected lines into rows. DISTINCT removes the lines from the previous steps that are repeated in every row.
UNWIND graphVizOutput AS graphVizOutputLine
RETURN DISTINCT graphVizOutputLine
Loading
Loading