Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cypher/Topological_Sort/Topological_Sort_List.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Topological Sort to list the properties topologicalSortIndex (e.g. build order) and maxDistanceFromSource (build level) for each code unit node.
// Needs graph-data-science plugin version >= 2.5.0

CALL gds.dag.topologicalSort.stream(
$dependencies_projection + '-without-empty', {
computeMaxDistanceFromSource: true
}) YIELD nodeId, maxDistanceFromSource
WITH nodeId
,gds.util.asNode(nodeId) AS codeUnit
,toInteger(maxDistanceFromSource) AS maxDistanceFromSource
SET codeUnit.maxDistanceFromSource = maxDistanceFromSource
WITH collect(nodeId) AS sortedNodeIds
,collect({codeUnit: codeUnit, maxDistanceFromSource: maxDistanceFromSource}) AS topologicalSortedCodeUnits
,max(maxDistanceFromSource) AS overallMaxDistance
FOREACH (i IN range(0, SIZE(sortedNodeIds)-1) |
SET gds.util.asNode(sortedNodeIds[i]).topologicalSortIndex = i)
WITH topologicalSortedCodeUnits
,overallMaxDistance
UNWIND topologicalSortedCodeUnits AS sorted
RETURN coalesce(sorted.codeUnit.fqn, sorted.codeUnit.fileName, sorted.codeUnit.name) AS codeUnitName
,sorted.codeUnit.maxDistanceFromSource AS maxDistanceFromSource
,overallMaxDistance AS overalNumberOfBuildLevels
18 changes: 18 additions & 0 deletions cypher/Topological_Sort/Topological_Sort_Query.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Topological Sort to query the properties topologicalSortIndex (e.g. build order) and maxDistanceFromSource (build level) for each code unit node in topologicalSortIndex order.
// Needs graph-data-science plugin version >= 2.5.0

MATCH (codeUnit)
WHERE codeUnit.maxDistanceFromSource IS NOT NULL
AND codeUnit.topologicalSortIndex IS NOT NULL
AND $dependencies_projection_node IN LABELS(codeUnit)
WITH collect(codeUnit) AS codeUnits
,max(codeUnit.maxDistanceFromSource) AS overallMaxDistanceFromSource
UNWIND codeUnits AS codeUnit
RETURN coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName
,coalesce(replace(last(split(codeUnit.fileName, '/')), '.jar', ''), codeUnit.name) AS shortName
,codeUnit.topologicalSortIndex AS topologicalSortIndex
,codeUnit.maxDistanceFromSource AS maxDistanceFromSource
,overallMaxDistanceFromSource
,codeUnit.incomingDependencies AS incomingDependencies
,codeUnit.outgoingDependencies AS outgoingDependencies
ORDER BY codeUnit.topologicalSortIndex
17 changes: 17 additions & 0 deletions cypher/Topological_Sort/Topological_Sort_Write.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Topological Sort to write the properties topologicalSortIndex (e.g. build order) and maxDistanceFromSource (build level) into the graph.
// Needs graph-data-science plugin version >= 2.5.0

CALL gds.dag.topologicalSort.stream(
$dependencies_projection + '-without-empty', {
computeMaxDistanceFromSource: true
}) YIELD nodeId, maxDistanceFromSource
WITH nodeId
,gds.util.asNode(nodeId) AS codeUnit
,toInteger(maxDistanceFromSource) AS maxDistanceFromSource
SET codeUnit.maxDistanceFromSource = maxDistanceFromSource
WITH collect(nodeId) AS sortedNodeIds
,collect(codeUnit) AS sortedArtifacts
,max(maxDistanceFromSource) AS overallMaxDistance
FOREACH (i IN range(0, SIZE(sortedArtifacts)-1) |
SET gds.util.asNode(sortedNodeIds[i]).topologicalSortIndex = i)
RETURN size(sortedArtifacts) AS numberOfArtifacts, overallMaxDistance

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

49 changes: 0 additions & 49 deletions scripts/reports/ArtifactTopologicalSortCsv.sh

This file was deleted.

2 changes: 1 addition & 1 deletion scripts/reports/CentralityCsv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ TYPE_NODE="dependencies_projection_node=Type"
TYPE_WEIGHT="dependencies_projection_weight_property=weight"

# Type Centrality
echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing package dependencies..."
echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..."
createProjection "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"
time centralityWithPageRank "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"
time centralityWithArticleRank "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"
Expand Down
115 changes: 115 additions & 0 deletions scripts/reports/TopologicalSortCsv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/usr/bin/env bash

# Applies the Topological Sorting algorithm for directed acyclic graphs (DAG) to order code units by their dependencies
# using Graph Data Science Library of Neo4j and creates CSV reports.
# This is useful to get the build order and build levels for modules that depend on each other.
# It requires an already running Neo4j graph database with already scanned and analyzed artifacts.
# The reports (csv files) will be written into the sub directory reports/topology-csv.
# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script.

# Requires executeQueryFunctions.sh

# Overrideable constants (defaults also defined in sub scripts)
REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"}

## Get this "scripts/reports" directory if not already set
# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution.
# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes.
# This way non-standard tools like readlink aren't needed.
REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )}
echo "topologicalSortCsv: REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR}"

# Get the "scripts" directory by taking the path of this script and going one directory up.
SCRIPTS_DIR=${SCRIPTS_DIR:-"${REPORTS_SCRIPT_DIR}/.."} # Repository directory containing the shell scripts
echo "topologicalSortCsv: SCRIPTS_DIR=${SCRIPTS_DIR}"

# Get the "cypher" directory by taking the path of this script and going two directory up and then to "cypher".
CYPHER_DIR=${CYPHER_DIR:-"${REPORTS_SCRIPT_DIR}/../../cypher"}
echo "topologicalSortCsv: CYPHER_DIR=$CYPHER_DIR"

# Define functions to execute a cypher query from within the given file (first and only argument)
source "${SCRIPTS_DIR}/executeQueryFunctions.sh"

# Create report directory
REPORT_NAME="topology-csv"
FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}"
mkdir -p "${FULL_REPORT_DIRECTORY}"

# Topological Sort Preparation
# Selects the nodes and relationships for the algorithm and creates an in-memory projection.
# Nodes without incoming and outgoing dependencies (zero degree) will be filtered out with a subgraph.
#
# Required Parameters:
# - dependencies_projection=...
# Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
# Label of the nodes that will be used for the projection. Example: "Package"
# - dependencies_projection_weight_property=...
# Name of the node property that contains the dependency weight. Example: "weight"
createProjection() {
local PROJECTION_CYPHER_DIR="$CYPHER_DIR/Dependencies_Projection"

execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_1_Delete_Projection.cypher" "${@}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_2_Delete_Subgraph.cypher" "${@}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_3_Create_Projection.cypher" "${@}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}"
}

# Apply the algorithm "Topological Sort".
#
# Required Parameters:
# - dependencies_projection=...
# Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
# Label of the nodes that will be used for the projection. Example: "Package"
# - dependencies_projection_weight_property=...
# Name of the node property that contains the dependency weight. Example: "weight"
topologicalSort() {
local TOPOLOGICAL_SORT_DIR="$CYPHER_DIR/Topological_Sort"

# Update Graph (node properties)
execute_cypher "${TOPOLOGICAL_SORT_DIR}/Topological_Sort_Write.cypher" "${@}"

# Stream to CSV
local nodeLabel
nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" )
execute_cypher "${TOPOLOGICAL_SORT_DIR}/Topological_Sort_Query.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Topolical_Sort.csv"
}

# ---------------------------------------------------------------

# Artifact Query Parameters
ARTIFACT_PROJECTION="dependencies_projection=artifact-topology"
ARTIFACT_NODE="dependencies_projection_node=Artifact"
ARTIFACT_WEIGHT="dependencies_projection_weight_property=weight"

# Artifact Topology
echo "topologicalSortCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing artifact dependencies..."
createProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"
time topologicalSort "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}"

# ---------------------------------------------------------------

# Package Query Parameters
PACKAGE_PROJECTION="dependencies_projection=package-topology"
PACKAGE_NODE="dependencies_projection_node=Package"
PACKAGE_WEIGHT="dependencies_projection_weight_property=weight25PercentInterfaces"

# Package Topology
echo "topologicalSortCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing package dependencies..."
createProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"
time topologicalSort "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}"

# ---------------------------------------------------------------

# Type Query Parameters
TYPE_PROJECTION="dependencies_projection=type-topology"
TYPE_NODE="dependencies_projection_node=Type"
TYPE_WEIGHT="dependencies_projection_weight_property=weight"

# Type Topology
echo "topologicalSortCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..."
createProjection "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"
time topologicalSort "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}"

echo "topologicalSortCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished"