From 8f8bb42400eeb51a51d8ee8325caff9f9aafe445 Mon Sep 17 00:00:00 2001 From: JohT Date: Wed, 4 Oct 2023 23:09:01 +0200 Subject: [PATCH 1/7] Fix parametrized stream for mutated properties --- .../Dependencies_8_Stream_Mutated.cypher | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated.cypher b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated.cypher index a4be59790..1bb054119 100644 --- a/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated.cypher +++ b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated.cypher @@ -1,17 +1,18 @@ // Read a property from the projection into the Graph. Variables: dependencies_projection, dependencies_projection_write_property -CALL gds.fastRP.stream( - $dependencies_projection + '-without-empty', { - embeddingDimension: toInteger($dependencies_projection_embedding_dimension) - ,relationshipWeightProperty: $dependencies_projection_weight_property - } +CALL gds.graph.nodeProperties.stream( + $dependencies_projection + '-without-empty' + ,[$dependencies_projection_write_property] ) -YIELD nodeId, embedding +YIELD nodeId, nodeProperty, propertyValue WITH gds.util.asNode(nodeId) AS codeUnit - ,embedding + ,nodeProperty AS propertyName + ,propertyValue OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit) RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName + ,coalesce(replace(last(split(codeUnit.fileName, '/')), '.jar', ''), codeUnit.name) AS shortCodeUnitName + ,propertyName + ,propertyValue ,coalesce(codeUnit.leidenCommunityId, 0) AS communityId ,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality - ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName - ,embedding \ No newline at end of file + ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName \ No newline at end of file From 7ef3cc10e3bf90aa0cb96d81bb3d753d55a065a2 Mon Sep 17 
00:00:00 2001 From: JohT Date: Wed, 4 Oct 2023 23:10:53 +0200 Subject: [PATCH 2/7] Add K-Core Decomposition community detection --- ...on_5a_K_Core_Decomposition_Estimate.cypher | 24 ++++++++++++ ..._5b_K_Core_Decomposition_Statistics.cypher | 7 ++++ ...tion_5c_K_Core_Decomposition_Mutate.cypher | 8 ++++ ...tion_5d_K_Core_Decomposition_Stream.cypher | 18 +++++++++ ...ction_5e_K_Core_Decomposition_Write.cypher | 18 +++++++++ .../Dependencies_10_Delete_Label.cypher | 11 ++++++ .../Dependencies_11_Add_Label.cypher | 12 ++++++ scripts/reports/CommunityCsv.sh | 37 +++++++++++++++++++ 8 files changed, 135 insertions(+) create mode 100644 cypher/Community_Detection/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher create mode 100644 cypher/Community_Detection/Community_Detection_5b_K_Core_Decomposition_Statistics.cypher create mode 100644 cypher/Community_Detection/Community_Detection_5c_K_Core_Decomposition_Mutate.cypher create mode 100644 cypher/Community_Detection/Community_Detection_5d_K_Core_Decomposition_Stream.cypher create mode 100644 cypher/Community_Detection/Community_Detection_5e_K_Core_Decomposition_Write.cypher create mode 100644 cypher/Dependencies_Projection/Dependencies_10_Delete_Label.cypher create mode 100644 cypher/Dependencies_Projection/Dependencies_11_Add_Label.cypher diff --git a/cypher/Community_Detection/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher b/cypher/Community_Detection/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher new file mode 100644 index 000000000..70857c416 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher @@ -0,0 +1,24 @@ +// Community Detection K-Core Decomposition Estimate + +CALL gds.kcore.write.estimate( + $dependencies_projection + '-without-empty', { + writeProperty: $dependencies_projection_write_property +}) + YIELD requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + 
,treeView + ,mapView +RETURN requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + ,mapView \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_5b_K_Core_Decomposition_Statistics.cypher b/cypher/Community_Detection/Community_Detection_5b_K_Core_Decomposition_Statistics.cypher new file mode 100644 index 000000000..90ccdf50b --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_5b_K_Core_Decomposition_Statistics.cypher @@ -0,0 +1,7 @@ +// Community Detection K-Core Decomposition Statistics + +CALL gds.kcore.stats( + $dependencies_projection + '-without-empty', { +}) + YIELD degeneracy, preProcessingMillis, computeMillis, postProcessingMillis +RETURN degeneracy, preProcessingMillis, computeMillis, postProcessingMillis \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_5c_K_Core_Decomposition_Mutate.cypher b/cypher/Community_Detection/Community_Detection_5c_K_Core_Decomposition_Mutate.cypher new file mode 100644 index 000000000..ac1ecf5b1 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_5c_K_Core_Decomposition_Mutate.cypher @@ -0,0 +1,8 @@ +// Community Detection K-Core Decomposition Mutate + +CALL gds.kcore.mutate( + $dependencies_projection + '-without-empty', { + mutateProperty: $dependencies_projection_write_property +}) + YIELD degeneracy, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, mutateMillis +RETURN degeneracy, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, mutateMillis \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_5d_K_Core_Decomposition_Stream.cypher b/cypher/Community_Detection/Community_Detection_5d_K_Core_Decomposition_Stream.cypher new file mode 100644 index 000000000..95fd6d4e2 --- /dev/null +++ 
b/cypher/Community_Detection/Community_Detection_5d_K_Core_Decomposition_Stream.cypher @@ -0,0 +1,18 @@ +// Community Detection K-Core Decomposition Stream + +CALL gds.kcore.stream( + $dependencies_projection + '-without-empty', { +}) + YIELD nodeId, coreValue + WITH gds.util.asNode(nodeId) AS member + ,coreValue + WITH member + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + ,coreValue + WITH count(DISTINCT member) AS memberCount + ,collect(DISTINCT memberName) AS memberNames + ,coreValue +RETURN memberCount + ,coreValue + ,memberNames + ORDER BY memberCount DESC, coreValue ASC \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_5e_K_Core_Decomposition_Write.cypher b/cypher/Community_Detection/Community_Detection_5e_K_Core_Decomposition_Write.cypher new file mode 100644 index 000000000..050de3807 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_5e_K_Core_Decomposition_Write.cypher @@ -0,0 +1,18 @@ +// Community Detection K-Core Decomposition Write. Variables: dependencies_projection, dependencies_projection_write_property (e.g. communityKCoreDecompositionValue) + +CALL gds.kcore.write( + $dependencies_projection + '-without-empty', { + writeProperty: $dependencies_projection_write_property +}) +YIELD degeneracy + ,preProcessingMillis + ,computeMillis + ,writeMillis + ,postProcessingMillis + ,nodePropertiesWritten +RETURN degeneracy + ,preProcessingMillis + ,computeMillis + ,writeMillis + ,postProcessingMillis + ,nodePropertiesWritten \ No newline at end of file diff --git a/cypher/Dependencies_Projection/Dependencies_10_Delete_Label.cypher b/cypher/Dependencies_Projection/Dependencies_10_Delete_Label.cypher new file mode 100644 index 000000000..4bc08a4ad --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_10_Delete_Label.cypher @@ -0,0 +1,11 @@ +// Remove all labels that start with node label + write label from nodes that have the write property. Variables: dependencies_projection_node, dependencies_projection_write_label, dependencies_projection_write_property + + CALL db.labels() YIELD label + WHERE label STARTS WITH $dependencies_projection_node + $dependencies_projection_write_label + WITH collect(label)
AS selectedLabels + MATCH (member) + WHERE $dependencies_projection_node IN labels(member) + AND member[$dependencies_projection_write_property] IS NOT NULL + WITH collect(member) AS members, selectedLabels + CALL apoc.create.removeLabels(members, selectedLabels) YIELD node +RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Dependencies_Projection/Dependencies_11_Add_Label.cypher b/cypher/Dependencies_Projection/Dependencies_11_Add_Label.cypher new file mode 100644 index 000000000..42ce1c933 --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_11_Add_Label.cypher @@ -0,0 +1,12 @@ +// Add a label composed of node label + write label + property value to all nodes in groups with more than one member. Variables: dependencies_projection_node, dependencies_projection_write_label, dependencies_projection_write_property + + MATCH (member) + WHERE member[$dependencies_projection_write_property] IS NOT NULL + AND $dependencies_projection_node IN LABELS(member) + WITH collect(member) AS members + ,count(DISTINCT member) AS memberCount + ,$dependencies_projection_node + $dependencies_projection_write_label + toString(member[$dependencies_projection_write_property]) AS labelName + WHERE memberCount > 1 +UNWIND members AS member + CALL apoc.create.addLabels(member, [labelName]) YIELD node +RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/scripts/reports/CommunityCsv.sh b/scripts/reports/CommunityCsv.sh index 2c6990c58..791cb6f03 100755 --- a/scripts/reports/CommunityCsv.sh +++ b/scripts/reports/CommunityCsv.sh @@ -165,6 +165,40 @@ detectCommunitiesWithWeaklyConnectedComponents() { execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher" "${@}" } + +# Community Detection using the K-Core Decomposition Algorithm +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection.
Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. Example: "weight" +detectCommunitiesWithKCoreDecomposition() { + local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" + local writePropertyName="dependencies_projection_write_property=communityKCoreDecompositionValue" + local writeLabelName="dependencies_projection_write_label=KCoreDecomposition" + + # Statistics + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_5a_K_Core_Decomposition_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_5b_K_Core_Decomposition_Statistics.cypher" "${@}" + + # Run the algorithm and write the result into the in-memory projection ("mutate") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_5c_K_Core_Decomposition_Mutate.cypher" "${@}" "${writePropertyName}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_K_Core_Decomposition.csv" + + # Update Graph (node properties and labels) using the already mutated property projection + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" +} + # --------------------------------------------------------------- # Artifact Query Parameters @@ -180,6 +214,7 @@ time detectCommunitiesWithLeiden "${ARTIFACT_PROJECTION}" 
"${ARTIFACT_NODE}" "${ time detectCommunitiesWithLouvain "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" time detectCommunitiesWithWeaklyConnectedComponents "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" time detectCommunitiesWithLabelPropagation "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" +time detectCommunitiesWithKCoreDecomposition "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" # --------------------------------------------------------------- @@ -196,6 +231,7 @@ time detectCommunitiesWithLeiden "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PA time detectCommunitiesWithLouvain "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" time detectCommunitiesWithWeaklyConnectedComponents "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" time detectCommunitiesWithLabelPropagation "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" +time detectCommunitiesWithKCoreDecomposition "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" # Package Community Detection - Special CSV Queries after update execute_cypher "${CYPHER_DIR}/Community_Detection/Compare_Community_Detection_Results.cypher" > "${FULL_REPORT_DIRECTORY}/Compare_Community_Detection_Results.csv" @@ -216,6 +252,7 @@ time detectCommunitiesWithLeiden "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEI time detectCommunitiesWithLouvain "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" time detectCommunitiesWithWeaklyConnectedComponents "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" time detectCommunitiesWithLabelPropagation "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" +time detectCommunitiesWithKCoreDecomposition "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" # Type Community Detection - Special CSV Queries after update execute_cypher 
"${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv" From ce2b9852096fa088123ff5c2631e318c3e703613 Mon Sep 17 00:00:00 2001 From: JohT Date: Thu, 5 Oct 2023 08:22:51 +0200 Subject: [PATCH 3/7] Optimize Community Detection using Mutate --- ...unity_Detection_1a_Louvain_Estimate.cypher | 2 +- ...mmunity_Detection_1c_Louvain_Mutate.cypher | 38 +++++ ...munity_Detection_1d_Louvain_Stream.cypher} | 0 ...tion_1d_Stream_Intermediate_Mutated.cypher | 21 +++ ...y_Detection_1e_Louvain_Label_Delete.cypher | 11 -- ...ite_intermediateLouvainCommunityId.cypher} | 0 ...e_Louvain_Write_louvainCommunityId.cypher} | 0 ...ommunity_Detection_1f_Louvain_Label.cypher | 13 -- ...munity_Detection_2a_Leiden_Estimate.cypher | 2 +- ...ommunity_Detection_2c_Leiden_Mutate.cypher | 38 +++++ ...mmunity_Detection_2d_Leiden_Stream.cypher} | 0 ...ty_Detection_2e_Leiden_Label_Delete.cypher | 11 -- ...Community_Detection_2f_Leiden_Label.cypher | 13 -- ..._WeaklyConnectedComponents_Estimate.cypher | 6 +- ...eaklyConnectedComponents_Statistics.cypher | 4 +- ...3c_WeaklyConnectedComponents_Mutate.cypher | 30 ++++ ...d_WeaklyConnectedComponents_Stream.cypher} | 0 ...klyConnectedComponents_Label_Delete.cypher | 11 -- ...3e_WeaklyConnectedComponents_Write.cypher} | 0 ..._3f_WeaklyConnectedComponents_Label.cypher | 13 -- ...ction_4a_Label_Propagation_Estimate.cypher | 2 +- ...tection_4c_Label_Propagation_Mutate.cypher | 34 ++++ ...ection_4d_Label_Propagation_Stream.cypher} | 0 ...n_4e_Label_Propagation_Label_Delete.cypher | 11 -- ...tection_4e_Label_Propagation_Write.cypher} | 0 ...etection_4f_Label_Propagation_Label.cypher | 13 -- .../Dependencies_8_Stream_Mutated.cypher | 14 +- ...endencies_8_Stream_Mutated_Extended.cypher | 18 ++ ...pendencies_8_Stream_Mutated_Grouped.cypher | 19 +++ scripts/reports/CommunityCsv.sh | 156 +++++++++++------- 30 files 
changed, 309 insertions(+), 171 deletions(-) create mode 100644 cypher/Community_Detection/Community_Detection_1c_Louvain_Mutate.cypher rename cypher/Community_Detection/{Community_Detection_1c_Louvain_Stream.cypher => Community_Detection_1d_Louvain_Stream.cypher} (100%) create mode 100644 cypher/Community_Detection/Community_Detection_1d_Stream_Intermediate_Mutated.cypher delete mode 100644 cypher/Community_Detection/Community_Detection_1e_Louvain_Label_Delete.cypher rename cypher/Community_Detection/{Community_Detection_1d_Louvain_Write_intermediateLouvainCommunityId.cypher => Community_Detection_1e_Louvain_Write_intermediateLouvainCommunityId.cypher} (100%) rename cypher/Community_Detection/{Community_Detection_1d_Louvain_Write_louvainCommunityId.cypher => Community_Detection_1e_Louvain_Write_louvainCommunityId.cypher} (100%) delete mode 100644 cypher/Community_Detection/Community_Detection_1f_Louvain_Label.cypher create mode 100644 cypher/Community_Detection/Community_Detection_2c_Leiden_Mutate.cypher rename cypher/Community_Detection/{Community_Detection_2c_Leiden_Stream.cypher => Community_Detection_2d_Leiden_Stream.cypher} (100%) delete mode 100644 cypher/Community_Detection/Community_Detection_2e_Leiden_Label_Delete.cypher delete mode 100644 cypher/Community_Detection/Community_Detection_2f_Leiden_Label.cypher create mode 100644 cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher rename cypher/Community_Detection/{Community_Detection_3c_WeaklyConnectedComponents_Stream.cypher => Community_Detection_3d_WeaklyConnectedComponents_Stream.cypher} (100%) delete mode 100644 cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher rename cypher/Community_Detection/{Community_Detection_3d_WeaklyConnectedComponents_Write.cypher => Community_Detection_3e_WeaklyConnectedComponents_Write.cypher} (100%) delete mode 100644 
cypher/Community_Detection/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher create mode 100644 cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Mutate.cypher rename cypher/Community_Detection/{Community_Detection_4c_Label_Propagation_Stream.cypher => Community_Detection_4d_Label_Propagation_Stream.cypher} (100%) delete mode 100644 cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Label_Delete.cypher rename cypher/Community_Detection/{Community_Detection_4d_Label_Propagation_Write.cypher => Community_Detection_4e_Label_Propagation_Write.cypher} (100%) delete mode 100644 cypher/Community_Detection/Community_Detection_4f_Label_Propagation_Label.cypher create mode 100644 cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Extended.cypher create mode 100644 cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Grouped.cypher diff --git a/cypher/Community_Detection/Community_Detection_1a_Louvain_Estimate.cypher b/cypher/Community_Detection/Community_Detection_1a_Louvain_Estimate.cypher index af11bfe52..08b9c0a94 100644 --- a/cypher/Community_Detection/Community_Detection_1a_Louvain_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_1a_Louvain_Estimate.cypher @@ -4,7 +4,7 @@ CALL gds.louvain.write.estimate( $dependencies_projection + '-without-empty', { tolerance: 0.00001, relationshipWeightProperty: $dependencies_projection_weight_property, - writeProperty: 'louvainCommunityId', + writeProperty: $dependencies_projection_write_property, includeIntermediateCommunities: true }) YIELD nodeCount diff --git a/cypher/Community_Detection/Community_Detection_1c_Louvain_Mutate.cypher b/cypher/Community_Detection/Community_Detection_1c_Louvain_Mutate.cypher new file mode 100644 index 000000000..d24ea78da --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_1c_Louvain_Mutate.cypher @@ -0,0 +1,38 @@ +// Community Detection Louvain Mutate + +CALL gds.louvain.mutate( + 
$dependencies_projection + '-without-empty', { + tolerance: 0.00001, + consecutiveIds: NOT toBoolean($dependencies_include_intermediate_communities), + includeIntermediateCommunities: toBoolean($dependencies_include_intermediate_communities), + relationshipWeightProperty: $dependencies_projection_weight_property, + mutateProperty: $dependencies_projection_write_property +}) + YIELD communityCount + ,nodePropertiesWritten + ,ranLevels + ,modularity + ,modularities + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,communityDistribution +RETURN communityCount + ,nodePropertiesWritten + ,ranLevels + ,modularity + ,modularities + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,communityDistribution.min + ,communityDistribution.mean + ,communityDistribution.max + ,communityDistribution.p50 + ,communityDistribution.p75 + ,communityDistribution.p90 + ,communityDistribution.p95 + ,communityDistribution.p99 + ,communityDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_1c_Louvain_Stream.cypher b/cypher/Community_Detection/Community_Detection_1d_Louvain_Stream.cypher similarity index 100% rename from cypher/Community_Detection/Community_Detection_1c_Louvain_Stream.cypher rename to cypher/Community_Detection/Community_Detection_1d_Louvain_Stream.cypher diff --git a/cypher/Community_Detection/Community_Detection_1d_Stream_Intermediate_Mutated.cypher b/cypher/Community_Detection/Community_Detection_1d_Stream_Intermediate_Mutated.cypher new file mode 100644 index 000000000..2af4251a0 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_1d_Stream_Intermediate_Mutated.cypher @@ -0,0 +1,21 @@ +// Community Detection Stream Intermediate Mutated for hierarchical algorithms (Louvain, Leiden) + +CALL gds.graph.nodeProperty.stream( + $dependencies_projection + '-without-empty' + ,$dependencies_projection_write_property +) + YIELD nodeId, propertyValue +
WITH gds.util.asNode(nodeId) AS member + ,propertyValue AS intermediateCommunityIds + WITH member + ,intermediateCommunityIds + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + WITH intermediateCommunityIds + ,count(DISTINCT member) AS memberCount + ,collect(DISTINCT memberName) AS memberNames +RETURN intermediateCommunityIds[0] AS firstCommunityId + ,last(intermediateCommunityIds) AS communityId + ,intermediateCommunityIds + ,memberCount + ,memberNames + ORDER BY memberCount DESC, communityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_1e_Louvain_Label_Delete.cypher b/cypher/Community_Detection/Community_Detection_1e_Louvain_Label_Delete.cypher deleted file mode 100644 index 3cc8e0f65..000000000 --- a/cypher/Community_Detection/Community_Detection_1e_Louvain_Label_Delete.cypher +++ /dev/null @@ -1,11 +0,0 @@ -// Community Detection Louvain Label Delete - - CALL db.labels() YIELD label - WHERE label STARTS WITH $dependencies_projection_node + "Louvain" - WITH collect(label) AS selectedLabels - MATCH (member) - WHERE $dependencies_projection_node IN LABELS(member) - AND member.louvainCommunityId IS NOT NULL - WITH collect(member) AS members, selectedLabels - CALL apoc.create.removeLabels(members, selectedLabels) YIELD node -RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_1d_Louvain_Write_intermediateLouvainCommunityId.cypher b/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_intermediateLouvainCommunityId.cypher similarity index 100% rename from cypher/Community_Detection/Community_Detection_1d_Louvain_Write_intermediateLouvainCommunityId.cypher rename to cypher/Community_Detection/Community_Detection_1e_Louvain_Write_intermediateLouvainCommunityId.cypher diff --git a/cypher/Community_Detection/Community_Detection_1d_Louvain_Write_louvainCommunityId.cypher 
b/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_louvainCommunityId.cypher similarity index 100% rename from cypher/Community_Detection/Community_Detection_1d_Louvain_Write_louvainCommunityId.cypher rename to cypher/Community_Detection/Community_Detection_1e_Louvain_Write_louvainCommunityId.cypher diff --git a/cypher/Community_Detection/Community_Detection_1f_Louvain_Label.cypher b/cypher/Community_Detection/Community_Detection_1f_Louvain_Label.cypher deleted file mode 100644 index 7e00d1115..000000000 --- a/cypher/Community_Detection/Community_Detection_1f_Louvain_Label.cypher +++ /dev/null @@ -1,13 +0,0 @@ -// Community Detection Add LouvainCommunity+Id label - - MATCH (member) - WHERE member.louvainCommunityId IS NOT NULL - AND $dependencies_projection_node IN LABELS(member) - WITH member.louvainCommunityId AS communityId - ,collect(member) AS members - ,count(DISTINCT member) AS memberCount - ,$dependencies_projection_node + 'LouvainCommunity' + toString(member.louvainCommunityId) AS labelName - WHERE memberCount > 1 -UNWIND members AS member - CALL apoc.create.addLabels(member, [labelName]) YIELD node -RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_2a_Leiden_Estimate.cypher b/cypher/Community_Detection/Community_Detection_2a_Leiden_Estimate.cypher index 7554ce393..590014862 100644 --- a/cypher/Community_Detection/Community_Detection_2a_Leiden_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_2a_Leiden_Estimate.cypher @@ -7,7 +7,7 @@ CALL gds.beta.leiden.write.estimate( tolerance: 0.0000001, consecutiveIds: true, relationshipWeightProperty: $dependencies_projection_weight_property, - writeProperty: 'leidenCommunityId' + writeProperty: $dependencies_projection_write_property }) YIELD nodeCount ,relationshipCount diff --git a/cypher/Community_Detection/Community_Detection_2c_Leiden_Mutate.cypher 
b/cypher/Community_Detection/Community_Detection_2c_Leiden_Mutate.cypher new file mode 100644 index 000000000..381507a55 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_2c_Leiden_Mutate.cypher @@ -0,0 +1,38 @@ +// Community Detection Leiden Mutate + +CALL gds.leiden.mutate( + $dependencies_projection + '-without-empty', { + tolerance: 0.00001, + consecutiveIds: NOT toBoolean($dependencies_include_intermediate_communities), + includeIntermediateCommunities: toBoolean($dependencies_include_intermediate_communities), + relationshipWeightProperty: $dependencies_projection_weight_property, + mutateProperty: $dependencies_projection_write_property +}) + YIELD communityCount + ,nodePropertiesWritten + ,ranLevels + ,modularity + ,modularities + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,communityDistribution +RETURN communityCount + ,nodePropertiesWritten + ,ranLevels + ,modularity + ,modularities + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,communityDistribution.min + ,communityDistribution.mean + ,communityDistribution.max + ,communityDistribution.p50 + ,communityDistribution.p75 + ,communityDistribution.p90 + ,communityDistribution.p95 + ,communityDistribution.p99 + ,communityDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_2c_Leiden_Stream.cypher b/cypher/Community_Detection/Community_Detection_2d_Leiden_Stream.cypher similarity index 100% rename from cypher/Community_Detection/Community_Detection_2c_Leiden_Stream.cypher rename to cypher/Community_Detection/Community_Detection_2d_Leiden_Stream.cypher diff --git a/cypher/Community_Detection/Community_Detection_2e_Leiden_Label_Delete.cypher b/cypher/Community_Detection/Community_Detection_2e_Leiden_Label_Delete.cypher deleted file mode 100644 index a10b5442a..000000000 --- a/cypher/Community_Detection/Community_Detection_2e_Leiden_Label_Delete.cypher +++ /dev/null @@ 
-1,11 +0,0 @@ -// Community Detection Leiden Label Delete - - CALL db.labels() YIELD label - WHERE label STARTS WITH $dependencies_projection_node + "Leiden" - WITH collect(label) AS selectedLabels - MATCH (member) - WHERE $dependencies_projection_node IN LABELS(member) - AND member.leidenCommunityId IS NOT NULL - WITH collect(member) AS members, selectedLabels - CALL apoc.create.removeLabels(members, selectedLabels) YIELD node -RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_2f_Leiden_Label.cypher b/cypher/Community_Detection/Community_Detection_2f_Leiden_Label.cypher deleted file mode 100644 index 7403ecd31..000000000 --- a/cypher/Community_Detection/Community_Detection_2f_Leiden_Label.cypher +++ /dev/null @@ -1,13 +0,0 @@ -// Community Detection Add LeidenCommunity+Id label - - MATCH (member) - WHERE member.leidenCommunityId IS NOT NULL - AND $dependencies_projection_node IN LABELS(member) - WITH member.leidenCommunityId AS communityId - ,collect(member) AS members - ,count(DISTINCT member) AS memberCount - ,$dependencies_projection_node + 'LeidenCommunity' + toString(member.leidenCommunityId) AS labelName - WHERE memberCount > 1 -UNWIND members AS member - CALL apoc.create.addLabels(member, [labelName]) YIELD node -RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher b/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher index 189081360..436dfdf8f 100644 --- a/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher @@ -2,9 +2,9 @@ CALL gds.labelPropagation.write.estimate( $dependencies_projection + '-without-empty', { - relationshipWeightProperty: $dependencies_projection_weight_property, - consecutiveIds: 
true, - writeProperty: 'labelPropagationCommunityId' + relationshipWeightProperty: $dependencies_projection_weight_property + ,writeProperty: $dependencies_projection_write_property + ,consecutiveIds: true }) YIELD requiredMemory ,nodeCount diff --git a/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher b/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher index 40e16201b..b6a4e49fa 100644 --- a/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher +++ b/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher @@ -2,8 +2,8 @@ CALL gds.wcc.stats( $dependencies_projection + '-without-empty', { - relationshipWeightProperty: $dependencies_projection_weight_property, - consecutiveIds: true + relationshipWeightProperty: $dependencies_projection_weight_property + ,consecutiveIds: true }) YIELD componentCount ,preProcessingMillis diff --git a/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher b/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher new file mode 100644 index 000000000..9474318d7 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher @@ -0,0 +1,30 @@ +// Community Detection Weakly Connected Components Mutate + +CALL gds.wcc.mutate( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,mutateProperty: $dependencies_projection_write_property + ,consecutiveIds: true +}) + YIELD componentCount + ,nodePropertiesWritten + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,componentDistribution +RETURN componentCount + ,nodePropertiesWritten + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,componentDistribution.min + ,componentDistribution.mean + 
,componentDistribution.max + ,componentDistribution.p50 + ,componentDistribution.p75 + ,componentDistribution.p90 + ,componentDistribution.p95 + ,componentDistribution.p99 + ,componentDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Stream.cypher b/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Stream.cypher similarity index 100% rename from cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Stream.cypher rename to cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Stream.cypher diff --git a/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher b/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher deleted file mode 100644 index 41db60d67..000000000 --- a/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher +++ /dev/null @@ -1,11 +0,0 @@ -// Community Detection Leiden Label Delete - - CALL db.labels() YIELD label - WHERE label STARTS WITH $dependencies_projection_node + "WeaklyConnectedComponent" - WITH collect(label) AS selectedLabels - MATCH (member) - WHERE $dependencies_projection_node IN LABELS(member) - AND member.weaklyConnectedComponentId IS NOT NULL - WITH collect(member) AS members, selectedLabels - CALL apoc.create.removeLabels(members, selectedLabels) YIELD node -RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Write.cypher b/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher similarity index 100% rename from cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Write.cypher rename to cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher diff --git 
a/cypher/Community_Detection/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher b/cypher/Community_Detection/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher deleted file mode 100644 index 5f41cc147..000000000 --- a/cypher/Community_Detection/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher +++ /dev/null @@ -1,13 +0,0 @@ -// Community Detection Add WeaklyConnectedComponent+Id label - - MATCH (member) - WHERE member.weaklyConnectedComponentId IS NOT NULL - AND $dependencies_projection_node IN LABELS(member) - WITH member.weaklyConnectedComponentId AS communityId - ,collect(member) AS members - ,count(DISTINCT member) AS memberCount - ,$dependencies_projection_node + 'WeaklyConnectedComponent' + toString(member.weaklyConnectedComponentId) AS labelName - WHERE memberCount > 1 -UNWIND members AS member - CALL apoc.create.addLabels(member, [labelName]) YIELD node -RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher b/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher index cb526f14f..2b46e1ac5 100644 --- a/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher +++ b/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher @@ -3,8 +3,8 @@ CALL gds.labelPropagation.write.estimate( $dependencies_projection + '-without-empty', { relationshipWeightProperty: $dependencies_projection_weight_property + ,writeProperty: $dependencies_projection_write_property ,consecutiveIds: true - ,writeProperty: 'labelPropagationCommunityId' }) YIELD requiredMemory ,nodeCount diff --git a/cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Mutate.cypher b/cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Mutate.cypher new file mode 100644 index 000000000..1827f1db1 --- /dev/null +++ 
b/cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Mutate.cypher @@ -0,0 +1,34 @@ +// Community Detection Label Propagation Mutate + +CALL gds.labelPropagation.mutate( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,mutateProperty: $dependencies_projection_write_property + ,consecutiveIds: true +}) +YIELD ranIterations + ,didConverge + ,communityCount + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,nodePropertiesWritten + ,communityDistribution +RETURN ranIterations + ,didConverge + ,communityCount + ,preProcessingMillis + ,computeMillis + ,mutateMillis + ,postProcessingMillis + ,nodePropertiesWritten + ,communityDistribution.min + ,communityDistribution.mean + ,communityDistribution.max + ,communityDistribution.p50 + ,communityDistribution.p75 + ,communityDistribution.p90 + ,communityDistribution.p95 + ,communityDistribution.p99 + ,communityDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Stream.cypher b/cypher/Community_Detection/Community_Detection_4d_Label_Propagation_Stream.cypher similarity index 100% rename from cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Stream.cypher rename to cypher/Community_Detection/Community_Detection_4d_Label_Propagation_Stream.cypher diff --git a/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Label_Delete.cypher b/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Label_Delete.cypher deleted file mode 100644 index 3526d4070..000000000 --- a/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Label_Delete.cypher +++ /dev/null @@ -1,11 +0,0 @@ -// Community Detection Label Propagation Label Delete - - CALL db.labels() YIELD label - WHERE label STARTS WITH $dependencies_projection_node + "LabelPropagation" - WITH collect(label) AS selectedLabels - MATCH 
(member) - WHERE $dependencies_projection_node IN LABELS(member) - AND member.labelPropagationCommunityId IS NOT NULL - WITH collect(member) AS members, selectedLabels - CALL apoc.create.removeLabels(members, selectedLabels) YIELD node -RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4d_Label_Propagation_Write.cypher b/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Write.cypher similarity index 100% rename from cypher/Community_Detection/Community_Detection_4d_Label_Propagation_Write.cypher rename to cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Write.cypher diff --git a/cypher/Community_Detection/Community_Detection_4f_Label_Propagation_Label.cypher b/cypher/Community_Detection/Community_Detection_4f_Label_Propagation_Label.cypher deleted file mode 100644 index ecfc1dffd..000000000 --- a/cypher/Community_Detection/Community_Detection_4f_Label_Propagation_Label.cypher +++ /dev/null @@ -1,13 +0,0 @@ -// Community Detection Add LabelPropagationCommunity+Id label - - MATCH (member) - WHERE member.labelPropagationCommunityId IS NOT NULL - AND $dependencies_projection_node IN LABELS(member) - WITH member.labelPropagationCommunityId AS communityId - ,collect(member) AS members - ,count(DISTINCT member) AS memberCount - ,$dependencies_projection_node + 'LabelPropagationCommunity' + toString(member.labelPropagationCommunityId) AS labelName - WHERE memberCount > 1 -UNWIND members AS member - CALL apoc.create.addLabels(member, [labelName]) YIELD node -RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated.cypher b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated.cypher index 1bb054119..b9d0a1e53 100644 --- a/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated.cypher +++ b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated.cypher @@ -1,4 +1,4 @@ -// 
Read a property from the projection into the Graph. Variables: dependencies_projection, dependencies_projection_write_property +// Read a property from the projection. Variables: dependencies_projection, dependencies_projection_write_property CALL gds.graph.nodeProperties.stream( $dependencies_projection + '-without-empty' @@ -6,13 +6,9 @@ CALL gds.graph.nodeProperties.stream( ) YIELD nodeId, nodeProperty, propertyValue WITH gds.util.asNode(nodeId) AS codeUnit - ,nodeProperty AS propertyName + ,nodeProperty AS propertyName ,propertyValue -OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit) -RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName - ,coalesce(replace(last(split(codeUnit.fileName, '/')), '.jar', ''), codeUnit.name) AS shortCodeUnitName +RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName + ,coalesce(codeUnit.name, replace(last(split(codeUnit.fileName, '/')), '.jar', '')) AS shortCodeUnitName ,propertyName - ,propertyValue - ,coalesce(codeUnit.leidenCommunityId, 0) AS communityId - ,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality - ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName \ No newline at end of file + ,propertyValue \ No newline at end of file diff --git a/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Extended.cypher b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Extended.cypher new file mode 100644 index 000000000..cc95b8faa --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Extended.cypher @@ -0,0 +1,18 @@ +// Read a property from the projection and enrich it with node details from the Graph.
Variables: dependencies_projection, dependencies_projection_write_property + +CALL gds.graph.nodeProperties.stream( + $dependencies_projection + '-without-empty' + ,[$dependencies_projection_write_property] +) +YIELD nodeId, nodeProperty, propertyValue + WITH gds.util.asNode(nodeId) AS codeUnit + ,nodeProperty AS propertyName + ,propertyValue +OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit) +RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName + ,coalesce(codeUnit.name, replace(last(split(codeUnit.fileName, '/')), '.jar', '')) AS shortCodeUnitName + ,propertyName + ,propertyValue + ,coalesce(codeUnit.leidenCommunityId, 0) AS communityId // optional, might be null + ,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality // optional, might be null + ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS owningArtifactName \ No newline at end of file diff --git a/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Grouped.cypher b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Grouped.cypher new file mode 100644 index 000000000..7b34db879 --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Grouped.cypher @@ -0,0 +1,19 @@ +// Read a property from the projection. 
Variables: dependencies_projection, dependencies_projection_write_property + +CALL gds.graph.nodeProperties.stream( + $dependencies_projection + '-without-empty' + ,[$dependencies_projection_write_property] +) +YIELD nodeId, nodeProperty, propertyValue + WITH gds.util.asNode(nodeId) AS codeUnit + ,nodeProperty AS propertyName + ,propertyValue + WITH propertyName + ,propertyValue + ,coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName + ,coalesce(codeUnit.name, replace(last(split(codeUnit.fileName, '/')), '.jar', '')) AS shortCodeUnitName + WITH propertyName + ,propertyValue + ,collect(DISTINCT codeUnitName) AS codeUnitNames + ,collect(DISTINCT shortCodeUnitName) AS shortCodeUnitNames +RETURN propertyName, propertyValue, size(codeUnitNames) AS codeUnits, codeUnitNames, shortCodeUnitNames \ No newline at end of file diff --git a/scripts/reports/CommunityCsv.sh b/scripts/reports/CommunityCsv.sh index 791cb6f03..aae8b1153 100755 --- a/scripts/reports/CommunityCsv.sh +++ b/scripts/reports/CommunityCsv.sh @@ -54,7 +54,7 @@ createProjection() { execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}" } -# Community Detection using the Label Propagation Algorithm +# Community Detection using the Louvain Algorithm # # Required Parameters: # - dependencies_projection=... @@ -63,22 +63,39 @@ createProjection() { # Label of the nodes that will be used for the projection. Example: "Package" # - dependencies_projection_weight_property=... # Name of the node property that contains the dependency weight. 
Example: "weight" -detectCommunitiesWithLabelPropagation() { +detectCommunitiesWithLouvain() { local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" + + local writePropertyName="dependencies_projection_write_property=louvainCommunityId" + local writePropertyNameIntermediate="dependencies_projection_write_property=intermediateLouvainCommunityIds" + + local excludeIntermediateCommunities="dependencies_include_intermediate_communities=false" + local includeIntermediateCommunities="dependencies_include_intermediate_communities=true" + + local writeLabelName="dependencies_projection_write_label=LouvainCommunity" # Statistics - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4a_Label_Propagation_Estimate.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4b_Label_Propagation_Statistics.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1a_Louvain_Estimate.cypher" "${@}" "${writePropertyNameIntermediate}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1b_Louvain_Statistics.cypher" "${@}" - # Stream to CSV + # Run the algorithm and write the result into the in-memory projection ("mutate") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1c_Louvain_Mutate.cypher" "${@}" "${writePropertyName}" "${excludeIntermediateCommunities}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1c_Louvain_Mutate.cypher" "${@}" "${writePropertyNameIntermediate}" "${includeIntermediateCommunities}" + + # Stream to CSV + # Reads the mutated intermediate community ids for hierarchical algorithms in general + # and provides the first, the final and all intermediate community ids.
local nodeLabel nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4c_Label_Propagation_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Label_Propagation.csv" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1d_Stream_Intermediate_Mutated.cypher" "${@}" "${writePropertyNameIntermediate}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Louvain.csv" + #execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1d_Louvain_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Louvain.csv" - # Update Graph (node properties and labels) - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4d_Label_Propagation_Write.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4e_Label_Propagation_Label_Delete.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4f_Label_Propagation_Label.cypher" "${@}" + # Update Graph (node properties and labels) using the already mutated property projection + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyNameIntermediate}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" } # Community Detection using the Leiden Algorithm @@ -90,27 +107,42 @@ detectCommunitiesWithLabelPropagation() { # Label of the nodes that will be used for the projection. Example: "Package" # - dependencies_projection_weight_property=... # Name of the node property that contains the dependency weight. 
Example: "weight" -# - dependencies_leiden_gamma -# Leiden algorithmus parameter "gamma". Example (Default): 1.00 detectCommunitiesWithLeiden() { local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" + + local writePropertyName="dependencies_projection_write_property=leidenCommunityId" + local writePropertyNameIntermediate="dependencies_projection_write_property=intermediateleidenCommunityIds" + + local excludeIntermediateCommunities="dependencies_include_intermediate_communities=false" + local includeIntermediateCommunities="dependencies_include_intermediate_communities=true" + + local writeLabelName="dependencies_projection_write_label=LeidenCommunity" # Statistics - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2a_Leiden_Estimate.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2a_Leiden_Estimate.cypher" "${@}" "${writePropertyNameIntermediate}" execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2b_Leiden_Statistics.cypher" "${@}" + # Run the algorithm and write the result into the in-memory projection ("mutate") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2c_Leiden_Mutate.cypher" "${@}" "${writePropertyName}" "${excludeIntermediateCommunities}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2c_Leiden_Mutate.cypher" "${@}" "${writePropertyNameIntermediate}" "${includeIntermediateCommunities}" + # Stream to CSV + # Reads the mutated intermediate community ids for hierarchical algorithms in general + # and provides the first, the final and all intermediate community ids.
local nodeLabel nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2c_Leiden_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Leiden.csv" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1d_Stream_Intermediate_Mutated.cypher" "${@}" "${writePropertyNameIntermediate}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Leiden.csv" + #execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2d_Leiden_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Leiden.csv" - # Update Graph (node properties and labels) - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2d_Leiden_Write_Node_Property.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2e_Leiden_Label_Delete.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2f_Leiden_Label.cypher" "${@}" + # Update Graph (node properties and labels) using the already mutated property projection + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyNameIntermediate}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" } -# Community Detection using the Louvain Algorithm +# Community Detection using the Weakly Connected Components Algorithm # # Required Parameters: # - dependencies_projection=... @@ -119,26 +151,33 @@ detectCommunitiesWithLeiden() { # Label of the nodes that will be used for the projection. Example: "Package" # - dependencies_projection_weight_property=...
# Name of the node property that contains the dependency weight. Example: "weight" -detectCommunitiesWithLouvain() { +detectCommunitiesWithWeaklyConnectedComponents() { local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" + + local writePropertyName="dependencies_projection_write_property=weaklyConnectedComponentId" + local writeLabelName="dependencies_projection_write_label=WeaklyConnectedComponent" # Statistics - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1a_Louvain_Estimate.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1b_Louvain_Statistics.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher" "${@}" + # Run the algorithm and write the result into the in-memory projection ("mutate") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3c_WeaklyConnectedComponents_Mutate.cypher" "${@}" "${writePropertyName}" + # Stream to CSV local nodeLabel nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1c_Louvain_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Louvain.csv" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated_Grouped.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Weakly_Connected_Components.csv" + #execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3d_WeaklyConnectedComponents_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Weakly_Connected_Components.csv" - # Update Graph (node properties and labels) - execute_cypher 
"${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1d_Louvain_Write_louvainCommunityId.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1d_Louvain_Write_intermediateLouvainCommunityId.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1e_Louvain_Label_Delete.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1f_Louvain_Label.cypher" "${@}" + # Update Graph (node properties and labels) using the already mutated property projection + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" } -# Community Detection using the Weakly Connected Components Algorithm +# Community Detection using the Label Propagation Algorithm # # Required Parameters: # - dependencies_projection=... @@ -147,25 +186,32 @@ detectCommunitiesWithLouvain() { # Label of the nodes that will be used for the projection. Example: "Package" # - dependencies_projection_weight_property=... # Name of the node property that contains the dependency weight. 
Example: "weight" -detectCommunitiesWithWeaklyConnectedComponents() { +detectCommunitiesWithLabelPropagation() { + local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + local writePropertyName="dependencies_projection_write_property=labelPropagationCommunityId" + local writeLabelName="dependencies_projection_write_label=LabelPropagation" + # Statistics - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4a_Label_Propagation_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4b_Label_Propagation_Statistics.cypher" "${@}" + # Run the algorithm and write the result into the in-memory projection ("mutate") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4c_Label_Propagation_Mutate.cypher" "${@}" "${writePropertyName}" + # Stream to CSV local nodeLabel nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3c_WeaklyConnectedComponents_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Weakly_Connected_Components.csv" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated_Grouped.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Label_Propagation.csv" + #execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4d_Label_Propagation_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Label_Propagation.csv" - # Update Graph (node properties and labels) - execute_cypher 
"${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3d_WeaklyConnectedComponents_Write.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher" "${@}" - execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher" "${@}" + # Update Graph (node properties and labels) using the already mutated property projection + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" } - # Community Detection using the K-Core Decomposition Algorithm # # Required Parameters: @@ -178,6 +224,7 @@ detectCommunitiesWithWeaklyConnectedComponents() { detectCommunitiesWithKCoreDecomposition() { local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" + local writePropertyName="dependencies_projection_write_property=communityKCoreDecompositionValue" local writeLabelName="dependencies_projection_write_label=KCoreDecomposition" @@ -191,14 +238,22 @@ detectCommunitiesWithKCoreDecomposition() { # Stream to CSV local nodeLabel nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") - execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_K_Core_Decomposition.csv" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated_Grouped.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_K_Core_Decomposition.csv" # Update Graph (node properties and labels) using the already mutated property projection - 
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" } +detectCommunities() { + createProjection "${@}" + time detectCommunitiesWithWeaklyConnectedComponents "${@}" + time detectCommunitiesWithLabelPropagation "${@}" + time detectCommunitiesWithLouvain "${@}" + time detectCommunitiesWithLeiden "${@}" + time detectCommunitiesWithKCoreDecomposition "${@}" +} # --------------------------------------------------------------- # Artifact Query Parameters @@ -209,12 +264,7 @@ ARTIFACT_GAMMA="dependencies_leiden_gamma=1.11" # default = 1.00 # Artifact Community Detection echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing artifact dependencies..." 
-createProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" -time detectCommunitiesWithLeiden "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" -time detectCommunitiesWithLouvain "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" -time detectCommunitiesWithWeaklyConnectedComponents "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" -time detectCommunitiesWithLabelPropagation "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" -time detectCommunitiesWithKCoreDecomposition "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" +detectCommunities "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" # --------------------------------------------------------------- @@ -226,12 +276,7 @@ PACKAGE_GAMMA="dependencies_leiden_gamma=1.14" # default = 1.00 # Package Community Detection echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') communityCsv: Processing package dependencies..." 
-createProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" -time detectCommunitiesWithLeiden "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" -time detectCommunitiesWithLouvain "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" -time detectCommunitiesWithWeaklyConnectedComponents "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" -time detectCommunitiesWithLabelPropagation "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" -time detectCommunitiesWithKCoreDecomposition "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" +detectCommunities "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" # Package Community Detection - Special CSV Queries after update execute_cypher "${CYPHER_DIR}/Community_Detection/Compare_Community_Detection_Results.cypher" > "${FULL_REPORT_DIRECTORY}/Compare_Community_Detection_Results.csv" @@ -247,12 +292,7 @@ TYPE_GAMMA="dependencies_leiden_gamma=5.00" # default = 1.00 # Type Community Detection echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..." 
-createProjection "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" -time detectCommunitiesWithLeiden "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" -time detectCommunitiesWithLouvain "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" -time detectCommunitiesWithWeaklyConnectedComponents "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" -time detectCommunitiesWithLabelPropagation "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" -time detectCommunitiesWithKCoreDecomposition "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" +detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" # Type Community Detection - Special CSV Queries after update execute_cypher "${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv" From 09b2215bfc3f4adbe34f8f3e3f4dd5e09c0051f5 Mon Sep 17 00:00:00 2001 From: JohT Date: Fri, 6 Oct 2023 20:35:09 +0200 Subject: [PATCH 4/7] Fix missing query parameter for node embeddings --- scripts/reports/NodeEmbeddingsCsv.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/reports/NodeEmbeddingsCsv.sh b/scripts/reports/NodeEmbeddingsCsv.sh index fb1d948cf..6c2212149 100755 --- a/scripts/reports/NodeEmbeddingsCsv.sh +++ b/scripts/reports/NodeEmbeddingsCsv.sh @@ -100,7 +100,7 @@ nodeEmbeddingsWithFastRandomProjection() { # Stream to CSV local nodeLabel nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") - execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Embeddings_Label_Random_Projection.csv" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Embeddings_Label_Random_Projection.csv" # 
Update Graph (node properties and labels) using the already mutated property projection execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" @@ -170,7 +170,7 @@ nodeEmbeddingsWithNode2Vec() { # Stream to CSV local nodeLabel nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") - execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Embeddings_Node2Vec.csv" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Embeddings_Node2Vec.csv" # Update Graph (node properties and labels) using the already mutated property projection execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" From fcf178804bc5241c80ec4aff6c781892e35d732b Mon Sep 17 00:00:00 2001 From: JohT Date: Sun, 8 Oct 2023 16:59:47 +0200 Subject: [PATCH 5/7] Add Approximate Maximum k-cut community detection --- ..._Approximate_Maximum_k_cut_Estimate.cypher | 26 +++++++++++ ...6c_Approximate_Maximum_k_cut_Mutate.cypher | 10 ++++ ...6d_Approximate_Maximum_k_cut_Stream.cypher | 18 ++++++++ scripts/reports/CommunityCsv.sh | 46 +++++++++++++++++-- 4 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 cypher/Community_Detection/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher create mode 100644 cypher/Community_Detection/Community_Detection_6c_Approximate_Maximum_k_cut_Mutate.cypher create mode 100644 cypher/Community_Detection/Community_Detection_6d_Approximate_Maximum_k_cut_Stream.cypher diff --git a/cypher/Community_Detection/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher b/cypher/Community_Detection/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher new file mode 100644 index 000000000..88769682e --- /dev/null +++ 
b/cypher/Community_Detection/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher @@ -0,0 +1,26 @@ +// Community Detection Approximate Maximum k-cut Estimate + +CALL gds.maxkcut.mutate.estimate( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,mutateProperty: $dependencies_projection_write_property + ,k: toInteger($dependencies_maxkcut) +}) + YIELD requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + ,mapView +RETURN requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + ,mapView \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_6c_Approximate_Maximum_k_cut_Mutate.cypher b/cypher/Community_Detection/Community_Detection_6c_Approximate_Maximum_k_cut_Mutate.cypher new file mode 100644 index 000000000..becda650f --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_6c_Approximate_Maximum_k_cut_Mutate.cypher @@ -0,0 +1,10 @@ +// Community Detection Approximate Maximum k-cut Mutate + +CALL gds.maxkcut.mutate( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,mutateProperty: $dependencies_projection_write_property + ,k: toInteger($dependencies_maxkcut) +}) + YIELD cutCost, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, mutateMillis +RETURN cutCost, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, mutateMillis \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_6d_Approximate_Maximum_k_cut_Stream.cypher b/cypher/Community_Detection/Community_Detection_6d_Approximate_Maximum_k_cut_Stream.cypher new file mode 100644 index 000000000..f4d187157 --- /dev/null +++ 
b/cypher/Community_Detection/Community_Detection_6d_Approximate_Maximum_k_cut_Stream.cypher @@ -0,0 +1,18 @@ +// Community Detection Approximate Maximum k-cut Stream + +CALL gds.maxkcut.stream( + $dependencies_projection + '-without-empty', { +}) + YIELD nodeId, communityId + WITH gds.util.asNode(nodeId) AS member + ,communityId + WITH member + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + ,communityId + WITH communityId + ,count(DISTINCT member) AS memberCount + ,collect(DISTINCT memberName) AS memberNames +RETURN communityId + ,memberCount + ,memberNames + ORDER BY memberCount DESC, communityId ASC \ No newline at end of file diff --git a/scripts/reports/CommunityCsv.sh b/scripts/reports/CommunityCsv.sh index aae8b1153..5f5ec2c3f 100755 --- a/scripts/reports/CommunityCsv.sh +++ b/scripts/reports/CommunityCsv.sh @@ -246,6 +246,42 @@ detectCommunitiesWithKCoreDecomposition() { execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" } +# Community Detection using the Approximate Maximum k-cut Algorithm +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. 
Example: "weight" +detectCommunitiesWithApproximateMaximumKCut() { + local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" + + local writePropertyName="dependencies_projection_write_property=communityMaximumKCutId" + local writeLabelName="dependencies_projection_write_label=MaximumKCut" + + # Statistics + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_6a_Approximate_Maximum_k_cut_Estimate.cypher" "${@}" "${writePropertyName}" + # Note: There is no statistics function yet in gds version 2.5.0-preview3 + #execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_5b_K_Core_Decomposition_Statistics.cypher" "${@}" + + # Run the algorithm and write the result into the in-memory projection ("mutate") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_6c_Approximate_Maximum_k_cut_Mutate.cypher" "${@}" "${writePropertyName}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated_Grouped.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Approximate_Maximum_K_Cut.csv" + #execute_cypher "${PROJECTION_CYPHER_DIR}/Community_Detection_6d_Approximate_Maximum_k_cut_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Approximate_Maximum_K_Cut.csv" + + # Update Graph (node properties and labels) using the already mutated property projection + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" +} + detectCommunities() { createProjection 
"${@}" time detectCommunitiesWithWeaklyConnectedComponents "${@}" @@ -253,6 +289,7 @@ detectCommunities() { time detectCommunitiesWithLouvain "${@}" time detectCommunitiesWithLeiden "${@}" time detectCommunitiesWithKCoreDecomposition "${@}" + time detectCommunitiesWithApproximateMaximumKCut "${@}" } # --------------------------------------------------------------- @@ -261,10 +298,11 @@ ARTIFACT_PROJECTION="dependencies_projection=artifact-community" ARTIFACT_NODE="dependencies_projection_node=Artifact" ARTIFACT_WEIGHT="dependencies_projection_weight_property=weight" ARTIFACT_GAMMA="dependencies_leiden_gamma=1.11" # default = 1.00 +ARTIFACT_KCUT="dependencies_maxkcut=5" # default = 2 # Artifact Community Detection echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing artifact dependencies..." -detectCommunities "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" +detectCommunities "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" "${ARTIFACT_KCUT}" # --------------------------------------------------------------- @@ -273,10 +311,11 @@ PACKAGE_PROJECTION="dependencies_projection=package-community" PACKAGE_NODE="dependencies_projection_node=Package" PACKAGE_WEIGHT="dependencies_projection_weight_property=weight25PercentInterfaces" PACKAGE_GAMMA="dependencies_leiden_gamma=1.14" # default = 1.00 +PACKAGE_KCUT="dependencies_maxkcut=20" # default = 2 # Package Community Detection echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') communityCsv: Processing package dependencies..." 
-detectCommunities "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" +detectCommunities "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" "${PACKAGE_KCUT}" # Package Community Detection - Special CSV Queries after update execute_cypher "${CYPHER_DIR}/Community_Detection/Compare_Community_Detection_Results.cypher" > "${FULL_REPORT_DIRECTORY}/Compare_Community_Detection_Results.csv" @@ -289,10 +328,11 @@ TYPE_PROJECTION="dependencies_projection=type-community" TYPE_NODE="dependencies_projection_node=Type" TYPE_WEIGHT="dependencies_projection_weight_property=weight" TYPE_GAMMA="dependencies_leiden_gamma=5.00" # default = 1.00 +TYPE_KCUT="dependencies_maxkcut=100" # default = 2 # Type Community Detection echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..." -detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" +detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" "${TYPE_KCUT}" # Type Community Detection - Special CSV Queries after update execute_cypher "${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv" From 4d592d733f1d17b6507873e4f7d33e60cb7b84a3 Mon Sep 17 00:00:00 2001 From: JohT Date: Sun, 8 Oct 2023 18:03:42 +0200 Subject: [PATCH 6/7] Add Modularity to Louvain & Leiden communities --- .../Community_Detection_7d_Modularity.cypher | 10 +++++++++ ...ity_Detection_7d_Modularity_Members.cypher | 22 +++++++++++++++++++ ...unity_Detection_7e_Write_Modularity.cypher | 15 +++++++++++++ scripts/reports/CommunityCsv.sh | 2 ++ 4 files changed, 49 insertions(+) create mode 100644 cypher/Community_Detection/Community_Detection_7d_Modularity.cypher create mode 100644 cypher/Community_Detection/Community_Detection_7d_Modularity_Members.cypher create mode 100644 
cypher/Community_Detection/Community_Detection_7e_Write_Modularity.cypher diff --git a/cypher/Community_Detection/Community_Detection_7d_Modularity.cypher b/cypher/Community_Detection/Community_Detection_7d_Modularity.cypher new file mode 100644 index 000000000..ab4906309 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_7d_Modularity.cypher @@ -0,0 +1,10 @@ +// Community Detection Modularity + +CALL gds.alpha.modularity.stream( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,communityProperty: $dependencies_projection_write_property +}) + YIELD communityId, modularity +RETURN communityId, modularity +ORDER BY communityId ASCENDING \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_7d_Modularity_Members.cypher b/cypher/Community_Detection/Community_Detection_7d_Modularity_Members.cypher new file mode 100644 index 000000000..c0f88dbec --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_7d_Modularity_Members.cypher @@ -0,0 +1,22 @@ +// Community Detection Modularity Members + +CALL gds.modularity.stream( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,communityProperty: $dependencies_projection_write_property +}) + YIELD communityId, modularity + WITH communityId, modularity + MATCH (member) + WHERE member[$dependencies_projection_write_property] = communityId + AND $dependencies_projection_node IN LABELS(member) + WITH communityId + ,modularity + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + ,coalesce(member.name, replace(last(split(member.fileName, '/')), '.jar', '')) AS shortMemberName +RETURN communityId + ,modularity + ,count(DISTINCT memberName) AS memberCount + ,collect(DISTINCT shortMemberName) AS shortMemberNames + ,collect(DISTINCT memberName) AS memberNames +ORDER BY communityId ASCENDING \ No newline at end of file 
diff --git a/cypher/Community_Detection/Community_Detection_7e_Write_Modularity.cypher b/cypher/Community_Detection/Community_Detection_7e_Write_Modularity.cypher new file mode 100644 index 000000000..14436e29d --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_7e_Write_Modularity.cypher @@ -0,0 +1,15 @@ +// Community Detection Write Modularity + +CALL gds.modularity.stream( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,communityProperty: $dependencies_projection_write_property +}) + YIELD communityId, modularity + WITH communityId, modularity + MATCH (member) + WHERE member[$dependencies_projection_write_property] = communityId + AND $dependencies_projection_node IN LABELS(member) + CALL apoc.create.setProperty(member, $dependencies_projection_write_property + 'Modularity', modularity) + YIELD node +RETURN count(DISTINCT node) AS writtenModularityNodes; \ No newline at end of file diff --git a/scripts/reports/CommunityCsv.sh b/scripts/reports/CommunityCsv.sh index 5f5ec2c3f..1fb8f6afb 100755 --- a/scripts/reports/CommunityCsv.sh +++ b/scripts/reports/CommunityCsv.sh @@ -96,6 +96,7 @@ detectCommunitiesWithLouvain() { execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyNameIntermediate}" execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_7e_Write_Modularity.cypher" "${@}" "${writePropertyName}" } # Community Detection using the Leiden Algorithm @@ -140,6 +141,7 @@ detectCommunitiesWithLeiden() { execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyNameIntermediate}" execute_cypher 
"${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_7e_Write_Modularity.cypher" "${@}" "${writePropertyName}" } # Community Detection using the Weakly Connected Components Algorithm From d946ad3183c6f05657a3a12e6c49eddb82b94fe1 Mon Sep 17 00:00:00 2001 From: JohT Date: Mon, 9 Oct 2023 09:16:13 +0200 Subject: [PATCH 7/7] Unify community detection property names --- ...rite_intermediateLouvainCommunityId.cypher | 4 +-- ...1e_Louvain_Write_louvainCommunityId.cypher | 4 +-- ...ction_2d_Leiden_Write_Node_Property.cypher | 4 +-- ..._3e_WeaklyConnectedComponents_Write.cypher | 4 +-- ...etection_4e_Label_Propagation_Write.cypher | 4 +-- ...Compare_Community_Detection_Results.cypher | 14 ----------- .../Compare_Louvain_vs_Leiden_Results.cypher | 21 ++++++++++++++++ .../Community_Detection/Set_Parameters.cypher | 2 +- ..._community_spans_multiple_artifacts.cypher | 2 +- ...nd_how_are_the_packages_distributed.cypher | 6 ++--- ...s_and_how_are_the_types_distributed.cypher | 4 +-- ...endencies_8_Stream_Mutated_Extended.cypher | 2 +- ...age_per_artifact_package_aggregated.cypher | 2 +- ...gs_1d_Fast_Random_Projection_Stream.cypher | 4 +-- .../Node_Embeddings_2d_Hash_GNN_Stream.cypher | 4 +-- .../Node_Embeddings_3d_Node2Vec_Stream.cypher | 4 +-- scripts/reports/CommunityCsv.sh | 25 +++++++++++++------ 17 files changed, 64 insertions(+), 46 deletions(-) delete mode 100644 cypher/Community_Detection/Compare_Community_Detection_Results.cypher create mode 100644 cypher/Community_Detection/Compare_Louvain_vs_Leiden_Results.cypher diff --git a/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_intermediateLouvainCommunityId.cypher 
b/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_intermediateLouvainCommunityId.cypher index 29124fd05..21d3c9747 100644 --- a/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_intermediateLouvainCommunityId.cypher +++ b/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_intermediateLouvainCommunityId.cypher @@ -1,11 +1,11 @@ -//Community Detection Louvain Write intermediateLouvainCommunityId +//Community Detection Louvain Write communityLouvainIntermediateIds CALL gds.louvain.write( $dependencies_projection + '-without-empty', { tolerance: 0.00001, includeIntermediateCommunities: true, relationshipWeightProperty: $dependencies_projection_weight_property, - writeProperty: 'intermediateLouvainCommunityId' + writeProperty: 'communityLouvainIntermediateIds' }) YIELD preProcessingMillis ,computeMillis diff --git a/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_louvainCommunityId.cypher b/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_louvainCommunityId.cypher index 5a3ca4dab..4f92cf66b 100644 --- a/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_louvainCommunityId.cypher +++ b/cypher/Community_Detection/Community_Detection_1e_Louvain_Write_louvainCommunityId.cypher @@ -1,11 +1,11 @@ -//Community Detection Louvain write node property louvainCommunityId +//Community Detection Louvain write node property communityLouvainId CALL gds.louvain.write( $dependencies_projection + '-without-empty', { tolerance: 0.00001, consecutiveIds: true, relationshipWeightProperty: $dependencies_projection_weight_property, - writeProperty: 'louvainCommunityId' + writeProperty: 'communityLouvainId' }) YIELD preProcessingMillis ,computeMillis diff --git a/cypher/Community_Detection/Community_Detection_2d_Leiden_Write_Node_Property.cypher b/cypher/Community_Detection/Community_Detection_2d_Leiden_Write_Node_Property.cypher index 107ac9003..411c5a53f 100644 --- 
a/cypher/Community_Detection/Community_Detection_2d_Leiden_Write_Node_Property.cypher +++ b/cypher/Community_Detection/Community_Detection_2d_Leiden_Write_Node_Property.cypher @@ -1,4 +1,4 @@ -//Community Detection Leiden Write property leidenCommunityId +//Community Detection Leiden Write property communityLeidenId CALL gds.beta.leiden.write( $dependencies_projection + '-without-empty', { @@ -7,7 +7,7 @@ CALL gds.beta.leiden.write( tolerance: 0.0000001, consecutiveIds: true, relationshipWeightProperty: $dependencies_projection_weight_property, - writeProperty: 'leidenCommunityId' + writeProperty: 'communityLeidenId' }) YIELD preProcessingMillis ,computeMillis diff --git a/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher b/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher index 72b3c67d2..077db25ea 100644 --- a/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher +++ b/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Write.cypher @@ -1,10 +1,10 @@ -// Community Detection Weakly Connected Components write node property weaklyConnectedComponentId +// Community Detection Weakly Connected Components write node property communityWeaklyConnectedComponentId CALL gds.wcc.write( $dependencies_projection + '-without-empty', { relationshipWeightProperty: $dependencies_projection_weight_property ,consecutiveIds: true - ,writeProperty: 'weaklyConnectedComponentId' + ,writeProperty: 'communityWeaklyConnectedComponentId' }) YIELD componentCount ,preProcessingMillis diff --git a/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Write.cypher b/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Write.cypher index 6326cbc2c..0339d8d44 100644 --- a/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Write.cypher +++ 
b/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Write.cypher @@ -1,10 +1,10 @@ -// Community Detection Label Propagation write node property labelPropagationCommunityId +// Community Detection Label Propagation write node property communityLabelPropagationId CALL gds.labelPropagation.write( $dependencies_projection + '-without-empty', { relationshipWeightProperty: $dependencies_projection_weight_property ,consecutiveIds: true - ,writeProperty: 'labelPropagationCommunityId' + ,writeProperty: 'communityLabelPropagationId' }) YIELD ranIterations ,didConverge diff --git a/cypher/Community_Detection/Compare_Community_Detection_Results.cypher b/cypher/Community_Detection/Compare_Community_Detection_Results.cypher deleted file mode 100644 index c5dbcd338..000000000 --- a/cypher/Community_Detection/Compare_Community_Detection_Results.cypher +++ /dev/null @@ -1,14 +0,0 @@ -// Compare Community Detection Results - -MATCH (package:Package) - WITH package.louvainCommunityId AS louvainCommunityId - ,package.leidenCommunityId AS leidenCommunityId - ,collect(DISTINCT package.fqn) AS packages - ,count(DISTINCT package.fqn) AS packageCount - WHERE louvainCommunityId IS NOT NULL - AND leidenCommunityId IS NOT NULL -RETURN louvainCommunityId - ,leidenCommunityId - ,packageCount - ,packages -ORDER BY packageCount DESC, louvainCommunityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection/Compare_Louvain_vs_Leiden_Results.cypher b/cypher/Community_Detection/Compare_Louvain_vs_Leiden_Results.cypher new file mode 100644 index 000000000..8bca2d8fd --- /dev/null +++ b/cypher/Community_Detection/Compare_Louvain_vs_Leiden_Results.cypher @@ -0,0 +1,21 @@ +// Compare Louvain vs. Leiden Community Detection Results. Variables: dependencies_projection_node (e.g. 
"Artifact", "Package", "Type") + + MATCH (member) + WHERE $dependencies_projection_node IN LABELS(member) + WITH member.communityLouvainId AS louvainCommunityId + ,member.communityLeidenId AS leidenCommunityId + ,coalesce(member.fqn, member.fileName, member.signature, member.name) AS memberName + ,coalesce(member.name, replace(last(split(member.fileName, '/')), '.jar', '')) AS shortMemberName + WHERE louvainCommunityId IS NOT NULL + AND leidenCommunityId IS NOT NULL + WITH louvainCommunityId + ,leidenCommunityId + ,collect(DISTINCT shortMemberName) AS shortMemberNames + ,count(DISTINCT memberName) AS memberCount + ,collect(DISTINCT memberName) AS memberNames +RETURN louvainCommunityId + ,leidenCommunityId + ,memberCount + ,shortMemberNames + ,memberNames +ORDER BY louvainCommunityId ASC, memberCount DESC \ No newline at end of file diff --git a/cypher/Community_Detection/Set_Parameters.cypher b/cypher/Community_Detection/Set_Parameters.cypher index 693eec040..920bda6b8 100644 --- a/cypher/Community_Detection/Set_Parameters.cypher +++ b/cypher/Community_Detection/Set_Parameters.cypher @@ -4,6 +4,6 @@ "dependencies_projection": "package-community", "dependencies_projection_node": "Package", "dependencies_projection_weight_property": "weight25PercentInterfaces", - "dependencies_projection_write_property": "leidenCommunityId", + "dependencies_projection_write_property": "communityLeidenId", "dependencies_leiden_gamma": "1.14", } \ No newline at end of file diff --git a/cypher/Community_Detection/Which_package_community_spans_multiple_artifacts.cypher b/cypher/Community_Detection/Which_package_community_spans_multiple_artifacts.cypher index 78c8feac6..1106967c7 100644 --- a/cypher/Community_Detection/Which_package_community_spans_multiple_artifacts.cypher +++ b/cypher/Community_Detection/Which_package_community_spans_multiple_artifacts.cypher @@ -1,7 +1,7 @@ // Which package community spans multiple artifacts? 
MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) - WITH package.leidenCommunityId AS communityId + WITH package.communityLeidenId AS communityId ,collect(DISTINCT replace(last(split(artifact.fileName, '/')), '.jar', '')) AS artifactNames ,size(collect(DISTINCT artifact)) AS artifactCount WHERE communityId IS NOT NULL diff --git a/cypher/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher b/cypher/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher index 66bf8c8cd..0e8a63f37 100644 --- a/cypher/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher +++ b/cypher/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher @@ -3,9 +3,9 @@ MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) MATCH (externalArtifact:Artifact)-[:CONTAINS]->(externalPackage:Package) WHERE artifact.fileName <> externalArtifact.fileName - AND package.leidenCommunityId - = externalPackage.leidenCommunityId - WITH package.leidenCommunityId AS communityId + AND package.communityLeidenId + = externalPackage.communityLeidenId + WITH package.communityLeidenId AS communityId ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName ,collect(DISTINCT package.name) AS packageNames ,size(collect(DISTINCT package)) AS packageCount diff --git a/cypher/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher b/cypher/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher index f03dd01e9..8d9d6906f 100644 --- a/cypher/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher +++ b/cypher/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher @@ -4,8 +4,8 @@ 
MATCH (externalArtifact:Artifact)-[:CONTAINS]->(externalPackage:Package)-[:CONTAINS]->(externalType:Type) WHERE artifact.fileName <> externalArtifact.fileName AND package.fqn <> externalPackage.fqn - AND type.leidenCommunityId = externalType.leidenCommunityId - WITH type.leidenCommunityId AS communityId + AND type.communityLeidenId = externalType.communityLeidenId + WITH type.communityLeidenId AS communityId ,size(collect(DISTINCT artifact)) AS artifactCount ,collect(DISTINCT replace(last(split(artifact.fileName, '/')), '.jar', '')) AS artifactNames ,size(collect(DISTINCT package)) AS packageCount diff --git a/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Extended.cypher b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Extended.cypher index cc95b8faa..0e0f10b1e 100644 --- a/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Extended.cypher +++ b/cypher/Dependencies_Projection/Dependencies_8_Stream_Mutated_Extended.cypher @@ -13,6 +13,6 @@ RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS code ,coalesce(codeUnit.name, replace(last(split(codeUnit.fileName, '/')), '.jar', '')) AS shortCodeUnitName ,propertyName ,propertyValue - ,coalesce(codeUnit.leidenCommunityId, 0) AS communityId // optional, might be null + ,coalesce(codeUnit.communityLeidenId, 0) AS communityId // optional, might be null ,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality // optional, might be null ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS owningArtifactName \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact_package_aggregated.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact_package_aggregated.cypher index 0101f1aa3..8908a706e 100644 --- a/cypher/External_Dependencies/External_package_usage_per_artifact_package_aggregated.cypher +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_package_aggregated.cypher @@ 
-5,7 +5,7 @@ MATCH (package)-[:CONTAINS]->(type:Type) WHERE NOT type:ExternalType WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName - ,artifact.leidenCommunityId AS leidenCommunityId + ,artifact.communityLeidenId AS leidenCommunityId ,count(DISTINCT package.fqn) AS artifactPackages ,count(DISTINCT type.fqn) AS artifactTypes ,collect(type) AS typeList diff --git a/cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher b/cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher index 94d62977c..92818b891 100644 --- a/cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher +++ b/cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher @@ -10,8 +10,8 @@ YIELD nodeId, embedding WITH gds.util.asNode(nodeId) AS codeUnit ,embedding OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit) -RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName - ,coalesce(codeUnit.leidenCommunityId, 0) AS communityId +RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName + ,coalesce(codeUnit.communityLeidenId, 0) AS communityId ,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName ,embedding \ No newline at end of file diff --git a/cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher b/cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher index 1ea90a55a..c4985e637 100644 --- a/cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher +++ b/cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher @@ -15,8 +15,8 @@ YIELD nodeId, embedding WITH gds.util.asNode(nodeId) AS codeUnit ,embedding OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit) -RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName - 
,coalesce(codeUnit.leidenCommunityId, 0) AS communityId +RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName + ,coalesce(codeUnit.communityLeidenId, 0) AS communityId ,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName ,embedding \ No newline at end of file diff --git a/cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher b/cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher index cb4c49b52..ef4ce25e0 100644 --- a/cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher +++ b/cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher @@ -11,8 +11,8 @@ YIELD nodeId, embedding WITH gds.util.asNode(nodeId) AS codeUnit ,embedding OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit) -RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.name) AS codeUnitName - ,coalesce(codeUnit.leidenCommunityId, 0) AS communityId +RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName + ,coalesce(codeUnit.communityLeidenId, 0) AS communityId ,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName ,embedding \ No newline at end of file diff --git a/scripts/reports/CommunityCsv.sh b/scripts/reports/CommunityCsv.sh index 1fb8f6afb..c2a6baf20 100755 --- a/scripts/reports/CommunityCsv.sh +++ b/scripts/reports/CommunityCsv.sh @@ -67,8 +67,8 @@ detectCommunitiesWithLouvain() { local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" - local writePropertyName="dependencies_projection_write_property=louvainCommunityId" - local writePropertyNameIntermediate="dependencies_projection_write_property=intermediateLouvainCommunityIds" + local 
writePropertyName="dependencies_projection_write_property=communityLouvainId" + local writePropertyNameIntermediate="dependencies_projection_write_property=communityLouvainIntermediateIds" local excludeIntermediateCommunities="dependencies_include_intermediate_communities=false" local includeIntermediateCommunities="dependencies_include_intermediate_communities=true" @@ -112,8 +112,8 @@ detectCommunitiesWithLeiden() { local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" - local writePropertyName="dependencies_projection_write_property=leidenCommunityId" - local writePropertyNameIntermediate="dependencies_projection_write_property=intermediateleidenCommunityIds" + local writePropertyName="dependencies_projection_write_property=communityLeidenId" + local writePropertyNameIntermediate="dependencies_projection_write_property=communityLeidenIntermediateIds" local excludeIntermediateCommunities="dependencies_include_intermediate_communities=false" local includeIntermediateCommunities="dependencies_include_intermediate_communities=true" @@ -157,7 +157,7 @@ detectCommunitiesWithWeaklyConnectedComponents() { local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" - local writePropertyName="dependencies_projection_write_property=weaklyConnectedComponentId" + local writePropertyName="dependencies_projection_write_property=communityWeaklyConnectedComponentId" local writeLabelName="dependencies_projection_write_label=WeaklyConnectedComponent" # Statistics @@ -192,7 +192,7 @@ detectCommunitiesWithLabelPropagation() { local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" - local writePropertyName="dependencies_projection_write_property=labelPropagationCommunityId" + local 
writePropertyName="dependencies_projection_write_property=communityLabelPropagationId" local writeLabelName="dependencies_projection_write_label=LabelPropagation" # Statistics @@ -284,6 +284,17 @@ detectCommunitiesWithApproximateMaximumKCut() { execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}" } +# Compare the results of different community detection algorithms +# +# Required Parameters: +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +compareCommunityDetectionResults() { + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") + execute_cypher "${CYPHER_DIR}/Community_Detection/Compare_Louvain_vs_Leiden_Results.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Compare_Louvain_with_Leiden.csv" +} + detectCommunities() { createProjection "${@}" time detectCommunitiesWithWeaklyConnectedComponents "${@}" @@ -292,6 +303,7 @@ detectCommunities() { time detectCommunitiesWithLeiden "${@}" time detectCommunitiesWithKCoreDecomposition "${@}" time detectCommunitiesWithApproximateMaximumKCut "${@}" + compareCommunityDetectionResults "${@}" } # --------------------------------------------------------------- @@ -320,7 +332,6 @@ echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') communityCsv: Processing pack detectCommunities "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" "${PACKAGE_KCUT}" # Package Community Detection - Special CSV Queries after update -execute_cypher "${CYPHER_DIR}/Community_Detection/Compare_Community_Detection_Results.cypher" > "${FULL_REPORT_DIRECTORY}/Compare_Community_Detection_Results.csv" execute_cypher "${CYPHER_DIR}/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Package_Communities_Leiden_That_Span_Multiple_Artifacts.csv" #
---------------------------------------------------------------