Skip to content

Commit cd6db29

Browse files
authored
Merge pull request #73 from JohT/feature/details-about-communities
Add reports with metrics about detected communities
2 parents 1b6d49c + 2741238 commit cd6db29

11 files changed

+248
-32
lines changed

cypher/Community_Detection/Community_Detection_7d_Modularity_Members.cypher

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,6 @@ CALL gds.modularity.stream(
2525
RETURN communityId
2626
,memberModularity
2727
,memberCount
28-
,shortMemberNames
29-
,memberNames
28+
,shortMemberNames[0..9] AS someMemberNamesShort
29+
,memberNames[0..9] AS someMemberNames
3030
ORDER BY communityId ASCENDING
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Community Detection Conductance
// Calls "gds.conductance.stream" on the graph projection whose name is the value of
// $dependencies_projection with the suffix '-cleaned' appended.
// Query parameters read here:
//   $dependencies_projection                 - name (without the '-cleaned' suffix) of the projection
//   $dependencies_projection_weight_property - passed to the procedure as relationshipWeightProperty
//   $dependencies_projection_write_property  - passed to the procedure as communityProperty
// Returns the columns "community" and "conductance", sorted by community ascending.
2+
3+
CALL gds.conductance.stream(
4+
$dependencies_projection + '-cleaned', {
5+
relationshipWeightProperty: $dependencies_projection_weight_property
6+
,communityProperty: $dependencies_projection_write_property
7+
})
8+
YIELD community, conductance
9+
RETURN community, conductance
10+
ORDER BY community ASCENDING
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Community Detection Conductance Members
// Combines the conductance streamed by "gds.conductance.stream" with the members (nodes) of each community.
// Query parameters read here:
//   $dependencies_projection                 - name (without the '-cleaned' suffix) of the projection
//   $dependencies_projection_weight_property - passed to the procedure as relationshipWeightProperty
//   $dependencies_projection_write_property  - passed as communityProperty and also read from every member node
//   $dependencies_projection_node            - label a node needs to have to be treated as a member
// Returns communityId, conductance, memberCount and a shortened sample of the member names,
// sorted by communityId ascending.
2+
3+
CALL gds.conductance.stream(
4+
$dependencies_projection + '-cleaned', {
5+
relationshipWeightProperty: $dependencies_projection_weight_property
6+
,communityProperty: $dependencies_projection_write_property
7+
})
8+
YIELD community AS communityId, conductance
9+
// Collapse all streamed rows into a single list of {communityId, conductance} maps
// so that it can be carried through the following MATCH as one value.
WITH collect({communityId: communityId, conductance: conductance}) AS communityMetrics
10+
MATCH (member)
11+
// Only nodes that have the community property set ...
WHERE member[$dependencies_projection_write_property] IS NOT NULL
12+
// ... and that carry the label given by $dependencies_projection_node.
AND $dependencies_projection_node IN LABELS(member)
13+
WITH communityMetrics
14+
,member[$dependencies_projection_write_property] AS communityId
15+
// First non-null value of fqn, fileName and name.
,coalesce(member.fqn, member.fileName, member.name) AS memberName
16+
// The name if present, otherwise the last path segment of fileName with '.jar' removed.
,coalesce(member.name, replace(last(split(member.fileName, '/')), '.jar', '')) AS shortMemberName
17+
// communityMetrics and communityId are not aggregated and therefore form the grouping key.
WITH communityMetrics
18+
,communityId
19+
,count(DISTINCT memberName) AS memberCount
20+
,collect(DISTINCT shortMemberName) AS shortMemberNames
21+
,collect(DISTINCT memberName) AS memberNames
22+
// Looks up the conductance of the current communityId in the collected list.
// The result stays 0 if no entry matches; if several entries matched, the last one would win.
,reduce(memberConductance = 0, conductance IN communityMetrics |
23+
CASE conductance.communityId WHEN communityId THEN conductance.conductance
24+
ELSE memberConductance END) AS conductance
25+
RETURN communityId
26+
,conductance
27+
,memberCount
28+
// [0..9] limits the output to at most the first nine names (the upper bound of a Cypher list range is exclusive).
,shortMemberNames[0..9] AS someMemberNamesShort
29+
,memberNames[0..9] AS someMemberNames
30+
ORDER BY communityId ASCENDING
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Community Metrics
// Combines the results of "gds.conductance.stream" and "gds.modularity.stream" with the
// members (nodes) of each community into one row per communityId.
// Query parameters read here:
//   $dependencies_projection                 - name (without the '-cleaned' suffix) of the projection
//   $dependencies_projection_weight_property - passed to both procedures as relationshipWeightProperty
//   $dependencies_projection_write_property  - passed as communityProperty and also read from every member node
//   $dependencies_projection_node            - label a node needs to have to be treated as a member
// Returns communityId, conductance, modularity, memberCount and a shortened sample of the member names,
// sorted by communityId ascending.
2+
3+
CALL gds.conductance.stream(
4+
$dependencies_projection + '-cleaned', {
5+
relationshipWeightProperty: $dependencies_projection_weight_property
6+
,communityProperty: $dependencies_projection_write_property
7+
})
8+
YIELD community AS communityId, conductance
9+
// Collapse the conductance rows into one list so that the second procedure call below runs on a single row.
WITH collect({communityId: communityId, conductance: conductance}) AS conductances
10+
CALL gds.modularity.stream(
11+
$dependencies_projection + '-cleaned', {
12+
relationshipWeightProperty: $dependencies_projection_weight_property
13+
,communityProperty: $dependencies_projection_write_property
14+
})
15+
YIELD communityId, modularity
16+
WITH conductances
17+
// Same approach for the modularity rows: one list of {communityId, modularity} maps.
,collect({communityId: communityId, modularity: modularity}) AS modularities
18+
MATCH (member)
19+
// Only nodes that have the community property set ...
WHERE member[$dependencies_projection_write_property] IS NOT NULL
20+
// ... and that carry the label given by $dependencies_projection_node.
AND $dependencies_projection_node IN LABELS(member)
21+
WITH conductances
22+
,modularities
23+
,member[$dependencies_projection_write_property] AS communityId
24+
// First non-null value of fqn, fileName and name.
,coalesce(member.fqn, member.fileName, member.name) AS memberName
25+
// The name if present, otherwise the last path segment of fileName with '.jar' removed.
,coalesce(member.name, replace(last(split(member.fileName, '/')), '.jar', '')) AS shortMemberName
26+
// conductances, modularities and communityId are not aggregated and therefore form the grouping key.
WITH conductances
27+
,modularities
28+
,communityId
29+
,count(DISTINCT memberName) AS memberCount
30+
,collect(DISTINCT shortMemberName) AS shortMemberNames
31+
,collect(DISTINCT memberName) AS memberNames
32+
// Looks up the conductance of the current communityId; stays 0 if no entry matches.
,reduce(memberConductance = 0, conductance IN conductances |
33+
CASE conductance.communityId WHEN communityId THEN conductance.conductance
34+
ELSE memberConductance END) AS conductance
35+
// Looks up the modularity of the current communityId; stays 0 if no entry matches.
,reduce(memberModularity = 0, modularity IN modularities |
36+
CASE modularity.communityId WHEN communityId THEN modularity.modularity
37+
ELSE memberModularity END) AS modularity
38+
RETURN communityId
39+
,conductance
40+
,modularity
41+
,memberCount
42+
// [0..9] limits the output to at most the first nine names (the upper bound of a Cypher list range is exclusive).
,shortMemberNames[0..9] AS someMemberNamesShort
43+
,memberNames[0..9] AS someMemberNames
44+
ORDER BY communityId ASCENDING
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Communities that span the most packages
// For every combination of artifact and Leiden community id (Type property "communityLeidenId"),
// lists the distinct packages and types of that community within the artifact.
// Types without a communityLeidenId are left out.
// Sorted by packageCount descending, then typeCount descending, then communityId ascending.
2+
3+
MATCH (a:Artifact)-[:CONTAINS]->(p:Package)-[:CONTAINS]->(t:Type)
4+
// artifactName: last path segment of the artifact's fileName with '.jar' removed.
// artifactName and communityId are not aggregated and therefore form the grouping key.
WITH replace(last(split(a.fileName, '/')), '.jar', '') AS artifactName
5+
,t.communityLeidenId AS communityId
6+
,collect(DISTINCT p.fqn) AS packageNames
7+
,count(DISTINCT p.fqn) AS packageCount
8+
,collect(DISTINCT t.fqn) AS typeNames
9+
,count(DISTINCT t.fqn) AS typeCount
10+
// Filters the grouped rows: drops the group of types that have no community id.
WHERE communityId IS NOT NULL
11+
RETURN artifactName
12+
,communityId
13+
,packageCount
14+
,typeCount
15+
,packageNames
16+
,typeNames
17+
ORDER BY packageCount DESCENDING
18+
,typeCount DESCENDING
19+
,communityId ASCENDING
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Communities that span the most packages with type statistics
// Two aggregation steps:
//   1. Count the distinct types per artifact, Leiden community id (Type property "communityLeidenId") and package.
//   2. Per artifact and community id, aggregate those per-package type counts into
//      sum, min, max, average, standard deviation and the 0.5 percentile.
// Types without a communityLeidenId are left out.
// Sorted by packageCount descending, then sumTypeCount descending, then communityId ascending.
2+
3+
MATCH (a:Artifact)-[:CONTAINS]->(p:Package)-[:CONTAINS]->(t:Type)
4+
// Step 1: artifactName, communityId and packageName are not aggregated and therefore form the grouping key.
WITH replace(last(split(a.fileName, '/')), '.jar', '') AS artifactName
5+
,t.communityLeidenId AS communityId
6+
,p.fqn AS packageName
7+
// NOTE(review): packageNames, packageCount and typeNames of this step are not read afterwards.
// The next WITH does not pass typeNames on and recomputes packageNames/packageCount from packageName.
// They look removable - confirm before deleting.
,collect(DISTINCT p.fqn) AS packageNames
8+
,count(DISTINCT p.fqn) AS packageCount
9+
,collect(DISTINCT t.fqn) AS typeNames
10+
,count(DISTINCT t.fqn) AS typeCount
11+
ORDER BY typeCount ASCENDING
12+
WHERE communityId IS NOT NULL
13+
// Step 2: artifactName and communityId form the grouping key; typeCount is the per-package count from step 1.
WITH artifactName
14+
,communityId
15+
,collect(DISTINCT packageName) AS packageNames
16+
,count(DISTINCT packageName) AS packageCount
17+
// The object structure of "packageCommunityTypes" only works in the browser.
18+
// It is only meant to be a helper to see how the communities and their packages are distributed in detail.
19+
//,collect(DISTINCT {package: packageName, numberOfTypes:typeCount}) AS packageCommunityTypes
20+
,sum(typeCount) AS sumTypeCount
21+
,min(typeCount) AS minTypeCount
22+
,max(typeCount) AS maxTypeCount
23+
,avg(typeCount) AS avgTypeCount
24+
,stDev(typeCount) AS stdTypeCount
25+
// Discrete percentile at 0.5, i.e. the 50th percentile of the per-package type counts.
,percentileDisc(typeCount, 0.5) AS per5TypeCount
26+
RETURN artifactName
27+
,communityId
28+
,packageCount
29+
,sumTypeCount
30+
,minTypeCount
31+
,maxTypeCount
32+
,avgTypeCount
33+
,stdTypeCount
34+
,per5TypeCount
35+
//,packageCommunityTypes
36+
,packageNames
37+
ORDER BY packageCount DESCENDING
38+
,sumTypeCount DESCENDING
39+
,communityId ASCENDING
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Type communities with few members in foreign packages
// Lists combinations of package and Leiden community id (Type property "communityLeidenId") where the
// number of the community's types inside that package (numberOfTypes) is smaller than both
// the total number of types in the community and the number of types in the package.
// Sorted by numberOfTypes ascending, then numberOfTypesInCommunity descending,
// then numberOfTypesInPackage descending, then packageName ascending.
2+
3+
MATCH (t:Type)
4+
// Total number of distinct types per community id.
WITH t.communityLeidenId AS communityId
5+
,count(DISTINCT t.fqn) AS numberOfTypesInCommunity
6+
WHERE communityId IS NOT NULL
7+
// communityType: restricted to types of the current community by the WHERE clause below.
MATCH (a:Artifact)-[:CONTAINS]->(p:Package)-[:CONTAINS]->(communityType:Type)
8+
// packageType: every type of the same package that has a community id (of any community).
MATCH (p)-[:CONTAINS]->(packageType:Type)
9+
WHERE communityType.communityLeidenId = communityId
10+
AND packageType.communityLeidenId IS NOT NULL
11+
// Grouping key here: artifactName, packageName and numberOfTypesInCommunity.
// NOTE(review): communityId is not part of this grouping key; it is derived again after the UNWIND
// from the collected nodes. Communities with the same numberOfTypesInCommunity in the same package
// therefore appear to share one group at this point and get separated again below - confirm this is intended.
WITH replace(last(split(a.fileName, '/')), '.jar', '') AS artifactName
12+
,p.fqn AS packageName
13+
,numberOfTypesInCommunity
14+
,count(DISTINCT packageType.fqn) AS numberOfTypesInPackage
15+
// Collected without DISTINCT, so a node can be contained more than once (one entry per matched packageType row).
// The count after the UNWIND uses DISTINCT on fqn.
,collect(communityType) AS packageTypes
16+
// From here on "packageType" refers to the collected communityType nodes, not to the MATCH variable above.
UNWIND packageTypes AS packageType
17+
WITH artifactName
18+
,packageName
19+
,packageType.communityLeidenId AS communityId
20+
,numberOfTypesInPackage
21+
,numberOfTypesInCommunity
22+
// Number of distinct types of this community inside this package.
,count(DISTINCT packageType.fqn) AS numberOfTypes
23+
// Keep only the cases where the community is neither completely inside this package ...
WHERE numberOfTypes < numberOfTypesInCommunity
24+
// ... nor makes up all of the package's types that have a community id.
AND numberOfTypes < numberOfTypesInPackage
25+
RETURN artifactName
26+
,packageName
27+
,communityId
28+
,numberOfTypesInPackage
29+
,numberOfTypesInCommunity
30+
,numberOfTypes
31+
ORDER BY numberOfTypes ASCENDING
32+
,numberOfTypesInCommunity DESCENDING
33+
,numberOfTypesInPackage DESCENDING
34+
,packageName ASCENDING

cypher/Community_Detection_Label_Propagation.cypher

Lines changed: 0 additions & 9 deletions
This file was deleted.

cypher/Community_Detection_Weakly_Connected_Components.cypher

Lines changed: 0 additions & 9 deletions
This file was deleted.

scripts/executeQuery.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ then
105105
redColor='\033[0;31m'
106106
noColor='\033[0m'
107107
echo -e "${redColor}${cypher_query_file_name}: ${cypher_query_result}${noColor}" >&2
108+
echo -e "${redColor}Parameters: ${query_parameters}${noColor}" >&2
108109
exit 1
109110
fi
110111
#echo "executeQuery: Cypher Query OK Result: ${cypher_query_result}"
@@ -115,6 +116,7 @@ if [[ -n "${error_message}" ]]; then
115116
redColor='\033[0;31m'
116117
noColor='\033[0m'
117118
echo -e "${redColor}${cypher_query_file_name}: ${error_message}${noColor}" >&2
119+
echo -e "${redColor}Parameters: ${query_parameters}${noColor}" >&2
118120
exit 1
119121
fi
120122

0 commit comments

Comments
 (0)