Skip to content

Commit 7527c74

Browse files
committed
List type communities that span multiple packages
1 parent 1b6d49c commit 7527c74

File tree

4 files changed

+94
-0
lines changed

4 files changed

+94
-0
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Communities that span the most packages
// Follows Artifact -> Package -> Type along CONTAINS relationships and groups the types
// by artifact name and by their Leiden community id (t.communityLeidenId).
// artifactName is the last '/'-separated segment of a.fileName with '.jar' removed.
// For every group the distinct package and type names (fqn) are collected and counted.
// Groups without a community id are removed by the WHERE that follows the WITH.
// No minimum package count is applied: every community is returned and the ones that
// span the most packages are sorted to the top (ties: more types first, then lower community id).
2+
3+
MATCH (a:Artifact)-[:CONTAINS]->(p:Package)-[:CONTAINS]->(t:Type)
4+
WITH replace(last(split(a.fileName, '/')), '.jar', '') AS artifactName
5+
,t.communityLeidenId AS communityId
6+
,collect(DISTINCT p.fqn) AS packageNames
7+
,count(DISTINCT p.fqn) AS packageCount
8+
,collect(DISTINCT t.fqn) AS typeNames
9+
,count(DISTINCT t.fqn) AS typeCount
10+
WHERE communityId IS NOT NULL
11+
RETURN artifactName
12+
,communityId
13+
,packageCount
14+
,typeCount
15+
,packageNames
16+
,typeNames
17+
ORDER BY packageCount DESCENDING
18+
,typeCount DESCENDING
19+
,communityId ASCENDING
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Communities that span the most packages with type statistics
// Stage 1 (first WITH): groups the types by artifact name, Leiden community id and package,
// so typeCount is the number of distinct types of one community inside one package.
// The stage 1 values packageNames, packageCount and typeNames are not read afterwards;
// packageNames and packageCount are declared again in stage 2.
// The ORDER BY typeCount runs before the second aggregation - presumably so that the collected
// packageNames are listed by ascending type count. NOTE(review): confirm that collect() keeps this order.
// Stage 2 (second WITH): groups by artifact name and community id and aggregates the
// per-package typeCount values (sum, min, max, avg, stDev, percentileDisc).
// per5TypeCount is percentileDisc(typeCount, 0.5), i.e. the 50th percentile (discrete median).
// Result order: packageCount descending, then sumTypeCount descending, then communityId ascending.
2+
3+
MATCH (a:Artifact)-[:CONTAINS]->(p:Package)-[:CONTAINS]->(t:Type)
4+
WITH replace(last(split(a.fileName, '/')), '.jar', '') AS artifactName
5+
,t.communityLeidenId AS communityId
6+
,p.fqn AS packageName
7+
,collect(DISTINCT p.fqn) AS packageNames
8+
,count(DISTINCT p.fqn) AS packageCount
9+
,collect(DISTINCT t.fqn) AS typeNames
10+
,count(DISTINCT t.fqn) AS typeCount
11+
ORDER BY typeCount ASCENDING
12+
WHERE communityId IS NOT NULL
13+
WITH artifactName
14+
,communityId
15+
,collect(DISTINCT packageName) AS packageNames
16+
,count(DISTINCT packageName) AS packageCount
17+
// The object structure of "packageCommunityTypes" only works in the browser.
18+
// It is only meant to be a helper to see how the communities and their packages are distributed in detail.
19+
//,collect(DISTINCT {package: packageName, numberOfTypes:typeCount}) AS packageCommunityTypes
20+
,sum(typeCount) AS sumTypeCount
21+
,min(typeCount) AS minTypeCount
22+
,max(typeCount) AS maxTypeCount
23+
,avg(typeCount) AS avgTypeCount
24+
,stDev(typeCount) AS stdTypeCount
25+
,percentileDisc(typeCount, 0.5) AS per5TypeCount
26+
RETURN artifactName
27+
,communityId
28+
,packageCount
29+
,sumTypeCount
30+
,minTypeCount
31+
,maxTypeCount
32+
,avgTypeCount
33+
,stdTypeCount
34+
,per5TypeCount
35+
//,packageCommunityTypes
36+
,packageNames
37+
ORDER BY packageCount DESCENDING
38+
,sumTypeCount DESCENDING
39+
,communityId ASCENDING
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Type communities with few members in foreign packages
// Step 1: counts the distinct types per Leiden community id (numberOfTypesInCommunity)
// and drops the group of types that have no community id.
// Step 2: for every community, finds the packages that contain at least one of its types
// (communityType) and matches all types of those packages that have a community id (packageType).
// Step 3: groups by artifact name, package and numberOfTypesInCommunity, counts the distinct
// types in the package (numberOfTypesInPackage) and collects the community types.
// communityId is not carried through this WITH; it is read again from the unwound types below.
// Step 4: unwinds the collected community types (the name packageType is reused for them here)
// and counts, per package and community, how many distinct types of the community
// are located in the package (numberOfTypes).
// Only rows where numberOfTypes is smaller than both the community size and the package size
// are kept, i.e. the community is not completely inside the package and the package
// also contains types of other communities.
// Result order: fewest community members in the package first.
2+
3+
MATCH (t:Type)
4+
WITH t.communityLeidenId AS communityId
5+
,count(DISTINCT t.fqn) AS numberOfTypesInCommunity
6+
WHERE communityId IS NOT NULL
7+
MATCH (a:Artifact)-[:CONTAINS]->(p:Package)-[:CONTAINS]->(communityType:Type)
8+
MATCH (p)-[:CONTAINS]->(packageType:Type)
9+
WHERE communityType.communityLeidenId = communityId
10+
AND packageType.communityLeidenId IS NOT NULL
11+
WITH replace(last(split(a.fileName, '/')), '.jar', '') AS artifactName
12+
,p.fqn AS packageName
13+
,numberOfTypesInCommunity
14+
,count(DISTINCT packageType.fqn) AS numberOfTypesInPackage
15+
,collect(communityType) AS packageTypes
16+
UNWIND packageTypes AS packageType
17+
WITH artifactName
18+
,packageName
19+
,packageType.communityLeidenId AS communityId
20+
,numberOfTypesInPackage
21+
,numberOfTypesInCommunity
22+
,count(DISTINCT packageType.fqn) AS numberOfTypes
23+
WHERE numberOfTypes < numberOfTypesInCommunity
24+
AND numberOfTypes < numberOfTypesInPackage
25+
RETURN artifactName
26+
,packageName
27+
,communityId
28+
,numberOfTypesInPackage
29+
,numberOfTypesInCommunity
30+
,numberOfTypes
31+
ORDER BY numberOfTypes ASCENDING
32+
,numberOfTypesInCommunity DESCENDING
33+
,numberOfTypesInPackage DESCENDING
34+
,packageName ASCENDING

scripts/reports/CommunityCsv.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,5 +389,7 @@ detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_G
389389

390390
# Type Community Detection - Special CSV Queries after update
# Each call passes one Cypher file from ${CYPHER_DIR}/Community_Detection to execute_cypher
# (defined outside this excerpt) and redirects its standard output into a CSV file
# inside ${FULL_REPORT_DIRECTORY}.
391391
execute_cypher "${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
392+
execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_with_few_members_in_foreign_packages.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_with_few_members_in_foreign_packages.csv"
393+
execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_that_span_the_most_packages_with_type_statistics.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_that_span_the_most_packages_with_type_statistics.csv"
392394

393395
echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished"

0 commit comments

Comments
 (0)