Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,6 @@ CALL gds.modularity.stream(
RETURN communityId
,memberModularity
,memberCount
,shortMemberNames
,memberNames
,shortMemberNames[0..9] AS someMemberNamesShort
,memberNames[0..9] AS someMemberNames
ORDER BY communityId ASCENDING
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Community Detection Conductance
// Streams the conductance of every community, ordered by community id.
// The graph is the in-memory projection named by $dependencies_projection followed by the suffix -cleaned.
// Communities are read from the node property named by $dependencies_projection_write_property.

CALL gds.conductance.stream(
    $dependencies_projection + '-cleaned'
   ,{
        communityProperty: $dependencies_projection_write_property
       ,relationshipWeightProperty: $dependencies_projection_weight_property
    }
)
 YIELD community, conductance
RETURN community
      ,conductance
 ORDER BY community ASC
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Community Detection Conductance Members
// Lists every community with its conductance, its number of members and up to nine of its member names.
// Members are all nodes with the label $dependencies_projection_node that have the community property set.
// Communities without a streamed conductance value are reported with the conductance 0.

CALL gds.conductance.stream(
    $dependencies_projection + '-cleaned'
   ,{
        communityProperty: $dependencies_projection_write_property
       ,relationshipWeightProperty: $dependencies_projection_weight_property
    }
)
 YIELD community AS streamedCommunityId, conductance AS streamedConductance
// Keep all streamed values in one list so that they can be looked up per community below.
  WITH collect({communityId: streamedCommunityId, conductance: streamedConductance}) AS conductanceEntries
 MATCH (member)
 WHERE $dependencies_projection_node IN labels(member)
   AND member[$dependencies_projection_write_property] IS NOT NULL
  WITH conductanceEntries
      ,member[$dependencies_projection_write_property] AS communityId
      ,coalesce(member.fqn, member.fileName, member.name) AS memberName
      ,coalesce(member.name, replace(last(split(member.fileName, '/')), '.jar', '')) AS shortMemberName
// Aggregate the members per community and pick the conductance entry with the same community id.
  WITH conductanceEntries
      ,communityId
      ,count(DISTINCT memberName) AS memberCount
      ,collect(DISTINCT shortMemberName) AS shortMemberNames
      ,collect(DISTINCT memberName) AS memberNames
      ,reduce(matchingConductance = 0, entry IN conductanceEntries |
           CASE WHEN entry.communityId = communityId THEN entry.conductance
                ELSE matchingConductance END) AS conductance
RETURN communityId
      ,conductance
      ,memberCount
      ,shortMemberNames[0..9] AS someMemberNamesShort
      ,memberNames[0..9] AS someMemberNames
 ORDER BY communityId ASC
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Community Metrics
// Lists every community with its conductance, its modularity, its number of members
// and up to nine of its member names.
// Members are all nodes with the label $dependencies_projection_node that have the community property set.
// A metric is reported as 0 for communities without a streamed value for it.

CALL gds.conductance.stream(
    $dependencies_projection + '-cleaned'
   ,{
        communityProperty: $dependencies_projection_write_property
       ,relationshipWeightProperty: $dependencies_projection_weight_property
    }
)
 YIELD community AS conductanceCommunityId, conductance AS conductanceValue
// Keep all streamed conductance values in one list for the lookup per community below.
  WITH collect({communityId: conductanceCommunityId, conductance: conductanceValue}) AS conductanceEntries
  CALL gds.modularity.stream(
    $dependencies_projection + '-cleaned'
   ,{
        communityProperty: $dependencies_projection_write_property
       ,relationshipWeightProperty: $dependencies_projection_weight_property
    }
)
 YIELD communityId AS modularityCommunityId, modularity AS modularityValue
// Same for the streamed modularity values.
  WITH conductanceEntries
      ,collect({communityId: modularityCommunityId, modularity: modularityValue}) AS modularityEntries
 MATCH (member)
 WHERE $dependencies_projection_node IN labels(member)
   AND member[$dependencies_projection_write_property] IS NOT NULL
  WITH conductanceEntries
      ,modularityEntries
      ,member[$dependencies_projection_write_property] AS communityId
      ,coalesce(member.fqn, member.fileName, member.name) AS memberName
      ,coalesce(member.name, replace(last(split(member.fileName, '/')), '.jar', '')) AS shortMemberName
// Aggregate the members per community and pick the metric entries with the same community id.
  WITH conductanceEntries
      ,modularityEntries
      ,communityId
      ,count(DISTINCT memberName) AS memberCount
      ,collect(DISTINCT shortMemberName) AS shortMemberNames
      ,collect(DISTINCT memberName) AS memberNames
      ,reduce(matchingConductance = 0, entry IN conductanceEntries |
           CASE WHEN entry.communityId = communityId THEN entry.conductance
                ELSE matchingConductance END) AS conductance
      ,reduce(matchingModularity = 0, entry IN modularityEntries |
           CASE WHEN entry.communityId = communityId THEN entry.modularity
                ELSE matchingModularity END) AS modularity
RETURN communityId
      ,conductance
      ,modularity
      ,memberCount
      ,shortMemberNames[0..9] AS someMemberNamesShort
      ,memberNames[0..9] AS someMemberNames
 ORDER BY communityId ASC
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Communities that span the most packages
// Per artifact and Leiden community: the distinct packages and types that belong to it.
// Communities that are spread over the most packages are listed first.

 MATCH (artifact:Artifact)-[:CONTAINS]->(containingPackage:Package)-[:CONTAINS]->(memberType:Type)
// Types without a Leiden community are left out.
 WHERE memberType.communityLeidenId IS NOT NULL
  WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
      ,memberType.communityLeidenId AS communityId
      ,collect(DISTINCT containingPackage.fqn) AS packageNames
      ,count(DISTINCT containingPackage.fqn) AS packageCount
      ,collect(DISTINCT memberType.fqn) AS typeNames
      ,count(DISTINCT memberType.fqn) AS typeCount
RETURN artifactName
      ,communityId
      ,packageCount
      ,typeCount
      ,packageNames
      ,typeNames
 ORDER BY packageCount DESC
         ,typeCount DESC
         ,communityId ASC
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Communities that span the most packages with type statistics
// Per artifact and Leiden community: the packages it spans and statistics about
// how many types of the community each of these packages contains.

 MATCH (a:Artifact)-[:CONTAINS]->(p:Package)-[:CONTAINS]->(t:Type)
// Step 1: Count the types per artifact, community and package.
// Only typeCount is aggregated here. Package names and counts are calculated in step 2.
  WITH replace(last(split(a.fileName, '/')), '.jar', '') AS artifactName
      ,t.communityLeidenId AS communityId
      ,p.fqn AS packageName
      ,count(DISTINCT t.fqn) AS typeCount
// NOTE(review): The sort order is kept since it may influence the order of packageNames collected below - TODO confirm.
 ORDER BY typeCount ASCENDING
 WHERE communityId IS NOT NULL
// Step 2: Aggregate the packages and their type counts per artifact and community.
  WITH artifactName
      ,communityId
      ,collect(DISTINCT packageName) AS packageNames
      ,count(DISTINCT packageName) AS packageCount
// The object structure of "packageCommunityTypes" only works in the browser.
// It is only meant to be a helper to see how the communities and their packages are distributed in detail.
//,collect(DISTINCT {package: packageName, numberOfTypes:typeCount}) AS packageCommunityTypes
      ,sum(typeCount) AS sumTypeCount
      ,min(typeCount) AS minTypeCount
      ,max(typeCount) AS maxTypeCount
      ,avg(typeCount) AS avgTypeCount
      ,stDev(typeCount) AS stdTypeCount
// 50th percentile (median) of the type counts per package
      ,percentileDisc(typeCount, 0.5) AS per5TypeCount
RETURN artifactName
      ,communityId
      ,packageCount
      ,sumTypeCount
      ,minTypeCount
      ,maxTypeCount
      ,avgTypeCount
      ,stdTypeCount
      ,per5TypeCount
//,packageCommunityTypes
      ,packageNames
 ORDER BY packageCount DESCENDING
         ,sumTypeCount DESCENDING
         ,communityId ASCENDING
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Type communities with few members in foreign packages
// Finds packages that contain only a part of a Leiden community:
// fewer types than the community has in total and fewer types than the package has with a community assigned.
// Returns one row per artifact, package and community. Rows with the fewest types come first.

// Step 1: Count the distinct types of every Leiden community.
MATCH (t:Type)
WITH t.communityLeidenId AS communityId
,count(DISTINCT t.fqn) AS numberOfTypesInCommunity
WHERE communityId IS NOT NULL
// Step 2: For every community find the packages that contain at least one of its types (communityType)
// and all types of these packages that have any community assigned (packageType).
MATCH (a:Artifact)-[:CONTAINS]->(p:Package)-[:CONTAINS]->(communityType:Type)
MATCH (p)-[:CONTAINS]->(packageType:Type)
WHERE communityType.communityLeidenId = communityId
AND packageType.communityLeidenId IS NOT NULL
// Step 3: Count the types with a community per package and collect the community types found in it.
// The grouping uses numberOfTypesInCommunity and not communityId. Communities of the same size
// within one package are therefore grouped together here and separated again in step 4.
WITH replace(last(split(a.fileName, '/')), '.jar', '') AS artifactName
,p.fqn AS packageName
,numberOfTypesInCommunity
,count(DISTINCT packageType.fqn) AS numberOfTypesInPackage
,collect(communityType) AS packageTypes
// Step 4: Expand the collected community types again. From here on packageType refers to these types.
// The list may contain the same type more than once, which count(DISTINCT ...) compensates.
UNWIND packageTypes AS packageType
WITH artifactName
,packageName
,packageType.communityLeidenId AS communityId
,numberOfTypesInPackage
,numberOfTypesInCommunity
,count(DISTINCT packageType.fqn) AS numberOfTypes
// Keep only packages that hold neither the whole community nor exclusively types of this community.
WHERE numberOfTypes < numberOfTypesInCommunity
AND numberOfTypes < numberOfTypesInPackage
RETURN artifactName
,packageName
,communityId
,numberOfTypesInPackage
,numberOfTypesInCommunity
,numberOfTypes
ORDER BY numberOfTypes ASCENDING
,numberOfTypesInCommunity DESCENDING
,numberOfTypesInPackage DESCENDING
,packageName ASCENDING
9 changes: 0 additions & 9 deletions cypher/Community_Detection_Label_Propagation.cypher

This file was deleted.

9 changes: 0 additions & 9 deletions cypher/Community_Detection_Weakly_Connected_Components.cypher

This file was deleted.

2 changes: 2 additions & 0 deletions scripts/executeQuery.sh
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ then
redColor='\033[0;31m'
noColor='\033[0m'
echo -e "${redColor}${cypher_query_file_name}: ${cypher_query_result}${noColor}" >&2
echo -e "${redColor}Parameters: ${query_parameters}${noColor}" >&2
exit 1
fi
#echo "executeQuery: Cypher Query OK Result: ${cypher_query_result}"
Expand All @@ -115,6 +116,7 @@ if [[ -n "${error_message}" ]]; then
redColor='\033[0;31m'
noColor='\033[0m'
echo -e "${redColor}${cypher_query_file_name}: ${error_message}${noColor}" >&2
echo -e "${redColor}Parameters: ${query_parameters}${noColor}" >&2
exit 1
fi

Expand Down
80 changes: 68 additions & 12 deletions scripts/reports/CommunityCsv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ detectCommunitiesWithLouvain() {
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyNameIntermediate}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"

calculateCommunityMetrics "${@}" "${writePropertyName}"
}

# Community Detection using the Leiden Algorithm
Expand Down Expand Up @@ -157,19 +159,8 @@ detectCommunitiesWithLeiden() {
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyNameIntermediate}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
}

# Write modularity for Leiden communities
#
# Required Parameters:
# - dependencies_projection=...
# Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_weight_property=...
# Name of the node property that contains the dependency weight. Example: "weight"
writeLeidenModularity() {
local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection"
local writePropertyName="dependencies_projection_write_property=communityLeidenId"
execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_7e_Write_Modularity.cypher" "${@}" "${writePropertyName}"
calculateCommunityMetrics "${@}" "${writePropertyName}"
}

# Community Detection using the Weakly Connected Components Algorithm
Expand Down Expand Up @@ -205,6 +196,8 @@ detectCommunitiesWithWeaklyConnectedComponents() {
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"

calculateCommunityMetrics "${@}" "${writePropertyName}"
}

# Community Detection using the Label Propagation Algorithm
Expand Down Expand Up @@ -240,6 +233,8 @@ detectCommunitiesWithLabelPropagation() {
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"

calculateCommunityMetrics "${@}" "${writePropertyName}"
}

# Community Detection using the K-Core Decomposition Algorithm
Expand Down Expand Up @@ -274,6 +269,8 @@ detectCommunitiesWithKCoreDecomposition() {
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"

calculateCommunityMetrics "${@}" "${writePropertyName}"
}

# Community Detection using the Approximate Maximum k-cut Algorithm
Expand Down Expand Up @@ -310,6 +307,63 @@ detectCommunitiesWithApproximateMaximumKCut() {
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"

calculateCommunityMetrics "${@}" "${writePropertyName}"
}

# Calculates community metrics including "Modularity" and "Conductance" and writes them into CSV report files.
#
# The combined metrics query is tried first. If it fails, modularity and conductance are queried
# one by one so that at least the metrics that don't fail get reported.
# Failing metrics don't stop the script since they aren't essential.
#
# Required Parameters:
# - dependencies_projection=...
#   Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
#   Label of the nodes that belong to the communities. It is also used in the names of the report files.
# - dependencies_projection_write_property=...
#   Name of the property that contains the community id. It is also used in the names of the report files.
# - dependencies_projection_weight_property=...
#   Name of the property that contains the dependency weight. Example: "weight"
calculateCommunityMetrics() {
    local metricsCypherDirectory="${CYPHER_DIR}/Community_Detection"

    local projectionNodeLabel communityPropertyName
    projectionNodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}")
    communityPropertyName=$( extractQueryParameter "dependencies_projection_write_property" "${@}")

    # NOTE(review): The prefix ends with an underscore and every suffix below starts with one.
    # The resulting file names therefore contain a double underscore. They are kept unchanged on purpose.
    local reportFilePrefix="${FULL_REPORT_DIRECTORY}/${projectionNodeLabel}_${communityPropertyName}_Community_"

    # Holds the CSV output of the query that was executed last.
    local queryResult

    if queryResult=$( execute_cypher "${metricsCypherDirectory}/Community_Detection_9_Community_Metrics.cypher" "${@}" ); then
        echo "${queryResult}" > "${reportFilePrefix}_Metrics.csv"
    else
        # The combined metrics failed. Try each metric on its own.
        if queryResult=$( execute_cypher "${metricsCypherDirectory}/Community_Detection_7d_Modularity_Members.cypher" "${@}" ); then
            echo "${queryResult}" > "${reportFilePrefix}_Modularity.csv"
        fi
        if queryResult=$( execute_cypher "${metricsCypherDirectory}/Community_Detection_8d_Conductance_Members.cypher" "${@}" ); then
            echo "${queryResult}" > "${reportFilePrefix}_Conductance.csv"
        fi
    fi
    # Failed metrics are ignored on purpose. There seem to be open issues like:
    # gds.modularity.stream ArrayIndexOutOfBoundsException: Index -1 out of bounds for length 100
}

# Write modularity for Leiden communities.
# Runs the query "Community_Detection_7e_Write_Modularity.cypher" with the given parameters
# and the community property fixed to "communityLeidenId".
#
# Required Parameters:
# - dependencies_projection=...
#   Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_weight_property=...
#   Name of the property that contains the dependency weight. Example: "weight"
writeLeidenModularity() {
    local modularityQueryFile="${CYPHER_DIR}/Community_Detection/Community_Detection_7e_Write_Modularity.cypher"
    local leidenCommunityProperty="dependencies_projection_write_property=communityLeidenId"

    execute_cypher "${modularityQueryFile}" "${@}" "${leidenCommunityProperty}"
}

# Compare the results of different community detection algorithms
Expand Down Expand Up @@ -389,5 +443,7 @@ detectCommunities "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_G

# Type Community Detection - Special CSV Queries after update
execute_cypher "${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_with_few_members_in_foreign_packages.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_with_few_members_in_foreign_packages.csv"
execute_cypher "${CYPHER_DIR}/Community_Detection/Type_communities_that_span_the_most_packages_with_type_statistics.cypher" > "${FULL_REPORT_DIRECTORY}/Type_communities_that_span_the_most_packages_with_type_statistics.csv"

echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished"