diff --git a/cypher/Artifact_Dependencies/Artifacts_with_dependencies_to_other_artifacts.cypher b/cypher/Artifact_Dependencies/Artifacts_with_dependencies_to_other_artifacts.cypher new file mode 100644 index 000000000..0afbd5139 --- /dev/null +++ b/cypher/Artifact_Dependencies/Artifacts_with_dependencies_to_other_artifacts.cypher @@ -0,0 +1,47 @@ +// Artifacts with dependencies to other artifacts + +MATCH (artifact:Artifact)-[:CONTAINS]->(packageInArtifact:Package) +MATCH (packageInArtifact)-[:CONTAINS]->(typeInPackage:Type) +MATCH (typeInPackage)-[:DEPENDS_ON]->(dependencyType:Type) +MATCH (dependencyPackage:Package)-[:CONTAINS]->(dependencyType) +MATCH (dependencyArtifact:Artifact)-[:CONTAINS]->(dependencyPackage) +WHERE artifact.fileName <> dependencyArtifact.fileName + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,artifact.numberOfPackages AS packagesInArtifactCount + ,artifact.numberOfTypes AS typesInArtifactCount + ,collect(DISTINCT packageInArtifact.fqn) AS packages + ,count(DISTINCT packageInArtifact.fqn) AS packagesCount + ,round(100.0 / artifact.numberOfPackages + * count(DISTINCT packageInArtifact.fqn) + , 2) AS packageSpread + ,collect(DISTINCT typeInPackage.name) AS types + ,count(DISTINCT typeInPackage.fqn) AS typesCount + ,round(100.0 / artifact.numberOfTypes + * count(DISTINCT typeInPackage.fqn) + , 2) AS typesSpread + ,replace(last(split(dependencyArtifact.fileName, '/')), '.jar', '') AS dependencyArtifactName +// additionally group by if the dependency is an interface or not + ,dependencyType:Interface AS dependencyTypeIsInterface + ,collect(DISTINCT dependencyPackage.fqn) AS dependencyPackages + ,count(DISTINCT dependencyPackage.fqn) AS dependencyPackagesCount + ,collect(DISTINCT dependencyType.name) AS dependencyTypes + ,count(DISTINCT dependencyType.fqn) AS dependencyTypesCount +// Filter out empty dependency sets +WHERE dependencyPackagesCount > 0 + AND packagesCount > 1 +RETURN artifactName + 
,packagesInArtifactCount + ,packagesCount + ,packageSpread + ,typesInArtifactCount + ,typesCount + ,typesSpread + ,dependencyArtifactName + ,dependencyTypeIsInterface + ,dependencyPackagesCount + ,dependencyTypesCount + ,dependencyPackages[0..2] AS someDependencyPackages + ,dependencyTypes[0..4] AS someDependencyTypes + ,packages[0..2] AS someCallingPackages + ,types[0..4] AS someCallingTypes +ORDER BY packagesCount DESC \ No newline at end of file diff --git a/cypher/Artifact_Dependencies/Incoming_Artifact_Dependencies.cypher b/cypher/Artifact_Dependencies/Incoming_Artifact_Dependencies.cypher new file mode 100644 index 000000000..a13e2d33a --- /dev/null +++ b/cypher/Artifact_Dependencies/Incoming_Artifact_Dependencies.cypher @@ -0,0 +1,13 @@ +// Incoming Artifact Dependencies + + MATCH (a:Artifact:Archive) +OPTIONAL MATCH (a)<-[r:DEPENDS_ON]-(ea:Artifact:Archive) + WHERE a.fileName <> ea.fileName + WITH a + ,COUNT(ea) AS incomingDependencies + ,SUM(r.weight) AS incomingDependenciesWeight + SET a.incomingDependencies = incomingDependencies + ,a.incomingDependenciesWeight = incomingDependenciesWeight + RETURN a.fileName + ,incomingDependencies + ,incomingDependenciesWeight \ No newline at end of file diff --git a/cypher/Artifact_Dependencies/Most_used_internal_dependencies_acreoss_artifacts.cypher b/cypher/Artifact_Dependencies/Most_used_internal_dependencies_acreoss_artifacts.cypher new file mode 100644 index 000000000..b056ca378 --- /dev/null +++ b/cypher/Artifact_Dependencies/Most_used_internal_dependencies_acreoss_artifacts.cypher @@ -0,0 +1,24 @@ +// Most used internal dependencies across artifacts + +MATCH (type:Type)-[:DEPENDS_ON]->(dependencyType:Type) +MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package)-[:CONTAINS]->(type:Type) +MATCH (dependencyArtifact:Artifact)-[:CONTAINS]->(dependencyPackage:Package)-[:CONTAINS]->(dependencyType) +WHERE artifact.fileName <> dependencyArtifact.fileName + WITH replace(last(split(dependencyArtifact.fileName, '/')), 
'.jar', '') AS dependencyArtifactName + ,COLLECT(DISTINCT dependencyPackage.fqn) AS dependencyPackageNames + ,COLLECT(DISTINCT dependencyType.name) AS dependencyTypeNames + ,COLLECT(DISTINCT replace(last(split(artifact.fileName, '/')), '.jar', '')) AS artifactNames + ,COUNT(DISTINCT package.fqn) AS numberOfPackages + ,COUNT(DISTINCT type.fqn) AS numberOfTypes + ,COUNT(DISTINCT dependencyType) AS numberOfDependencyTypes + ,REDUCE(interfaces=0, depType IN COLLECT(DISTINCT dependencyType) | + CASE WHEN depType:Interface THEN interfaces + 1 ELSE interfaces END ) AS numberOfDependencyInterfaces + ORDER BY numberOfPackages DESC +RETURN dependencyArtifactName AS dependency + ,numberOfPackages AS usedByPackages + ,numberOfTypes AS usedByTypes + ,SIZE(dependencyPackageNames) AS providesPackages + ,SIZE(dependencyTypeNames) AS providesTypes + ,ROUND(100.0 / numberOfDependencyTypes * numberOfDependencyInterfaces, 2) AS interfaceRate + ,dependencyPackageNames[0..5] AS someProvidedPackages + ,dependencyTypeNames[0..5] AS someProvidedTypes \ No newline at end of file diff --git a/cypher/Artifact_Dependencies/Outgoing_Artifact_Dependencies.cypher b/cypher/Artifact_Dependencies/Outgoing_Artifact_Dependencies.cypher new file mode 100644 index 000000000..f0a46e979 --- /dev/null +++ b/cypher/Artifact_Dependencies/Outgoing_Artifact_Dependencies.cypher @@ -0,0 +1,13 @@ +// Outgoing Artifact Dependencies + + MATCH (a:Artifact:Archive) +OPTIONAL MATCH (a)-[r:DEPENDS_ON]->(ea:Artifact:Archive) + WHERE a.fileName <> ea.fileName + WITH a + ,COUNT(ea) AS outgoingDependencies + ,SUM(r.weight) AS outgoingDependenciesWeight + SET a.outgoingDependencies = outgoingDependencies + ,a.outgoingDependenciesWeight = outgoingDependenciesWeight + RETURN a.fileName + ,outgoingDependencies + ,outgoingDependenciesWeight \ No newline at end of file diff --git a/cypher/Artifact_Dependencies/Set_number_of_packages_and_types_on_artifacts.cypher 
b/cypher/Artifact_Dependencies/Set_number_of_packages_and_types_on_artifacts.cypher new file mode 100644 index 000000000..491a27894 --- /dev/null +++ b/cypher/Artifact_Dependencies/Set_number_of_packages_and_types_on_artifacts.cypher @@ -0,0 +1,13 @@ +// Set number of packages and types on artifacts + + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + WITH artifact + ,COUNT(DISTINCT package.fqn) AS numberOfPackages + ,COUNT(DISTINCT type.fqn) AS numberOfTypes + SET artifact.numberOfPackages = numberOfPackages + ,artifact.numberOfTypes = numberOfTypes +RETURN artifact.fileName + ,numberOfPackages + ,numberOfTypes + ORDER BY artifact.fileName \ No newline at end of file diff --git a/cypher/Artifact_Dependencies/Usage_and_spread_of_internal_artifact_dependencies.cypher b/cypher/Artifact_Dependencies/Usage_and_spread_of_internal_artifact_dependencies.cypher new file mode 100644 index 000000000..07b67f5e7 --- /dev/null +++ b/cypher/Artifact_Dependencies/Usage_and_spread_of_internal_artifact_dependencies.cypher @@ -0,0 +1,54 @@ +// Usage and spread of internal artifact dependencies + +MATCH (artifact:Artifact)-[:CONTAINS]->(packageInArtifact:Package) +MATCH (packageInArtifact)-[:CONTAINS]->(typeInPackage:Type) +MATCH (typeInPackage)-[:DEPENDS_ON]->(dependencyType:Type) +MATCH (dependencyPackage:Package)-[:CONTAINS]->(dependencyType) +MATCH (dependencyArtifact:Artifact)-[:CONTAINS]->(dependencyPackage) +WHERE artifact.fileName <> dependencyArtifact.fileName + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,artifact.numberOfPackages AS packagesInArtifactCount + ,artifact.numberOfTypes AS typesInArtifactCount + ,collect(DISTINCT packageInArtifact.fqn) AS packages + ,count(DISTINCT packageInArtifact.fqn) AS packagesCount + ,(100.0 + / artifact.numberOfPackages + * count(DISTINCT packageInArtifact.fqn)) AS packageSpread + ,collect(DISTINCT typeInPackage.name) AS types + ,count(DISTINCT 
typeInPackage.fqn) AS typesCount + ,(100.0 + / artifact.numberOfTypes + * count(DISTINCT typeInPackage.fqn)) AS typesSpread + ,replace(last(split(dependencyArtifact.fileName, '/')), '.jar', '') AS dependencyArtifactName +// additionally group by if the dependency is an interface or not + ,dependencyType:Interface AS dependencyTypeIsInterface + ,collect(DISTINCT dependencyPackage.fqn) AS dependencyPackages + ,count(DISTINCT dependencyPackage.fqn) AS dependencyPackagesCount + ,collect(DISTINCT dependencyType.name) AS dependencyTypes + ,count(DISTINCT dependencyType.fqn) AS dependencyTypesCount +// Filter out empty dependency sets +WHERE dependencyPackagesCount > 0 + AND packagesCount > 1 +RETURN dependencyArtifactName + ,dependencyTypeIsInterface + ,COUNT(DISTINCT artifactName) AS usedInArtifacts + ,SUM(packagesCount) AS usedInPackages + + ,MIN(packageSpread) AS minPackageSpread + ,MAX(packageSpread) AS maxPackageSpread + ,AVG(packageSpread) AS avgPackageSpread + ,stDev(packageSpread) AS stdPackageSpread + ,percentileDisc(packageSpread, 0.5) AS per5PackageSpread + + ,MIN(packagesCount) AS minPackageCount + ,MAX(packagesCount) AS maxPackageCount + ,AVG(packagesCount) AS avgPackageCount + ,stDev(packagesCount) AS stdPackageCount + ,percentileDisc(packagesCount, 0.5) AS per5PackageCount + + ,MIN(typesSpread) AS minTypeSpread + ,MAX(typesSpread) AS maxTypeSpread + ,AVG(typesSpread) AS avgTypeSpread + ,stDev(typesSpread) AS stdTypeSpread + ,percentileDisc(typesSpread, 0.5) AS per5TypeSpread +ORDER BY toLower(dependencyArtifactName) ASC \ No newline at end of file diff --git a/cypher/Artifact_Dependencies/Usage_and_spread_of_internal_artifact_dependents.cypher b/cypher/Artifact_Dependencies/Usage_and_spread_of_internal_artifact_dependents.cypher new file mode 100644 index 000000000..769d3aaa3 --- /dev/null +++ b/cypher/Artifact_Dependencies/Usage_and_spread_of_internal_artifact_dependents.cypher @@ -0,0 +1,55 @@ +// Usage and spread of internal artifact dependents + +MATCH 
(artifact:Artifact)-[:CONTAINS]->(packageInArtifact:Package) +MATCH (packageInArtifact)-[:CONTAINS]->(typeInPackage:Type) +MATCH (typeInPackage)-[:DEPENDS_ON]->(dependencyType:Type) +MATCH (dependencyPackage:Package)-[:CONTAINS]->(dependencyType) +MATCH (dependencyArtifact:Artifact)-[:CONTAINS]->(dependencyPackage) +WHERE artifact.fileName <> dependencyArtifact.fileName + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,artifact.numberOfPackages AS packagesInArtifactCount + ,artifact.numberOfTypes AS typesInArtifactCount + ,collect(DISTINCT packageInArtifact.fqn) AS packages + ,count(DISTINCT packageInArtifact.fqn) AS packagesCount + ,(100.0 + / artifact.numberOfPackages + * count(DISTINCT packageInArtifact.fqn)) AS packageSpread + ,collect(DISTINCT typeInPackage.name) AS types + ,count(DISTINCT typeInPackage.fqn) AS typesCount + ,(100.0 + / artifact.numberOfTypes + * count(DISTINCT typeInPackage.fqn)) AS typesSpread + ,replace(last(split(dependencyArtifact.fileName, '/')), '.jar', '') AS dependencyArtifactName +// additionally group by if the dependency is an interface or not + ,dependencyType:Interface AS dependencyTypeIsInterface + ,collect(DISTINCT dependencyPackage.fqn) AS dependencyPackages + ,count(DISTINCT dependencyPackage.fqn) AS dependencyPackagesCount + ,collect(DISTINCT dependencyType.name) AS dependencyTypes + ,count(DISTINCT dependencyType.fqn) AS dependencyTypesCount +// Filter out empty dependency sets +WHERE dependencyPackagesCount > 0 + AND packagesCount > 1 +RETURN artifactName + ,dependencyTypeIsInterface + ,COUNT(DISTINCT dependencyArtifactName) AS artifactDependencies + ,SUM(dependencyPackagesCount) AS artifactDependencyPackages + ,100.0 / SUM(packagesInArtifactCount) * SUM(packagesCount) AS dependentPackagesRate + + ,MIN(packageSpread) AS minPackageSpread + ,MAX(packageSpread) AS maxPackageSpread + ,AVG(packageSpread) AS avgPackageSpread + ,stDev(packageSpread) AS stdPackageSpread + 
,percentileDisc(packageSpread, 0.5) AS per5PackageSpread + + ,MIN(packagesCount) AS minPackageCount + ,MAX(packagesCount) AS maxPackageCount + ,AVG(packagesCount) AS avgPackageCount + ,stDev(packagesCount) AS stdPackageCount + ,percentileDisc(packagesCount, 0.5) AS per5PackageCount + + ,MIN(typesSpread) AS minTypeSpread + ,MAX(typesSpread) AS maxTypeSpread + ,AVG(typesSpread) AS avgTypeSpread + ,stDev(typesSpread) AS stdTypeSpread + ,percentileDisc(typesSpread, 0.5) AS per5TypeSpread +ORDER BY toLower(artifactName) ASC \ No newline at end of file diff --git a/cypher/Candidates_for_Interface_Segregation.cypher b/cypher/Candidates_for_Interface_Segregation.cypher index 2e9157f07..0fa970345 100644 --- a/cypher/Candidates_for_Interface_Segregation.cypher +++ b/cypher/Candidates_for_Interface_Segregation.cypher @@ -2,7 +2,7 @@ MATCH (type:Type)-[:DECLARES]->(method:Method)-[:INVOKES]->(dependentMethod:Method) MATCH (dependentMethod)<-[:DECLARES]-(dependentType:Type) -MATCH (dependentType)-[:IMPLEMENTS*]->(superType:Type)-[:DECLARES]->(inheritedMethod:Method) +MATCH (dependentType)-[:IMPLEMENTS*1..9]->(superType:Type)-[:DECLARES]->(inheritedMethod:Method) WHERE type.fqn <> dependentType.fqn AND dependentMethod.name IS NOT NULL AND inheritedMethod.name IS NOT NULL @@ -15,6 +15,8 @@ WHERE type.fqn <> dependentType.fqn // Count the different signatures without the return type // of all declared methods including the inherited ones ,count(DISTINCT split(method.signature, ' ')[1]) + count(DISTINCT split(inheritedMethod.signature, ' ')[1]) AS declaredMethods +// Filter out types that declare only a few more methods than those that are actually used. +// A good interface segregation candidate declares a lot of methods where only a few of them are used widely. 
WHERE declaredMethods > calledMethods + 2 WITH fullDependentTypeName ,declaredMethods diff --git a/cypher/Community_Detection_Leiden/Community_Detection_0_Delete_Projection.cypher b/cypher/Community_Detection_Leiden/Community_Detection_0_Delete_Projection.cypher index 2f689daae..35554ac9d 100644 --- a/cypher/Community_Detection_Leiden/Community_Detection_0_Delete_Projection.cypher +++ b/cypher/Community_Detection_Leiden/Community_Detection_0_Delete_Projection.cypher @@ -1,5 +1,5 @@ //Community Detection 0 Delete Projection - CALL gds.graph.drop('package-dependencies' + CALL gds.graph.drop('package-dependencies', false) YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0_Delete_Projection.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0_Delete_Projection.cypher new file mode 100644 index 000000000..c223aaacd --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0_Delete_Projection.cypher @@ -0,0 +1,5 @@ +//Community Detection 0 Delete Projection + + CALL gds.graph.drop('artifact-dependencies', false) + YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime +RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0b_Delete_Projection.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0b_Delete_Projection.cypher new file mode 100644 index 000000000..2757e95ca --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0b_Delete_Projection.cypher @@ -0,0 +1,5 @@ +//Community Detection 0b Delete Projection + + CALL gds.graph.drop('artifact-dependencies-without-empty', false) + YIELD 
graphName, nodeCount, relationshipCount, creationTime, modificationTime +RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1_Create_undirected_Projection.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1_Create_undirected_Projection.cypher new file mode 100644 index 000000000..5db35470c --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1_Create_undirected_Projection.cypher @@ -0,0 +1,15 @@ +//Community Detection 1 Create undirected Projection + +CALL gds.graph.project('artifact-dependencies', 'Artifact', + { + DEPENDS_ON: { + orientation: 'UNDIRECTED' + } + }, + { + relationshipProperties: ['weight'], + nodeProperties: ['incomingDependencies', 'outgoingDependencies'] + } +) + YIELD graphName, nodeCount, relationshipCount +RETURN graphName, nodeCount, relationshipCount \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1b_Create_subgraph_without_empty_artifacts.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1b_Create_subgraph_without_empty_artifacts.cypher new file mode 100644 index 000000000..47b0adcdf --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1b_Create_subgraph_without_empty_artifacts.cypher @@ -0,0 +1,10 @@ +//Community Detection 1b Create subgraph without empty artifacts + +CALL gds.beta.graph.project.subgraph( + 'artifact-dependencies-without-empty', + 'artifact-dependencies', + 'n.outgoingDependencies > 0 OR n.incomingDependencies > 0', + '*' +) + YIELD graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter +RETURN graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter \ No newline at end of file diff --git 
a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_2_Leiden_Estimate_Memory.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_2_Leiden_Estimate_Memory.cypher new file mode 100644 index 000000000..e7271a086 --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_2_Leiden_Estimate_Memory.cypher @@ -0,0 +1,23 @@ +//Community Detection 2 Leiden Estimate Memory + +CALL gds.beta.leiden.write.estimate('artifact-dependencies-without-empty', { + gamma: 1.11, + theta: 0.001, + consecutiveIds: true, + relationshipWeightProperty: 'weight', + writeProperty: 'leidenCommunityId' +}) +YIELD nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView +RETURN nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_3_Leiden_Statistics.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_3_Leiden_Statistics.cypher new file mode 100644 index 000000000..7200b4b59 --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_3_Leiden_Statistics.cypher @@ -0,0 +1,25 @@ +//Community Detection 3 Leiden Statistics + +CALL gds.beta.leiden.stats('artifact-dependencies-without-empty', { + gamma: 1.11, + theta: 0.001, + includeIntermediateCommunities: true, + relationshipWeightProperty: 'weight' +}) +YIELD communityCount + ,ranLevels + ,modularity + ,modularities + ,communityDistribution +RETURN communityCount + ,ranLevels + ,modularity + ,modularities + ,communityDistribution.min + ,communityDistribution.mean + ,communityDistribution.max + ,communityDistribution.p50 + ,communityDistribution.p75 + ,communityDistribution.p90 + ,communityDistribution.p95 + ,communityDistribution.p99 \ No newline at end of file diff --git 
a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_4_Leiden_Stream.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_4_Leiden_Stream.cypher new file mode 100644 index 000000000..09fb72892 --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_4_Leiden_Stream.cypher @@ -0,0 +1,17 @@ +//Community Detection 4 Leiden Stream + +CALL gds.beta.leiden.stream('artifact-dependencies-without-empty', { + gamma: 1.11, + theta: 0.001, + includeIntermediateCommunities: true, + relationshipWeightProperty: 'weight' +}) + YIELD nodeId, communityId, intermediateCommunityIds + WITH communityId + ,intermediateCommunityIds + ,gds.util.asNode(nodeId) AS artifact +RETURN intermediateCommunityIds[0] AS firstCommunityId + ,communityId AS finalCommunityId + ,COUNT(DISTINCT artifact) AS countOfMembers + ,collect(DISTINCT replace(last(split(artifact.fileName, '/')), '.jar', '')) AS artifactNames + ORDER BY countOfMembers DESC, communityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_5_Leiden_Write_property_leidenCommunityId.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_5_Leiden_Write_property_leidenCommunityId.cypher new file mode 100644 index 000000000..095c1b2ae --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_5_Leiden_Write_property_leidenCommunityId.cypher @@ -0,0 +1,36 @@ +//Community Detection 5 Leiden Write property leidenCommunityId + +CALL gds.beta.leiden.write('artifact-dependencies-without-empty', { + gamma: 1.11, + theta: 0.001, + consecutiveIds: true, + relationshipWeightProperty: 'weight', + writeProperty: 'leidenCommunityId' +}) +YIELD preProcessingMillis + ,computeMillis + ,writeMillis + ,postProcessingMillis + ,nodePropertiesWritten + ,communityCount + ,ranLevels + ,modularity + ,modularities + ,communityDistribution +RETURN preProcessingMillis + ,computeMillis + 
,writeMillis + ,postProcessingMillis + ,nodePropertiesWritten + ,communityCount + ,ranLevels + ,modularity + ,communityDistribution.min + ,communityDistribution.mean + ,communityDistribution.max + ,communityDistribution.p50 + ,communityDistribution.p75 + ,communityDistribution.p90 + ,communityDistribution.p95 + ,communityDistribution.p99 + ,modularities \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_6_Delete_Existing_Labels.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_6_Delete_Existing_Labels.cypher new file mode 100644 index 000000000..2e95af7aa --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_6_Delete_Existing_Labels.cypher @@ -0,0 +1,9 @@ +//Community Detection 6 Delete Existing Labels + + CALL db.labels() YIELD label + WHERE label STARTS WITH "ArtifactLeiden" + WITH collect(label) AS labels + MATCH (artifact:Artifact) + WITH collect(artifact) AS artifacts, labels + CALL apoc.create.removeLabels(artifacts, labels) YIELD node +RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_7_Add_ArtifactLeidenCommunity_Id_label_to_artifacts.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_7_Add_ArtifactLeidenCommunity_Id_label_to_artifacts.cypher new file mode 100644 index 000000000..1d0a4419b --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_7_Add_ArtifactLeidenCommunity_Id_label_to_artifacts.cypher @@ -0,0 +1,12 @@ +//Community Detection 7 Add ArtifactLeidenCommunity+Id label to artifacts +//with more than one member (artifacts without a leidenCommunityId are skipped, their label name would be null) + + MATCH (artifact:Artifact:Archive) + WITH artifact.leidenCommunityId AS communityId + ,collect(artifact) AS artifacts + ,COUNT(DISTINCT artifact.fileName) AS members + ,'ArtifactLeidenCommunity' + toString(artifact.leidenCommunityId) AS labelName + WHERE members > 1 AND communityId IS NOT NULL 
+UNWIND artifacts AS artifact + CALL apoc.create.addLabels(artifact, [labelName]) YIELD node +RETURN COUNT(node) as nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_8_Check_Leiden_Community_Id.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_8_Check_Leiden_Community_Id.cypher new file mode 100644 index 000000000..a269dbe27 --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_8_Check_Leiden_Community_Id.cypher @@ -0,0 +1,3 @@ +// Community Detection 8 Check Leiden Community Id + +MATCH (a:Artifact) WHERE a.leidenCommunityId IS NOT NULL RETURN a.leidenCommunityId LIMIT 1 \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Get_all_Artifacts_with_a_Community_Detection_Label.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Get_all_Artifacts_with_a_Community_Detection_Label.cypher new file mode 100644 index 000000000..a3743aa8b --- /dev/null +++ b/cypher/Community_Detection_Leiden_for_Artifacts/Get_all_Artifacts_with_a_Community_Detection_Label.cypher @@ -0,0 +1,5 @@ +// Get all Artifacts with a Community Detection Label + +MATCH (artifact:Artifact) +WHERE any(label IN labels(artifact) WHERE label CONTAINS 'Community') +RETURN DISTINCT artifact; \ No newline at end of file diff --git a/cypher/Cyclic_Dependencies/Cyclic_Dependencies.cypher b/cypher/Cyclic_Dependencies/Cyclic_Dependencies.cypher index da1945e8d..64909dff0 100644 --- a/cypher/Cyclic_Dependencies/Cyclic_Dependencies.cypher +++ b/cypher/Cyclic_Dependencies/Cyclic_Dependencies.cypher @@ -1,7 +1,35 @@ -// Cyclic Dependencies -MATCH (package:Package)-[:CONTAINS]->(type:Type)-[:DEPENDS_ON]->(dependentType:Type)<-[:CONTAINS]-(dependentPackage:Package) -MATCH (dependentPackage)-[:CONTAINS]->(cycleType:Type)-[:DEPENDS_ON]->(cycleDependentType:Type)<-[:CONTAINS]-(package) -WHERE package <> dependentPackage -RETURN package, dependentPackage - 
,type, dependentType, cycleType, cycleDependentType - LIMIT 100 \ No newline at end of file +//Cyclic Dependencies as List + +MATCH (package:Package)-[:CONTAINS]->(forwardSource:Type)-[:DEPENDS_ON]->(forwardTarget:Type)<-[:CONTAINS]-(dependentPackage:Package) +MATCH (dependentPackage)-[:CONTAINS]->(backwardSource:Type)-[:DEPENDS_ON]->(backwardTarget:Type)<-[:CONTAINS]-(package) +MATCH (artifact:Artifact)-[:CONTAINS]->(package) +MATCH (dependentArtifact:Artifact)-[:CONTAINS]->(dependentPackage) +WHERE package.fqn <> dependentPackage.fqn + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,package.fqn AS packageName + ,replace(last(split(dependentArtifact .fileName, '/')), '.jar', '') AS dependentArtifactName + ,dependentPackage.fqn AS dependentPackageName + ,collect(DISTINCT forwardSource.name + '->' + forwardTarget.name) AS forwardDependencies + ,collect(DISTINCT backwardSource.name + '->' + backwardTarget.name) AS backwardDependencies + WITH artifactName + ,packageName + ,dependentArtifactName + ,dependentPackageName + ,forwardDependencies + ,backwardDependencies + ,size(forwardDependencies) AS numberOfForwardDependencies + ,size(backwardDependencies) AS numberOfBackwardDependencies + ,size(forwardDependencies) + size(backwardDependencies) AS numberOfAllCyclicDependencies +WHERE (size(forwardDependencies) > size(backwardDependencies) + OR (size(forwardDependencies) = size(backwardDependencies) + AND size(packageName) >= size(dependentPackageName))) +RETURN artifactName + ,packageName + ,dependentArtifactName + ,dependentPackageName + ,toFloat(ABS(numberOfForwardDependencies - numberOfBackwardDependencies)) / numberOfAllCyclicDependencies AS forwardToBackwardBalance + ,numberOfForwardDependencies AS numberForward + ,numberOfBackwardDependencies AS numberBackward + ,forwardDependencies[0..9] AS someForwardDependencies + ,backwardDependencies +ORDER BY forwardToBackwardBalance DESC, packageName ASC \ No newline at end of file diff --git 
a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_Breakdown.cypher similarity index 60% rename from cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher rename to cypher/Cyclic_Dependencies/Cyclic_Dependencies_Breakdown.cypher index dd52e9f77..845136678 100644 --- a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher +++ b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_Breakdown.cypher @@ -1,14 +1,20 @@ -//Cyclic Dependencies as unwinded List +//Cyclic Dependencies Breakdown MATCH (package:Package)-[:CONTAINS]->(forwardSource:Type)-[:DEPENDS_ON]->(forwardTarget:Type)<-[:CONTAINS]-(dependentPackage:Package) MATCH (dependentPackage)-[:CONTAINS]->(backwardSource:Type)-[:DEPENDS_ON]->(backwardTarget:Type)<-[:CONTAINS]-(package) -WHERE package <> dependentPackage - WITH package - ,dependentPackage +MATCH (artifact:Artifact)-[:CONTAINS]->(package) +MATCH (dependentArtifact:Artifact)-[:CONTAINS]->(dependentPackage) +WHERE package.fqn <> dependentPackage.fqn + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,package.fqn AS packageName + ,replace(last(split(dependentArtifact .fileName, '/')), '.jar', '') AS dependentArtifactName + ,dependentPackage.fqn AS dependentPackageName ,collect(DISTINCT forwardSource.name + '->' + forwardTarget.name) AS forwardDependencies ,collect(DISTINCT backwardTarget.name + '<-' + backwardSource.name) AS backwardDependencies - WITH package - ,dependentPackage + WITH artifactName + ,packageName + ,dependentArtifactName + ,dependentPackageName ,forwardDependencies ,backwardDependencies ,size(forwardDependencies) AS numberOfForwardDependencies @@ -16,10 +22,12 @@ WHERE package <> dependentPackage ,size(forwardDependencies) + size(backwardDependencies) AS numberOfAllCyclicDependencies WHERE (size(forwardDependencies) > size(backwardDependencies) OR (size(forwardDependencies) = size(backwardDependencies) - AND 
size(package.fqn) >= size(dependentPackage.fqn))) + AND size(packageName) >= size(dependentPackageName))) UNWIND (backwardDependencies + forwardDependencies) AS dependency -RETURN package.fqn AS packageName - ,dependentPackage.fqn AS dependentPackageName +RETURN artifactName + ,packageName + ,dependentArtifactName + ,dependentPackageName ,dependency ,toFloat(ABS(numberOfForwardDependencies - numberOfBackwardDependencies)) / numberOfAllCyclicDependencies AS forwardToBackwardBalance ,numberOfForwardDependencies AS numberForward diff --git a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_Breakdown_Backward_Only.cypher similarity index 51% rename from cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher rename to cypher/Cyclic_Dependencies/Cyclic_Dependencies_Breakdown_Backward_Only.cypher index 4e09fa3b7..3b073399f 100644 --- a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher +++ b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_Breakdown_Backward_Only.cypher @@ -1,14 +1,20 @@ -//Cyclic Dependencies as List +//Cyclic Dependencies Breakdown Backward Only MATCH (package:Package)-[:CONTAINS]->(forwardSource:Type)-[:DEPENDS_ON]->(forwardTarget:Type)<-[:CONTAINS]-(dependentPackage:Package) MATCH (dependentPackage)-[:CONTAINS]->(backwardSource:Type)-[:DEPENDS_ON]->(backwardTarget:Type)<-[:CONTAINS]-(package) -WHERE package <> dependentPackage - WITH package - ,dependentPackage +MATCH (artifact:Artifact)-[:CONTAINS]->(package) +MATCH (dependentArtifact:Artifact)-[:CONTAINS]->(dependentPackage) +WHERE package.fqn <> dependentPackage.fqn + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,package.fqn AS packageName + ,replace(last(split(dependentArtifact .fileName, '/')), '.jar', '') AS dependentArtifactName + ,dependentPackage.fqn AS dependentPackageName ,collect(DISTINCT forwardSource.name + '->' + forwardTarget.name) AS forwardDependencies - ,collect(DISTINCT 
backwardSource.name + '->' + backwardTarget.name) AS backwardDependencies - WITH package - ,dependentPackage + ,collect(DISTINCT backwardTarget.name + '<-' + backwardSource.name) AS backwardDependencies + WITH artifactName + ,packageName + ,dependentArtifactName + ,dependentPackageName ,forwardDependencies ,backwardDependencies ,size(forwardDependencies) AS numberOfForwardDependencies @@ -16,12 +22,14 @@ WHERE package <> dependentPackage ,size(forwardDependencies) + size(backwardDependencies) AS numberOfAllCyclicDependencies WHERE (size(forwardDependencies) > size(backwardDependencies) OR (size(forwardDependencies) = size(backwardDependencies) - AND size(package.fqn) >= size(dependentPackage.fqn))) -RETURN package.fqn AS packageName - ,dependentPackage.fqn AS dependentPackageName + AND size(packageName) >= size(dependentPackageName))) +UNWIND backwardDependencies AS dependency +RETURN artifactName + ,packageName + ,dependentArtifactName + ,dependentPackageName + ,dependency ,toFloat(ABS(numberOfForwardDependencies - numberOfBackwardDependencies)) / numberOfAllCyclicDependencies AS forwardToBackwardBalance ,numberOfForwardDependencies AS numberForward ,numberOfBackwardDependencies AS numberBackward - ,forwardDependencies - ,backwardDependencies ORDER BY forwardToBackwardBalance DESC, packageName ASC \ No newline at end of file diff --git a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_Nodes.cypher b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_Nodes.cypher new file mode 100644 index 000000000..da1945e8d --- /dev/null +++ b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_Nodes.cypher @@ -0,0 +1,7 @@ +// Cyclic Dependencies as Nodes +MATCH (package:Package)-[:CONTAINS]->(type:Type)-[:DEPENDS_ON]->(dependentType:Type)<-[:CONTAINS]-(dependentPackage:Package) +MATCH (dependentPackage)-[:CONTAINS]->(cycleType:Type)-[:DEPENDS_ON]->(cycleDependentType:Type)<-[:CONTAINS]-(package) +WHERE package <> dependentPackage +RETURN package, dependentPackage + ,type, dependentType, 
cycleType, cycleDependentType + LIMIT 100 \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_levels.cypher b/cypher/External_Dependencies/External_package_levels.cypher new file mode 100644 index 000000000..49fdd696d --- /dev/null +++ b/cypher/External_Dependencies/External_package_levels.cypher @@ -0,0 +1,16 @@ +// External package levels + +MATCH (externalType:ExternalType) + WITH replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName + WITH count(DISTINCT split(externalPackageName, '.')[0]) AS externalFirstLevelPackages + ,count(DISTINCT split(externalPackageName, '.')[0..2]) AS externalSecondLevelPackages + ,count(DISTINCT split(externalPackageName, '.')[0..3]) AS externalThirdLevelPackages + ,count(DISTINCT split(externalPackageName, '.')[0..4]) AS externalForthLevelPackages + ,count(DISTINCT split(externalPackageName, '.')[0..5]) AS externalFifthLevelPackages + ,count(DISTINCT externalPackageName) AS allExternalPackages +RETURN externalFirstLevelPackages + ,externalSecondLevelPackages + ,externalThirdLevelPackages + ,externalForthLevelPackages + ,externalFifthLevelPackages + ,allExternalPackages \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_name_elements.cypher b/cypher/External_Dependencies/External_package_name_elements.cypher new file mode 100644 index 000000000..20079be7e --- /dev/null +++ b/cypher/External_Dependencies/External_package_name_elements.cypher @@ -0,0 +1,11 @@ +// External package name elements + +MATCH (externalType:ExternalType) + WITH replace(externalType.fqn, '.' 
+ externalType.name, '') AS packageName + WITH size(split(packageName,'.')) AS packageNameElements + ,count(DISTINCT packageName) AS packageCount + ,collect(DISTINCT packageName)[0..19] AS somePackageNames +RETURN packageNameElements + ,packageCount + ,somePackageNames +ORDER BY packageNameElements \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_overall.cypher b/cypher/External_Dependencies/External_package_usage_overall.cypher index 35835e2c5..b026375f3 100644 --- a/cypher/External_Dependencies/External_package_usage_overall.cypher +++ b/cypher/External_Dependencies/External_package_usage_overall.cypher @@ -1,17 +1,27 @@ // External package usage overall - MATCH (type:Type) - WITH count(type) as allTypes, collect(type) as typeList + MATCH (package:Package)-[:CONTAINS]->(type:Type) + WITH count(DISTINCT type.fqn) AS allTypes + ,count(DISTINCT package.fqn) AS allPackages + ,collect(type) as typeList UNWIND typeList AS type MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) WITH allTypes + ,allPackages ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName - ,count(externalDependency) AS numberOfExternalTypeCaller - ,sum(externalDependency.weight) AS numberOfExternalTypeCalls + ,count(DISTINCT typePackage.fqn) AS numberOfExternalCallerPackages + ,count(DISTINCT type.fqn) AS numberOfExternalCallerTypes + ,count(externalDependency) AS numberOfExternalTypeCalls + ,sum(externalDependency.weight) AS numberOfExternalTypeCallsWeighted ,collect(DISTINCT externalType.name) AS externalTypeNames +where numberOfExternalTypeCalls <> numberOfExternalCallerTypes RETURN externalPackageName - ,numberOfExternalTypeCaller + ,numberOfExternalCallerPackages + ,numberOfExternalCallerTypes ,numberOfExternalTypeCalls + ,numberOfExternalTypeCallsWeighted + ,allPackages ,allTypes - ,externalTypeNames - ORDER BY numberOfExternalTypeCaller DESC, externalPackageName ASC \ No newline at end of file + ,externalTypeNames[0..10] AS tenExternalTypeNames + ORDER BY numberOfExternalCallerPackages DESC, externalPackageName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact_and_external_package.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact_and_external_package.cypher new file mode 100644 index 000000000..622b8f9bb --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_and_external_package.cypher @@ -0,0 +1,50 @@ +// External package usage per artifact and external package + +// Get the overall artifact statistics first + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + OPTIONAL MATCH (packageUsingExternal:Package)-[:CONTAINS]->(type)-[:DEPENDS_ON]->(external:ExternalType) + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(DISTINCT package.fqn) AS artifactPackages + ,count(DISTINCT type.fqn) AS artifactTypes + ,count(DISTINCT replace(external.fqn, '.' 
+ external.name, '')) AS artifactExternalPackages + ,count(DISTINCT packageUsingExternal.fqn) AS artifactExternalCallingPackages + ,collect(type) AS typeList + WITH artifactName + ,artifactPackages + ,artifactTypes + ,artifactExternalPackages + ,artifactExternalCallingPackages + ,round((100.0 / artifactPackages * artifactExternalCallingPackages), 2) AS artifactExternalCallingPackagesRate + ,typeList +// Get the external dependencies for each internal type +UNWIND typeList AS type + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) +// Optionally filter out dependencies to external annotations +// WHERE NOT externalType:ExternalAnnotation + WITH artifactName + ,artifactPackages + ,artifactTypes + ,artifactExternalPackages + ,artifactExternalCallingPackages + ,artifactExternalCallingPackagesRate + ,typePackage.fqn AS packageName + ,type.fqn AS fullTypeName + ,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName +// Group by artifact and external package +RETURN artifactName + ,artifactPackages + ,artifactTypes + ,artifactExternalPackages + ,artifactExternalCallingPackages + ,artifactExternalCallingPackagesRate + ,externalPackageName + ,count(DISTINCT packageName) AS numberOfPackages + ,count(DISTINCT fullTypeName) AS numberOfTypes + ,100.0 / artifactPackages * count(DISTINCT packageName) AS packagesCallingExternalRate + ,100.0 / artifactTypes * count(DISTINCT fullTypeName) AS typesCallingExternalRate + ,COLLECT(DISTINCT packageName) AS nameOfPackages + ,COLLECT(DISTINCT fullTypeName)[0..9] AS someTypeNames +// Order the results by number of packages that use the external package dependency descending +ORDER BY artifactExternalCallingPackagesRate DESC, artifactName ASC, numberOfPackages DESC, externalPackageName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact_distribution.cypher 
b/cypher/External_Dependencies/External_package_usage_per_artifact_distribution.cypher new file mode 100644 index 000000000..a708fdbb3 --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_distribution.cypher @@ -0,0 +1,39 @@ +// External package usage per artifact distribution + + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(DISTINCT package.fqn) AS artifactPackages + ,count(DISTINCT type.fqn) AS artifactTypes + ,collect(type) AS typeList +UNWIND typeList AS type + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) + WHERE NOT externalType:ExternalAnnotation + WITH artifactName + ,artifactPackages + ,artifactTypes + ,typePackage.fqn AS packageName + ,type.fqn AS fullTypeName + ,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName + WITH artifactName + ,artifactPackages + ,artifactTypes + ,count(DISTINCT externalPackageName) AS numberOfExternalPackages + ,COLLECT(DISTINCT externalPackageName) AS nameOfExternalPackages + ,count(DISTINCT packageName) AS numberOfPackages + ,COLLECT(DISTINCT packageName) AS nameOfPackages + ,count(DISTINCT fullTypeName) AS numberOfTypes + ,COLLECT(DISTINCT fullTypeName) AS nameOfTypes +RETURN artifactName + ,artifactPackages + ,artifactTypes + ,numberOfExternalPackages + ,numberOfPackages + ,numberOfTypes + ,100.0 / artifactTypes * numberOfTypes AS typesCallingExternalRate + ,100.0 / artifactPackages * numberOfPackages AS packagesCallingExternalRate + ,nameOfExternalPackages[0..9] AS someExternalPackageNames + ,nameOfPackages[0..9] AS someExternalCallingPackageNames + ,nameOfTypes[0..9] AS someExternalCallingTypeNames +ORDER BY numberOfPackages DESC, artifactName ASC \ No newline at end of file diff --git 
a/cypher/External_Dependencies/External_package_usage_per_artifact_package_aggregated.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact_package_aggregated.cypher new file mode 100644 index 000000000..0101f1aa3 --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_package_aggregated.cypher @@ -0,0 +1,81 @@ +// External package usage per artifact package aggregated + +// Get the overall artifact statistics first + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + WHERE NOT type:ExternalType + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,artifact.leidenCommunityId AS leidenCommunityId + ,count(DISTINCT package.fqn) AS artifactPackages + ,count(DISTINCT type.fqn) AS artifactTypes + ,collect(type) AS typeList +// Get the external dependencies for each internal type +UNWIND typeList AS type + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) +// Filter out dependencies to external annotations + WHERE NOT externalType:ExternalAnnotation + WITH artifactName + ,leidenCommunityId + ,artifactPackages + ,artifactTypes + ,typePackage.fqn AS packageName + ,type.fqn AS fullTypeName + ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName +// Group by artifact and external package + WITH artifactName + ,leidenCommunityId + ,artifactPackages + ,artifactTypes + ,externalPackageName + ,count(DISTINCT packageName) AS numberOfPackages + ,COLLECT(DISTINCT packageName) AS nameOfPackages + ,count(DISTINCT fullTypeName) AS numberOfTypes + ,COLLECT(DISTINCT fullTypeName) AS nameOfTypes + ,100.0 / artifactPackages * count(DISTINCT packageName) AS packagesCallingExternalRate + ,100.0 / artifactTypes * count(DISTINCT fullTypeName) AS typesCallingExternalRate +// Pre order the results by number of packages that use the external package dependency descending +ORDER BY numberOfPackages DESC, artifactName ASC +// Optionally filter out external package dependencies that are only used by one package +// WHERE numberOfPackages > 1 +// Group by artifact, aggregate statistics and return the results +RETURN artifactName + ,leidenCommunityId + ,artifactPackages + ,artifactTypes + ,count(DISTINCT externalPackageName) AS numberOfExternalPackages + + // Statistics about the packages and their external package usage count + ,min(numberOfPackages) AS minNumberOfPackages + ,max(numberOfPackages) AS maxNumberOfPackages + ,percentileCont(numberOfPackages, 0.5) AS medNumberOfPackages + ,avg(numberOfPackages) AS avgNumberOfPackages + ,stDev(numberOfPackages) AS stdNumberOfPackages + + // Statistics about the packages and their external package usage percentage + ,min(packagesCallingExternalRate) AS minNumberOfPackagesPercentage + ,max(packagesCallingExternalRate) AS maxNumberOfPackagesPercentage + ,percentileCont(packagesCallingExternalRate, 0.5) AS medNumberOfPackagesPercentage + ,avg(packagesCallingExternalRate) AS avgNumberOfPackagesPercentage + ,stDev(packagesCallingExternalRate) AS stdNumberOfPackagesPercentage + + // Statistics about the types and their external package usage count + ,min(numberOfTypes) AS minNumberOfTypes + ,max(numberOfTypes) AS maxNumberOfTypes + 
,percentileCont(numberOfTypes, 0.5) AS medNumberOfTypes + ,avg(numberOfTypes) AS avgNumberOfTypes + ,stDev(numberOfTypes) AS stdNumberOfTypes + + // Statistics about the types and their external package usage count percentage + ,min(typesCallingExternalRate) AS minNumberOfTypesPercentage + ,max(typesCallingExternalRate) AS maxNumberOfTypesPercentage + ,percentileCont(typesCallingExternalRate, 0.5) AS medNumberOfTypesPercentage + ,avg(typesCallingExternalRate) AS avgNumberOfTypesPercentage + ,stDev(typesCallingExternalRate) AS stdNumberOfTypesPercentage + + // Examples of external packages, caller packages and caller types + ,collect(externalPackageName)[0..9] AS top10ExternalPackageNamesByUsageDescending + ,COLLECT(nameOfPackages)[0][0..9] AS somePackageNames + ,COLLECT(nameOfTypes)[0][0..9] AS someTypeNames + +ORDER BY maxNumberOfPackages DESC, artifactName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact_sorted.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact_sorted.cypher new file mode 100644 index 000000000..5be6fa16f --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_sorted.cypher @@ -0,0 +1,40 @@ +// External package usage per artifact sorted by external usage descending + + MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(type:Type) + OPTIONAL MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(DISTINCT type.fqn) AS numberOfTypesInArtifact + ,count(DISTINCT externalType.fqn) AS numberOfExternalTypesInArtifact + ,count(DISTINCT replace(externalType.fqn, '.' 
+ externalType.name, '')) AS numberOfExternalPackagesInArtifact + ,collect(DISTINCT type) AS typeList +UNWIND typeList AS type + MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) + WITH numberOfTypesInArtifact + ,numberOfExternalTypesInArtifact + ,numberOfExternalPackagesInArtifact + ,100.0 / numberOfTypesInArtifact * numberOfExternalTypesInArtifact AS externalTypeRate + ,externalDependency + ,artifactName + ,type.fqn AS fullTypeName + ,type.name AS typeName + ,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName + ,externalType.name AS externalTypeName + WITH numberOfTypesInArtifact + ,numberOfExternalTypesInArtifact + ,numberOfExternalPackagesInArtifact + ,externalTypeRate + ,artifactName + ,externalPackageName + ,count(externalDependency) AS numberOfExternalTypeCaller + ,sum(externalDependency.weight) AS numberOfExternalTypeCalls + ,collect(DISTINCT externalTypeName) AS externalTypeNames +RETURN artifactName + ,externalPackageName + ,numberOfExternalTypeCaller + ,numberOfExternalTypeCalls + ,numberOfTypesInArtifact + ,numberOfExternalTypesInArtifact + ,numberOfExternalPackagesInArtifact + ,externalTypeRate + ,externalTypeNames +ORDER BY externalTypeRate DESC, artifactName ASC, numberOfExternalTypeCaller DESC, externalPackageName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact_sorted_top.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact_sorted_top.cypher new file mode 100644 index 000000000..178b24e2c --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_sorted_top.cypher @@ -0,0 +1,52 @@ +// External package usage per artifact top externals + + MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(type:Type) + OPTIONAL MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(DISTINCT type.fqn) AS 
numberOfTypesInArtifact + ,count(DISTINCT externalType.fqn) AS numberOfExternalTypesInArtifact + ,count(DISTINCT replace(externalType.fqn, '.' + externalType.name, '')) AS numberOfExternalPackagesInArtifact + ,collect(DISTINCT type) AS typeList +UNWIND typeList AS type + MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) + WITH numberOfTypesInArtifact + ,numberOfExternalTypesInArtifact + ,numberOfExternalPackagesInArtifact + ,100.0 / numberOfTypesInArtifact * numberOfExternalTypesInArtifact AS externalTypeRate + ,externalDependency + ,artifactName + ,type.fqn AS fullTypeName + ,type.name AS typeName + ,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName + ,externalType.name AS externalTypeName + ORDER BY externalTypeRate DESC, artifactName ASC + WITH numberOfTypesInArtifact + ,numberOfExternalTypesInArtifact + ,numberOfExternalPackagesInArtifact + ,externalTypeRate + ,artifactName + ,externalPackageName + ,count(externalDependency) AS numberOfExternalTypeCaller + ,sum(externalDependency.weight) AS numberOfExternalTypeCalls + ,collect(DISTINCT externalTypeName) AS externalTypeNames + ORDER BY externalTypeRate DESC, artifactName ASC, numberOfExternalTypeCaller DESC + WITH numberOfTypesInArtifact + ,numberOfExternalTypesInArtifact + ,numberOfExternalPackagesInArtifact + ,externalTypeRate + ,artifactName + ,COLLECT(DISTINCT externalPackageName) AS externalPackageNames + ,SUM(numberOfExternalTypeCaller) AS numberOfExternalTypeCaller + ,sum(numberOfExternalTypeCalls) AS numberOfExternalTypeCalls + ,collect(externalTypeNames) AS externalTypeNames +RETURN artifactName + ,numberOfTypesInArtifact + ,numberOfExternalTypesInArtifact + ,numberOfExternalPackagesInArtifact + ,externalTypeRate + ,numberOfExternalTypeCaller + ,numberOfExternalTypeCalls + ,size(externalPackageNames) AS numberOfExternalPackages + ,externalPackageNames[0..4] AS top5ExternalPackages + ,externalTypeNames[0..1] AS someExternalTypes +LIMIT 40 \ No newline 
at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_internal_package_count.cypher b/cypher/External_Dependencies/External_package_usage_per_internal_package_count.cypher new file mode 100644 index 000000000..4f35261d4 --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_per_internal_package_count.cypher @@ -0,0 +1,45 @@ +// External package usage per internal package count + +// Get the overall artifact statistics first + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + WHERE NOT type:ExternalType + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(DISTINCT package.fqn) AS artifactPackages + ,count(DISTINCT type.fqn) AS artifactTypes + ,collect(type) AS typeList +// Get the external dependencies for each internal type +UNWIND typeList AS type + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) +// Optionally filter out dependencies to external annotations +// WHERE NOT externalType:ExternalAnnotation + WITH artifactName + ,artifactPackages + ,artifactTypes + ,typePackage.fqn AS packageName + ,type.fqn AS fullTypeName + ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName +// Group by artifact and external package +WITH artifactName + ,artifactPackages + ,artifactTypes + ,externalPackageName + ,count(DISTINCT packageName) AS numberOfPackages + ,count(DISTINCT fullTypeName) AS numberOfTypes + ,100.0 / artifactPackages * count(DISTINCT packageName) AS packagesCallingExternalRate + ,100.0 / artifactTypes * count(DISTINCT fullTypeName) AS typesCallingExternalRate + ,COLLECT(DISTINCT packageName) AS nameOfPackages + ,COLLECT(DISTINCT fullTypeName)[0..9] AS someTypeNames +RETURN artifactName + ,artifactPackages + ,artifactTypes + ,numberOfPackages + ,count(DISTINCT externalPackageName) AS numberOfExternalPackages + ,collect(DISTINCT externalPackageName) AS externalPackageNames + ,max(packagesCallingExternalRate) AS maxPackagesCallingExternalRate + ,max(typesCallingExternalRate) AS maxTypesCallingExternalRate + ,COLLECT(nameOfPackages)[0][0..9] AS somePackageNames + ,COLLECT(someTypeNames)[0] AS someTypeNames +// Order the results by number of packages that use the external package dependency descending +ORDER BY numberOfPackages DESC, artifactName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher b/cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher deleted file mode 100644 index 7b7445d24..000000000 --- a/cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher +++ /dev/null @@ -1,29 +0,0 @@ -// External package usage per type distribution - - MATCH (artifact:Artifact)-[:CONTAINS]->(type:Type) - WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName - ,count(type) AS artifactTypes - ,collect(type) AS typeList -UNWIND typeList AS type - MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) - WHERE externalType:ExternalAnnotation - WITH artifactName - ,artifactTypes - ,type.fqn AS fullTypeName - ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName - WITH artifactName - ,artifactTypes - ,fullTypeName - ,count(DISTINCT externalPackageName) AS numberOfExternalPackages - WITH artifactName - ,artifactTypes - ,numberOfExternalPackages - ,count(DISTINCT fullTypeName) AS numberOfTypes - ,COLLECT(DISTINCT fullTypeName) AS nameOfTypes -RETURN artifactName - ,artifactTypes - ,numberOfExternalPackages - ,numberOfTypes - ,100.0 / artifactTypes * numberOfTypes AS numberOfTypesPercentage - ,nameOfTypes -ORDER BY artifactName ASC, numberOfExternalPackages ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_spread.cypher b/cypher/External_Dependencies/External_package_usage_spread.cypher new file mode 100644 index 000000000..2f9238ec6 --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_spread.cypher @@ -0,0 +1,65 @@ +// External package usage spread + +// Get the overall artifact statistics first + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + WHERE NOT type:ExternalType + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(DISTINCT package.fqn) AS artifactPackages + ,count(DISTINCT type.fqn) AS artifactTypes + ,collect(type) AS typeList +// Get the external dependencies for each internal type +UNWIND typeList AS type + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) +// Filter out dependencies to external annotations + WHERE NOT externalType:ExternalAnnotation + WITH artifactName + ,artifactPackages + ,artifactTypes + ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName + // Gathering counts and numbers for every artifact and the external packages it uses + ,count(DISTINCT typePackage.fqn) AS numberOfPackages + ,COLLECT(DISTINCT typePackage.fqn) AS nameOfPackages + ,count(DISTINCT type.fqn) AS numberOfTypes + ,COLLECT(DISTINCT type.fqn )[0..9] AS someTypeNames + ,100.0 / artifactPackages * count(DISTINCT typePackage.fqn) AS packagesCallingExternalRate + ,100.0 / artifactTypes * count(DISTINCT type.fqn) AS typesCallingExternalRate +// Group external package +RETURN externalPackageName + ,count(DISTINCT artifactName) AS numberOfArtifacts + + // Statistics about the packages and their external package usage count + ,sum(numberOfPackages) AS sumNumberOfPackages + ,min(numberOfPackages) AS minNumberOfPackages + ,max(numberOfPackages) AS maxNumberOfPackages + ,percentileCont(numberOfPackages, 0.5) AS medNumberOfPackages + ,avg(numberOfPackages) AS avgNumberOfPackages + ,stDev(numberOfPackages) AS stdNumberOfPackages + + // Statistics about the packages and their external package usage percentage + ,min(packagesCallingExternalRate) AS minNumberOfPackagesPercentage + ,max(packagesCallingExternalRate) AS maxNumberOfPackagesPercentage + ,percentileCont(packagesCallingExternalRate, 0.5) AS medNumberOfPackagesPercentage + ,avg(packagesCallingExternalRate) AS avgNumberOfPackagesPercentage + ,stDev(packagesCallingExternalRate) AS stdNumberOfPackagesPercentage + + // Statistics about the types and their external package usage count + ,sum(numberOfTypes) AS sumNumberOfTypes + ,min(numberOfTypes) AS minNumberOfTypes + ,max(numberOfTypes) AS maxNumberOfTypes + ,percentileCont(numberOfTypes, 0.5) AS medNumberOfTypes + ,avg(numberOfTypes) AS avgNumberOfTypes + ,stDev(numberOfTypes) AS stdNumberOfTypes + + // Statistics about the types and their external package usage count percentage + ,min(typesCallingExternalRate) AS minNumberOfTypesPercentage + ,max(typesCallingExternalRate) AS 
maxNumberOfTypesPercentage + ,percentileCont(typesCallingExternalRate, 0.5) AS medNumberOfTypesPercentage + ,avg(typesCallingExternalRate) AS avgNumberOfTypesPercentage + ,stDev(typesCallingExternalRate) AS stdNumberOfTypesPercentage + + ,collect(DISTINCT artifactName)[0..4] AS someArtifactNames + +// Order the results by number of artifacts that use the external package dependency descending +ORDER BY numberOfArtifacts DESC, externalPackageName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_second_level_package_usage_overall.cypher b/cypher/External_Dependencies/External_second_level_package_usage_overall.cypher new file mode 100644 index 000000000..8f0c27314 --- /dev/null +++ b/cypher/External_Dependencies/External_second_level_package_usage_overall.cypher @@ -0,0 +1,27 @@ +// External second level package usage overall + + MATCH (package:Package)-[:CONTAINS]->(type:Type) + WITH count(DISTINCT type.fqn) AS allTypes + ,count(DISTINCT package.fqn) AS allPackages + ,collect(type) as typeList +UNWIND typeList AS type + MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) + WITH allTypes + ,allPackages + ,apoc.text.join(split(externalType.fqn,'.')[0..2], '.') AS externalSecondLevelPackageName + ,count(DISTINCT typePackage.fqn) AS numberOfExternalCallerPackages + ,count(DISTINCT type.fqn) AS numberOfExternalCallerTypes + ,count(externalDependency) AS numberOfExternalTypeCalls + ,sum(externalDependency.weight) AS numberOfExternalTypeCallsWeighted + ,collect(DISTINCT externalType.name) AS externalTypeNames +where numberOfExternalTypeCalls <> numberOfExternalCallerTypes +RETURN externalSecondLevelPackageName + ,numberOfExternalCallerPackages + ,numberOfExternalCallerTypes + ,numberOfExternalTypeCalls + ,numberOfExternalTypeCallsWeighted + ,allPackages + ,allTypes + ,externalTypeNames[0..10] AS tenExternalTypeNames + ORDER BY numberOfExternalCallerPackages DESC, 
externalSecondLevelPackageName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_second_level_package_usage_per_artifact_and_external_package.cypher b/cypher/External_Dependencies/External_second_level_package_usage_per_artifact_and_external_package.cypher new file mode 100644 index 000000000..0db7da4ee --- /dev/null +++ b/cypher/External_Dependencies/External_second_level_package_usage_per_artifact_and_external_package.cypher @@ -0,0 +1,50 @@ +// External second level package usage per artifact and external package + +// Get the overall artifact statistics first + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + OPTIONAL MATCH (packageUsingExternal:Package)-[:CONTAINS]->(type)-[:DEPENDS_ON]->(external:ExternalType) + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(DISTINCT package.fqn) AS artifactPackages + ,count(DISTINCT type.fqn) AS artifactTypes + ,count(DISTINCT split(external.fqn,'.')[0..2]) AS artifactExternalPackagesFirst2Levels + ,count(DISTINCT packageUsingExternal.fqn) AS artifactExternalCallingPackages + ,collect(type) AS typeList + WITH artifactName + ,artifactPackages + ,artifactTypes + ,artifactExternalPackagesFirst2Levels + ,artifactExternalCallingPackages + ,round((100.0 / artifactPackages * artifactExternalCallingPackages), 2) AS artifactExternalCallingPackagesRate + ,typeList +// Get the external dependencies for each internal type +UNWIND typeList AS type + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) +// Optionally filter out dependencies to external annotations +// WHERE NOT externalType:ExternalAnnotation + WITH artifactName + ,artifactPackages + ,artifactTypes + ,artifactExternalPackagesFirst2Levels + ,artifactExternalCallingPackages + ,artifactExternalCallingPackagesRate + ,typePackage.fqn AS packageName + ,type.fqn AS fullTypeName + 
,apoc.text.join(split(externalType.fqn,'.')[0..2], '.') AS externalPackageNameFirst2Levels +// Group by artifact and first two external package levels +RETURN artifactName + ,artifactPackages + ,artifactTypes + ,artifactExternalPackagesFirst2Levels + ,artifactExternalCallingPackages + ,artifactExternalCallingPackagesRate + ,externalPackageNameFirst2Levels + ,count(DISTINCT packageName) AS numberOfPackages + ,count(DISTINCT fullTypeName) AS numberOfTypes + ,100.0 / artifactPackages * count(DISTINCT packageName) AS packagesCallingExternalRate + ,100.0 / artifactTypes * count(DISTINCT fullTypeName) AS typesCallingExternalRate + ,COLLECT(DISTINCT packageName) AS nameOfPackages + ,COLLECT(DISTINCT fullTypeName)[0..9] AS someTypeNames +// Order the results by number of packages that use the external package dependency descending +ORDER BY artifactExternalCallingPackagesRate DESC, artifactName ASC, numberOfPackages DESC, externalPackageNameFirst2Levels ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_second_level_package_usage_spread.cypher b/cypher/External_Dependencies/External_second_level_package_usage_spread.cypher new file mode 100644 index 000000000..e074d2d90 --- /dev/null +++ b/cypher/External_Dependencies/External_second_level_package_usage_spread.cypher @@ -0,0 +1,65 @@ +// External second level package usage spread + +// Get the overall artifact statistics first + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + WHERE NOT type:ExternalType + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(DISTINCT package.fqn) AS artifactPackages + ,count(DISTINCT type.fqn) AS artifactTypes + ,collect(type) AS typeList +// Get the external dependencies for each internal type +UNWIND typeList AS type + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + MATCH (typePackage:Package)-[:CONTAINS]->(type) +// Filter out dependencies to external 
annotations + WHERE NOT externalType:ExternalAnnotation + WITH artifactName + ,artifactPackages + ,artifactTypes + ,apoc.text.join(split(externalType.fqn,'.')[0..2], '.') AS externalSecondLevelPackageName + // Gathering counts and numbers for every artifact and the external packages it uses + ,count(DISTINCT typePackage.fqn) AS numberOfPackages + ,COLLECT(DISTINCT typePackage.fqn) AS nameOfPackages + ,count(DISTINCT type.fqn) AS numberOfTypes + ,COLLECT(DISTINCT type.fqn )[0..9] AS someTypeNames + ,100.0 / artifactPackages * count(DISTINCT typePackage.fqn) AS packagesCallingExternalRate + ,100.0 / artifactTypes * count(DISTINCT type.fqn) AS typesCallingExternalRate +// Group by second level external package +RETURN externalSecondLevelPackageName + ,count(DISTINCT artifactName) AS numberOfArtifacts + + // Statistics about the packages and their external package usage count + ,sum(numberOfPackages) AS sumNumberOfPackages + ,min(numberOfPackages) AS minNumberOfPackages + ,max(numberOfPackages) AS maxNumberOfPackages + ,percentileCont(numberOfPackages, 0.5) AS medNumberOfPackages + ,avg(numberOfPackages) AS avgNumberOfPackages + ,stDev(numberOfPackages) AS stdNumberOfPackages + + // Statistics about the packages and their external package usage percentage + ,min(packagesCallingExternalRate) AS minNumberOfPackagesPercentage + ,max(packagesCallingExternalRate) AS maxNumberOfPackagesPercentage + ,percentileCont(packagesCallingExternalRate, 0.5) AS medNumberOfPackagesPercentage + ,avg(packagesCallingExternalRate) AS avgNumberOfPackagesPercentage + ,stDev(packagesCallingExternalRate) AS stdNumberOfPackagesPercentage + + // Statistics about the types and their external package usage count + ,sum(numberOfTypes) AS sumNumberOfTypes + ,min(numberOfTypes) AS minNumberOfTypes + ,max(numberOfTypes) AS maxNumberOfTypes + ,percentileCont(numberOfTypes, 0.5) AS medNumberOfTypes + ,avg(numberOfTypes) AS avgNumberOfTypes + ,stDev(numberOfTypes) AS stdNumberOfTypes + + // Statistics 
about the types and their external package usage count percentage + ,min(typesCallingExternalRate) AS minNumberOfTypesPercentage + ,max(typesCallingExternalRate) AS maxNumberOfTypesPercentage + ,percentileCont(typesCallingExternalRate, 0.5) AS medNumberOfTypesPercentage + ,avg(typesCallingExternalRate) AS avgNumberOfTypesPercentage + ,stDev(typesCallingExternalRate) AS stdNumberOfTypesPercentage + + ,collect(DISTINCT artifactName)[0..4] AS someArtifactNames + +// Order the results by number of artifacts that use the external package dependency descending +ORDER BY numberOfArtifacts DESC, externalSecondLevelPackageName ASC \ No newline at end of file diff --git a/cypher/Package_Usage/How_many_classes_compared_to_all_existing_in_the_same_package_are_used_by_dependent_packages_across_different_artifacts.cypher b/cypher/Internal_Dependencies/How_many_classes_compared_to_all_existing_in_the_same_package_are_used_by_dependent_packages_across_different_artifacts.cypher similarity index 100% rename from cypher/Package_Usage/How_many_classes_compared_to_all_existing_in_the_same_package_are_used_by_dependent_packages_across_different_artifacts.cypher rename to cypher/Internal_Dependencies/How_many_classes_compared_to_all_existing_in_the_same_package_are_used_by_dependent_packages_across_different_artifacts.cypher diff --git a/cypher/Package_Usage/How_many_packages_compared_to_all_existing_are_used_by_dependent_artifacts.cypher b/cypher/Internal_Dependencies/How_many_packages_compared_to_all_existing_are_used_by_dependent_artifacts.cypher similarity index 100% rename from cypher/Package_Usage/How_many_packages_compared_to_all_existing_are_used_by_dependent_artifacts.cypher rename to cypher/Internal_Dependencies/How_many_packages_compared_to_all_existing_are_used_by_dependent_artifacts.cypher diff --git a/cypher/Package_Usage/List_types_that_are_used_by_many_different_packages.cypher b/cypher/Internal_Dependencies/List_types_that_are_used_by_many_different_packages.cypher 
similarity index 56%
rename from cypher/Package_Usage/List_types_that_are_used_by_many_different_packages.cypher
rename to cypher/Internal_Dependencies/List_types_that_are_used_by_many_different_packages.cypher
index e60011325..25c95565f 100644
--- a/cypher/Package_Usage/List_types_that_are_used_by_many_different_packages.cypher
+++ b/cypher/Internal_Dependencies/List_types_that_are_used_by_many_different_packages.cypher
@@ -3,10 +3,10 @@
 MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package)-[:CONTAINS]->(type:Type)-[:DEPENDS_ON]->(dependentType:Type)<-[:CONTAINS]-(dependentPackage:Package)<-[:CONTAINS]-(dependentArtifact:Artifact)
 WHERE package <> dependentPackage
  WITH dependentType
-      ,labels(dependentType) AS dependentTypeLabels
-      ,COUNT(DISTINCT package) AS numberOfUsingPackages
-RETURN dependentType.fqn
-      ,dependentType.name
+      ,labels(dependentType) AS dependentTypeLabels
+      ,COUNT(DISTINCT package.fqn) AS numberOfUsingPackages
+RETURN dependentType.fqn AS fullQualifiedDependentTypeName
+      ,dependentType.name AS dependentTypeName
       ,dependentTypeLabels
       ,numberOfUsingPackages
- ORDER BY numberOfUsingPackages DESC
\ No newline at end of file
+ ORDER BY numberOfUsingPackages DESC, dependentTypeName ASC
\ No newline at end of file
diff --git a/cypher/List_all_existing_artifacts.cypher b/cypher/List_all_existing_artifacts.cypher
index 375fcdd8c..b2ed60035 100644
--- a/cypher/List_all_existing_artifacts.cypher
+++ b/cypher/List_all_existing_artifacts.cypher
@@ -2,6 +2,8 @@
 
 MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(package:Package)-[:CONTAINS]->(type:Type)
  WITH last(split(artifact.fileName, '/')) AS artifactName
-      ,COUNT(DISTINCT package) AS packages
-      ,COUNT(DISTINCT type) AS types
-RETURN artifactName, packages, types
\ No newline at end of file
+      ,artifact.incomingDependencies AS incomingDependencies
+      ,artifact.outgoingDependencies AS outgoingDependencies
+      ,COUNT(DISTINCT package.fqn) AS packages
+      ,COUNT(DISTINCT type.fqn) AS types
+RETURN artifactName, packages, types, incomingDependencies, outgoingDependencies
\ No newline at end of file
diff --git a/cypher/Overview/Effective_lines_of_method_code_per_package.cypher b/cypher/Overview/Effective_lines_of_method_code_per_package.cypher
index 8ee6739f9..423e82bd1 100644
--- a/cypher/Overview/Effective_lines_of_method_code_per_package.cypher
+++ b/cypher/Overview/Effective_lines_of_method_code_per_package.cypher
@@ -8,6 +8,7 @@
       ,package.fqn AS fullPackageName
       ,package.name AS packageName
       ,sum(method.effectiveLineCount) AS sumEffectiveLinesOfMethodCode
+      ,sum(method.cyclomaticComplexity) AS sumCyclomaticComplexity
       ,COUNT(DISTINCT method) AS numberOfMethods
       ,reduce( // Get the max effectiveLineCount of all methods in the package with the name and type of the method
          loc = {max:-1}, // initial object with max lines of code = -1
@@ -16,7 +17,7 @@
           THEN {max: m.method.effectiveLineCount, method: m.method, type: m.type} // then update the object
           ELSE loc // otherwise keep the object as it was
          END
-       ) AS methodWithMaxLoc
+       ) AS methodWithMaxLinesOfCode
       ,reduce( // Get the max cyclomaticComplexity of all methods in the package with the name and type of the method
          cmplx = {max:-1}, // initial object with max cyclomatic complexity = -1
          m IN collect({method:method, type:type}) | // collect all methods and their types as objects
@@ -27,12 +28,13 @@
        ) AS methodWithMaxCyclomaticComplexity
 RETURN artifactName, fullPackageName
       ,sumEffectiveLinesOfMethodCode AS linesInPackage
+      ,sumCyclomaticComplexity AS complexityInPackage
       ,numberOfMethods AS methodCount
-      ,methodWithMaxLoc.max AS maxLinesMethod
-      ,methodWithMaxLoc.type.name AS maxLinesMethodType
-      ,methodWithMaxLoc.method.name AS maxLinesMethodName
+      ,methodWithMaxLinesOfCode.max AS maxLinesMethod
+      ,methodWithMaxLinesOfCode.type.name AS maxLinesMethodType
+      ,methodWithMaxLinesOfCode.method.name AS maxLinesMethodName
       ,methodWithMaxCyclomaticComplexity.max AS maxComplexity
       ,methodWithMaxCyclomaticComplexity.type.name AS maxComplexityType
       ,methodWithMaxCyclomaticComplexity.method.name AS maxComplexityMethod
       ,packageName
-ORDER BY sumEffectiveLinesOfMethodCode DESC, artifactName ASC, fullPackageName
\ No newline at end of file
+ORDER BY linesInPackage DESC, artifactName ASC, fullPackageName
\ No newline at end of file
diff --git a/cypher/Overview/Number_of_types_per_artifact.cypher b/cypher/Overview/Number_of_types_per_artifact.cypher
index 0a020d9a6..eecbd07a9 100644
--- a/cypher/Overview/Number_of_types_per_artifact.cypher
+++ b/cypher/Overview/Number_of_types_per_artifact.cypher
@@ -2,11 +2,21 @@
 
 MATCH (artifact:Artifact)-[:CONTAINS]->(type:Type)
  WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
+      ,count(DISTINCT type.fqn) AS numberOfArtifactTypes
+      ,collect(DISTINCT type) AS types
+UNWIND types AS type
+  WITH artifactName
+      ,numberOfArtifactTypes
       ,type
       ,labels(type) AS typeLabels
 UNWIND typeLabels AS typeLabel
- WITH artifactName, type, typeLabel
+  WITH artifactName
+      ,numberOfArtifactTypes
+      ,type
+      ,typeLabel
 WHERE typeLabel IN ['Class', 'Interface', 'Annotation', 'Enum']
 RETURN artifactName
+      ,numberOfArtifactTypes
       ,typeLabel AS languageElement
-      ,count(type) AS numberOfTypes
\ No newline at end of file
+      ,count(type) AS numberOfTypes
+ ORDER BY numberOfArtifactTypes DESC, artifactName ASC
\ No newline at end of file
diff --git a/cypher/Overview/Overview_size.cypher b/cypher/Overview/Overview_size.cypher
new file mode 100644
index 000000000..22e6e5b71
--- /dev/null
+++ b/cypher/Overview/Overview_size.cypher
@@ -0,0 +1,44 @@
+// Overview size: one row with the number of nodes, relationships, artifacts, packages, types, methods and members (0 for anything that does not exist)
+
+         MATCH (n)
+          WITH COUNT(n) AS nodeCount // aggregation without grouping keys always yields one row
+OPTIONAL MATCH ()-[r]->() // OPTIONAL: a plain MATCH without hits would drop the row and empty the whole result
+          WITH nodeCount
+              ,count(r) AS relationshipCount // count(r) ignores the null produced by an unmatched OPTIONAL MATCH
+OPTIONAL MATCH (a:Artifact:Archive)
+          WITH nodeCount
+              ,relationshipCount
+              ,count(DISTINCT a.fileName) AS artifactCount
+OPTIONAL MATCH (p:Package)
+          WITH nodeCount
+              ,relationshipCount
+              ,artifactCount
+              ,count(DISTINCT p.fqn) AS packageCount
+OPTIONAL MATCH (t:Type)
+          WITH nodeCount
+              ,relationshipCount
+              ,artifactCount
+              ,packageCount
+              ,count(DISTINCT t.fqn) AS typeCount
+OPTIONAL MATCH (m:Method)
+          WITH nodeCount
+              ,relationshipCount
+              ,artifactCount
+              ,packageCount
+              ,typeCount
+              ,count(DISTINCT m.signature) AS methodCount // NOTE(review): counts distinct signature values, not nodes - confirm that equal signatures in different types should count once
+OPTIONAL MATCH (member:Member)
+          WITH nodeCount
+              ,relationshipCount
+              ,artifactCount
+              ,packageCount
+              ,typeCount
+              ,methodCount
+              ,count(DISTINCT member.signature) AS memberCount // NOTE(review): same distinct-signature counting as methodCount
+        RETURN nodeCount
+              ,relationshipCount
+              ,artifactCount
+              ,packageCount
+              ,typeCount
+              ,methodCount
+              ,memberCount
\ No newline at end of file
diff --git a/cypher/Topological_Sort_Artifacts/0_Delete_Projections_for_directed_artifact_dependencies.cypher b/cypher/Topological_Sort_Artifacts/0_Delete_Projections_for_directed_artifact_dependencies.cypher
new file mode 100644
index 000000000..685f11973
--- /dev/null
+++ b/cypher/Topological_Sort_Artifacts/0_Delete_Projections_for_directed_artifact_dependencies.cypher
@@ -0,0 +1,5 @@
+//0 Delete Projections for directed artifact dependencies
+
+  CALL gds.graph.drop('artifact-dependencies-directed', false) // false: do not fail when the projection does not exist
+ YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime
+RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime
\ No newline at end of file
diff --git a/cypher/Topological_Sort_Artifacts/0b_Delete_Projections_for_directed_artifact_dependencies.cypher b/cypher/Topological_Sort_Artifacts/0b_Delete_Projections_for_directed_artifact_dependencies.cypher
new file mode 100644
index 000000000..c621d56ac
--- /dev/null
+++ b/cypher/Topological_Sort_Artifacts/0b_Delete_Projections_for_directed_artifact_dependencies.cypher
@@ -0,0 +1,5 @@
+//0b Delete Projections for directed artifact dependencies
+
+  CALL gds.graph.drop('artifact-dependencies-directed-without-empty', false) // false: do not fail when the projection does not exist
+ YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime
+RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime
\ No newline at end of file
diff --git
a/cypher/Topological_Sort_Artifacts/1_Create_directed_Projection.cypher b/cypher/Topological_Sort_Artifacts/1_Create_directed_Projection.cypher
new file mode 100644
index 000000000..2f2164e03
--- /dev/null
+++ b/cypher/Topological_Sort_Artifacts/1_Create_directed_Projection.cypher
@@ -0,0 +1,9 @@
+//1 Create directed Projection of Artifact nodes and their DEPENDS_ON relationships
+CALL gds.graph.project('artifact-dependencies-directed', 'Artifact', 'DEPENDS_ON',
+  {
+    relationshipProperties: ['weight'],
+    nodeProperties: ['incomingDependencies', 'outgoingDependencies']
+  }
+)
+ YIELD graphName, nodeCount, relationshipCount
+RETURN graphName, nodeCount, relationshipCount
\ No newline at end of file
diff --git a/cypher/Topological_Sort_Artifacts/2_Create_directed_subgraph_without_empty_artifacts.cypher b/cypher/Topological_Sort_Artifacts/2_Create_directed_subgraph_without_empty_artifacts.cypher
new file mode 100644
index 000000000..85b5e1841
--- /dev/null
+++ b/cypher/Topological_Sort_Artifacts/2_Create_directed_subgraph_without_empty_artifacts.cypher
@@ -0,0 +1,10 @@
+//2 Create directed subgraph without empty artifacts
+
+CALL gds.beta.graph.project.subgraph(
+  'artifact-dependencies-directed-without-empty', // name of the new projection
+  'artifact-dependencies-directed', // projection to filter
+  'n.outgoingDependencies > 0 OR n.incomingDependencies > 0', // node filter: keep artifacts with at least one dependency
+  '*' // relationship filter: keep all
+)
+ YIELD graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter
+RETURN graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter
\ No newline at end of file
diff --git a/cypher/Topological_Sort_Artifacts/3_Topological_Sort_Artifacts.cypher b/cypher/Topological_Sort_Artifacts/3_Topological_Sort_Artifacts.cypher
new file mode 100644
index 000000000..9454a1bbb
--- /dev/null
+++ b/cypher/Topological_Sort_Artifacts/3_Topological_Sort_Artifacts.cypher
@@ -0,0 +1,16 @@
+//3 Topological Sort Artifacts: writes maxDistanceFromSource and topologicalSortIndex to the sorted artifact nodes
+//Needs graph-data-science plugin version >= 2.5.0
+
+CALL gds.dag.topologicalSort.stream('artifact-dependencies-directed-without-empty',{
+  computeMaxDistanceFromSource: true
+}) YIELD nodeId, maxDistanceFromSource
+  WITH nodeId
+      ,gds.util.asNode(nodeId) AS artifact
+      ,toInteger(maxDistanceFromSource) AS maxDistanceFromSource // stored as integer
+   SET artifact.maxDistanceFromSource = maxDistanceFromSource
+  WITH COLLECT(nodeId) AS sortedNodeIds
+      ,COLLECT(artifact) AS sortedArtifacts
+      ,MAX(maxDistanceFromSource) AS overallMaxDistance
+FOREACH (i IN RANGE(0, SIZE(sortedArtifacts)-1) | // position within the collected stream order becomes the sort index
+   SET gds.util.asNode(sortedNodeIds[i]).topologicalSortIndex = i)
+ RETURN size(sortedArtifacts) AS numberOfArtifacts, overallMaxDistance
\ No newline at end of file
diff --git a/cypher/Topological_Sort_Artifacts/3b_Topological_Sort_Artifacts_as_list.cypher b/cypher/Topological_Sort_Artifacts/3b_Topological_Sort_Artifacts_as_list.cypher
new file mode 100644
index 000000000..6b0a9e729
--- /dev/null
+++ b/cypher/Topological_Sort_Artifacts/3b_Topological_Sort_Artifacts_as_list.cypher
@@ -0,0 +1,21 @@
+//3b Topological Sort Artifacts as list: same property updates as step 3, but returns one row per artifact
+//Needs graph-data-science plugin version >= 2.5.0
+
+CALL gds.dag.topologicalSort.stream('artifact-dependencies-directed-without-empty',{
+  computeMaxDistanceFromSource: true
+}) YIELD nodeId, maxDistanceFromSource
+  WITH nodeId
+      ,gds.util.asNode(nodeId) AS artifact
+      ,toInteger(maxDistanceFromSource) AS maxDistanceFromSource // stored as integer
+   SET artifact.maxDistanceFromSource = maxDistanceFromSource
+  WITH COLLECT(nodeId) AS sortedNodeIds
+      ,COLLECT({artifact: artifact, maxDistanceFromSource: maxDistanceFromSource}) AS topologicalSortedArtifacts
+      ,MAX(maxDistanceFromSource) AS overallMaxDistance
+FOREACH (i IN RANGE(0, SIZE(sortedNodeIds)-1) | // position within the collected stream order becomes the sort index
+   SET gds.util.asNode(sortedNodeIds[i]).topologicalSortIndex = i)
+  WITH topologicalSortedArtifacts
+      ,overallMaxDistance
+ UNWIND topologicalSortedArtifacts AS topologicalSortedArtifact
+ RETURN replace(last(split(topologicalSortedArtifact.artifact.fileName, '/')), '.jar', '') AS artifactName
+       ,topologicalSortedArtifact.maxDistanceFromSource AS buildLevel
+       ,overallMaxDistance AS overalNumberOfBuildLevels
\ No newline at end of
file
diff --git a/cypher/Topological_Sort_Artifacts/4_Query_artifacts_in_topological_order.cypher b/cypher/Topological_Sort_Artifacts/4_Query_artifacts_in_topological_order.cypher
new file mode 100644
index 000000000..abda069f9
--- /dev/null
+++ b/cypher/Topological_Sort_Artifacts/4_Query_artifacts_in_topological_order.cypher
@@ -0,0 +1,14 @@
+//4 Artifacts in topological order (reads the topologicalSortIndex and maxDistanceFromSource node properties)
+
+MATCH (artifact:Artifact)
+WHERE artifact.topologicalSortIndex IS NOT NULL
+ WITH COLLECT(artifact) AS artifacts
+     ,MAX(artifact.maxDistanceFromSource) AS maxBuildLevel // overall maximum, repeated on every returned row
+UNWIND artifacts AS artifact
+RETURN replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
+      ,artifact.topologicalSortIndex AS topologicalSortIndex
+      ,artifact.maxDistanceFromSource AS buildLevel
+      ,maxBuildLevel
+      ,artifact.incomingDependencies AS incomingDependencies
+      ,artifact.outgoingDependencies AS outgoingDependencies
+ORDER BY artifact.topologicalSortIndex
\ No newline at end of file
diff --git a/cypher/Topological_Sort_Artifacts/5_Experimental_Depth_First_Search_Artifacts_Index.cypher b/cypher/Topological_Sort_Artifacts/5_Experimental_Depth_First_Search_Artifacts_Index.cypher
new file mode 100644
index 000000000..12a355c5f
--- /dev/null
+++ b/cypher/Topological_Sort_Artifacts/5_Experimental_Depth_First_Search_Artifacts_Index.cypher
@@ -0,0 +1,46 @@
+//5 Experimental Depth First Search Artifacts Index
+//Known limitation: the written depthFirstSearchLevel is not correct (see TODO below)
+
+// Depth First Search starting from a node with no incoming dependencies
+MATCH (source:Artifact{fileName:'/axon-configuration-4.8.0.jar'}) // NOTE(review): start artifact is hard-coded
+ CALL gds.dfs.stream('artifact-dependencies-directed-without-empty', {
+   sourceNode: source
+})
+ YIELD nodeIds
+// Generate an index to iterate through the searched nodes
+UNWIND range(0, size(nodeIds)-1) AS nodeIndex
+  WITH nodeIndex
+      ,nodeIds
+      ,gds.util.asNodes(nodeIds) AS searchedNodes
+  WITH nodeIndex
+      ,nodeIds
+      ,searchedNodes
+      ,searchedNodes[nodeIndex] AS indexedNode
+      // Get the previous node to be able to detect where depth first search went back
+      ,CASE WHEN nodeIndex > 0
+            THEN searchedNodes[nodeIndex - 1]
+            ELSE NULL
+        END AS previousNode
+// Get the parent nodes of the indexed one (artifacts that depend on it)
+ OPTIONAL MATCH (indexedNode)<-[:DEPENDS_ON]-(parent:Artifact)
+  WITH nodeIndex
+      ,nodeIds
+      ,searchedNodes
+      ,indexedNode
+      ,previousNode
+      ,COLLECT(parent.fileName) AS parentFilenames
+      ,(previousNode IN COLLECT(parent)) AS previousIsParent
+      ,COLLECT(apoc.coll.indexOf(searchedNodes[0..nodeIndex], parent)) AS previousParentIndizes // positions of the parents among the nodes searched before this one
+      ,apoc.coll.max(COLLECT(apoc.coll.indexOf(searchedNodes[0..nodeIndex], parent))) + 1 AS topologyLevel // highest such position plus one
+   // Set the property 'depthFirstSearchIndex' to the index
+   // TODO Set 'depthFirstSearchLevel' relative to the level of the parent, not its dfs index
+   SET indexedNode.depthFirstSearchIndex = nodeIndex
+      ,indexedNode.depthFirstSearchLevel = topologyLevel
+RETURN indexedNode.fileName
+      ,nodeIndex
+      ,previousNode.fileName
+      ,previousIsParent
+      ,previousParentIndizes
+      ,topologyLevel
+      ,parentFilenames
+//FOREACH (i IN RANGE(0, SIZE(nodeIds)-1) | SET gds.util.asNode(nodeIds[i]).depthFirstSearchIndex = i)
\ No newline at end of file
diff --git a/graph-visualization/artifactDependenciesGraph/artifactDependenciesGraph.html b/graph-visualization/artifactDependenciesGraph/artifactDependenciesGraph.html
index 434c1ef3a..edd0c2f7c 100644
--- a/graph-visualization/artifactDependenciesGraph/artifactDependenciesGraph.html
+++ b/graph-visualization/artifactDependenciesGraph/artifactDependenciesGraph.html
@@ -10,6 +10,8 @@
+
+
@@ -19,7 +21,7 @@
-
+