Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Artifacts with dependencies to other artifacts
//
// Follows type-level DEPENDS_ON relationships that cross artifact boundaries and reports,
// per (depending artifact, dependency artifact, dependency-is-interface flag):
//  - how many distinct packages/types of the depending artifact are involved,
//  - that number as a percentage of the artifact's "numberOfPackages"/"numberOfTypes" properties,
//  - how many distinct packages/types of the dependency artifact are used,
//  - a few sample package and type names of both sides.

MATCH (sourceArtifact:Artifact)-[:CONTAINS]->(sourcePackage:Package)
MATCH (sourcePackage)-[:CONTAINS]->(sourceType:Type)
MATCH (sourceType)-[:DEPENDS_ON]->(targetType:Type)
MATCH (targetPackage:Package)-[:CONTAINS]->(targetType)
MATCH (targetArtifact:Artifact)-[:CONTAINS]->(targetPackage)
// Only dependencies between two different artifact files are of interest
WHERE sourceArtifact.fileName <> targetArtifact.fileName
WITH
    // Grouping keys. The artifact names are the last path segment of the file name without '.jar'.
    replace(last(split(sourceArtifact.fileName, '/')), '.jar', '') AS artifactName,
    replace(last(split(targetArtifact.fileName, '/')), '.jar', '') AS dependencyArtifactName,
    // Interface and non-interface dependencies end up in separate result rows
    targetType:Interface AS dependencyTypeIsInterface,
    sourceArtifact.numberOfPackages AS packagesInArtifactCount,
    sourceArtifact.numberOfTypes AS typesInArtifactCount,
    // Depending side: packages and types that use the dependency artifact
    collect(DISTINCT sourcePackage.fqn) AS callingPackageNames,
    count(DISTINCT sourcePackage.fqn) AS packagesCount,
    round(100.0 / sourceArtifact.numberOfPackages * count(DISTINCT sourcePackage.fqn), 2) AS packageSpread,
    collect(DISTINCT sourceType.name) AS callingTypeNames,
    count(DISTINCT sourceType.fqn) AS typesCount,
    round(100.0 / sourceArtifact.numberOfTypes * count(DISTINCT sourceType.fqn), 2) AS typesSpread,
    // Dependency side: packages and types that are used
    collect(DISTINCT targetPackage.fqn) AS dependencyPackageNames,
    count(DISTINCT targetPackage.fqn) AS dependencyPackagesCount,
    collect(DISTINCT targetType.name) AS dependencyTypeNames,
    count(DISTINCT targetType.fqn) AS dependencyTypesCount
// Keep rows with at least one dependency package that are used by more than one package
WHERE dependencyPackagesCount > 0
  AND packagesCount > 1
RETURN
    artifactName,
    packagesInArtifactCount,
    packagesCount,
    packageSpread,
    typesInArtifactCount,
    typesCount,
    typesSpread,
    dependencyArtifactName,
    dependencyTypeIsInterface,
    dependencyPackagesCount,
    dependencyTypesCount,
    // Samples only: the lists are cut to a handful of entries
    dependencyPackageNames[0..2] AS someDependencyPackages,
    dependencyTypeNames[0..4] AS someDependencyTypes,
    callingPackageNames[0..2] AS someCallingPackages,
    callingTypeNames[0..4] AS someCallingTypes
ORDER BY packagesCount DESC
13 changes: 13 additions & 0 deletions cypher/Artifact_Dependencies/Incoming_Artifact_Dependencies.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Incoming Artifact Dependencies
//
// For every archive artifact, counts the DEPENDS_ON relationships that point to it from
// other archive artifacts and sums up their "weight" property. Both values are written
// back to the artifact node ("incomingDependencies", "incomingDependenciesWeight")
// and returned together with the artifact's file name.

MATCH (a:Artifact:Archive)
// OPTIONAL keeps artifacts that no other artifact depends on; their count is then 0.
OPTIONAL MATCH (dependent:Artifact:Archive)-[dependency:DEPENDS_ON]->(a)
         WHERE a.fileName <> dependent.fileName
WITH a,
     count(dependent) AS incomingDependencies,
     sum(dependency.weight) AS incomingDependenciesWeight
SET a.incomingDependencies = incomingDependencies,
    a.incomingDependenciesWeight = incomingDependenciesWeight
RETURN a.fileName,
       incomingDependencies,
       incomingDependenciesWeight
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Most used internal dependencies across artifacts
//
// Groups type dependencies that cross artifact boundaries by the artifact that provides
// the used types. Per providing artifact it returns the number of distinct packages and types
// that use it, the number of distinct packages and type names it provides to them,
// the percentage of used types that carry the Interface label and some sample names.

MATCH (usingType:Type)-[:DEPENDS_ON]->(providedType:Type)
MATCH (usingArtifact:Artifact)-[:CONTAINS]->(usingPackage:Package)-[:CONTAINS]->(usingType)
MATCH (providingArtifact:Artifact)-[:CONTAINS]->(providingPackage:Package)-[:CONTAINS]->(providedType)
WHERE usingArtifact.fileName <> providingArtifact.fileName
WITH
    // Grouping key: last path segment of the providing artifact's file name without '.jar'
    replace(last(split(providingArtifact.fileName, '/')), '.jar', '') AS providerName,
    collect(DISTINCT providingPackage.fqn) AS providedPackageNames,
    collect(DISTINCT providedType.name) AS providedTypeNames,
    collect(DISTINCT replace(last(split(usingArtifact.fileName, '/')), '.jar', '')) AS usingArtifactNames,
    count(DISTINCT usingPackage.fqn) AS usingPackageCount,
    count(DISTINCT usingType.fqn) AS usingTypeCount,
    count(DISTINCT providedType) AS providedTypeCount,
    // Number of distinct provided types that are labelled as Interface
    size([candidate IN collect(DISTINCT providedType) WHERE candidate:Interface]) AS providedInterfaceCount
ORDER BY usingPackageCount DESC
RETURN
    providerName AS dependency,
    usingPackageCount AS usedByPackages,
    usingTypeCount AS usedByTypes,
    size(providedPackageNames) AS providesPackages,
    size(providedTypeNames) AS providesTypes,
    round(100.0 / providedTypeCount * providedInterfaceCount, 2) AS interfaceRate,
    // Samples only: the lists are cut to the first five entries
    providedPackageNames[0..5] AS someProvidedPackages,
    providedTypeNames[0..5] AS someProvidedTypes
13 changes: 13 additions & 0 deletions cypher/Artifact_Dependencies/Outgoing_Artifact_Dependencies.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Outgoing Artifact Dependencies
//
// For every archive artifact, counts the DEPENDS_ON relationships that lead from it to
// other archive artifacts and sums up their "weight" property. Both values are written
// back to the artifact node ("outgoingDependencies", "outgoingDependenciesWeight")
// and returned together with the artifact's file name.

MATCH (a:Artifact:Archive)
// OPTIONAL keeps artifacts that don't depend on any other artifact; their count is then 0.
OPTIONAL MATCH (dependency:Artifact:Archive)<-[usage:DEPENDS_ON]-(a)
         WHERE a.fileName <> dependency.fileName
WITH a,
     count(dependency) AS outgoingDependencies,
     sum(usage.weight) AS outgoingDependenciesWeight
SET a.outgoingDependencies = outgoingDependencies,
    a.outgoingDependenciesWeight = outgoingDependenciesWeight
RETURN a.fileName,
       outgoingDependencies,
       outgoingDependenciesWeight
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Set number of packages and types on artifacts
//
// Counts the distinct full qualified names of the packages an artifact contains and of the
// types inside those packages. The counts are stored on the artifact node as
// "numberOfPackages" and "numberOfTypes" and returned sorted by the artifact's file name.

MATCH (artifact:Artifact)-[:CONTAINS]->(containedPackage:Package)
MATCH (containedPackage)-[:CONTAINS]->(containedType:Type)
WITH artifact,
     count(DISTINCT containedPackage.fqn) AS numberOfPackages,
     count(DISTINCT containedType.fqn) AS numberOfTypes
SET artifact.numberOfPackages = numberOfPackages,
    artifact.numberOfTypes = numberOfTypes
RETURN artifact.fileName,
       numberOfPackages,
       numberOfTypes
ORDER BY artifact.fileName
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Usage and spread of internal artifact dependencies
//
// Step 1 (WITH): per (depending artifact, dependency artifact, dependency-is-interface flag)
//   count the distinct packages of the depending artifact that use the dependency and express
//   the used packages/types as a percentage of the artifact's "numberOfPackages"/"numberOfTypes"
//   properties (packageSpread, typesSpread).
// Step 2 (RETURN): per (dependency artifact, dependency-is-interface flag) summarize these
//   values over all depending artifacts (min, max, average, standard deviation, 50th percentile).
//
// Only values that are read further down are aggregated in step 1; name lists and counts
// that no later clause uses are intentionally not collected.

MATCH (artifact:Artifact)-[:CONTAINS]->(packageInArtifact:Package)
MATCH (packageInArtifact)-[:CONTAINS]->(typeInPackage:Type)
MATCH (typeInPackage)-[:DEPENDS_ON]->(dependencyType:Type)
MATCH (dependencyPackage:Package)-[:CONTAINS]->(dependencyType)
MATCH (dependencyArtifact:Artifact)-[:CONTAINS]->(dependencyPackage)
// Only dependencies between two different artifact files are of interest
WHERE artifact.fileName <> dependencyArtifact.fileName
WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
    // Not returned, but kept as grouping keys: the spread expressions below divide by
    // these properties and the grouping itself must stay unchanged.
    ,artifact.numberOfPackages AS packagesInArtifactCount
    ,artifact.numberOfTypes AS typesInArtifactCount
    ,count(DISTINCT packageInArtifact.fqn) AS packagesCount
    ,(100.0
        / artifact.numberOfPackages
        * count(DISTINCT packageInArtifact.fqn)) AS packageSpread
    ,(100.0
        / artifact.numberOfTypes
        * count(DISTINCT typeInPackage.fqn)) AS typesSpread
    ,replace(last(split(dependencyArtifact.fileName, '/')), '.jar', '') AS dependencyArtifactName
    // additionally group by if the dependency is an interface or not
    ,dependencyType:Interface AS dependencyTypeIsInterface
    ,count(DISTINCT dependencyPackage.fqn) AS dependencyPackagesCount
// Keep rows with at least one dependency package that are used by more than one package
WHERE dependencyPackagesCount > 0
  AND packagesCount > 1
RETURN dependencyArtifactName
    ,dependencyTypeIsInterface
    ,COUNT(DISTINCT artifactName) AS usedInArtifacts
    ,SUM(packagesCount) AS usedInPackages

    ,MIN(packageSpread) AS minPackageSpread
    ,MAX(packageSpread) AS maxPackageSpread
    ,AVG(packageSpread) AS avgPackageSpread
    ,stDev(packageSpread) AS stdPackageSpread
    ,percentileDisc(packageSpread, 0.5) AS per5PackageSpread

    ,MIN(packagesCount) AS minPackageCount
    ,MAX(packagesCount) AS maxPackageCount
    ,AVG(packagesCount) AS avgPackageCount
    ,stDev(packagesCount) AS stdPackageCount
    ,percentileDisc(packagesCount, 0.5) AS per5PackageCount

    ,MIN(typesSpread) AS minTypeSpread
    ,MAX(typesSpread) AS maxTypeSpread
    ,AVG(typesSpread) AS avgTypeSpread
    ,stDev(typesSpread) AS stdTypeSpread
    ,percentileDisc(typesSpread, 0.5) AS per5TypeSpread
ORDER BY toLower(dependencyArtifactName) ASC
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Usage and spread of internal artifact dependents
//
// Step 1 (WITH): per (depending artifact, dependency artifact, dependency-is-interface flag)
//   count the distinct packages of the depending artifact that use the dependency, the distinct
//   packages of the dependency that are used, and express the used packages/types as a percentage
//   of the artifact's "numberOfPackages"/"numberOfTypes" properties (packageSpread, typesSpread).
// Step 2 (RETURN): per (depending artifact, dependency-is-interface flag) summarize these
//   values over all its dependency artifacts (min, max, average, standard deviation, 50th percentile).
//
// Only values that are read further down are aggregated in step 1; name lists and counts
// that no later clause uses are intentionally not collected.

MATCH (artifact:Artifact)-[:CONTAINS]->(packageInArtifact:Package)
MATCH (packageInArtifact)-[:CONTAINS]->(typeInPackage:Type)
MATCH (typeInPackage)-[:DEPENDS_ON]->(dependencyType:Type)
MATCH (dependencyPackage:Package)-[:CONTAINS]->(dependencyType)
MATCH (dependencyArtifact:Artifact)-[:CONTAINS]->(dependencyPackage)
// Only dependencies between two different artifact files are of interest
WHERE artifact.fileName <> dependencyArtifact.fileName
WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
    ,artifact.numberOfPackages AS packagesInArtifactCount
    // Not returned, but kept as grouping key: typesSpread below divides by this property
    // and the grouping itself must stay unchanged.
    ,artifact.numberOfTypes AS typesInArtifactCount
    ,count(DISTINCT packageInArtifact.fqn) AS packagesCount
    ,(100.0
        / artifact.numberOfPackages
        * count(DISTINCT packageInArtifact.fqn)) AS packageSpread
    ,(100.0
        / artifact.numberOfTypes
        * count(DISTINCT typeInPackage.fqn)) AS typesSpread
    ,replace(last(split(dependencyArtifact.fileName, '/')), '.jar', '') AS dependencyArtifactName
    // additionally group by if the dependency is an interface or not
    ,dependencyType:Interface AS dependencyTypeIsInterface
    ,count(DISTINCT dependencyPackage.fqn) AS dependencyPackagesCount
// Keep rows with at least one dependency package that are used by more than one package
WHERE dependencyPackagesCount > 0
  AND packagesCount > 1
RETURN artifactName
    ,dependencyTypeIsInterface
    ,COUNT(DISTINCT dependencyArtifactName) AS artifactDependencies
    ,SUM(dependencyPackagesCount) AS artifactDependencyPackages
    ,100.0 / SUM(packagesInArtifactCount) * SUM(packagesCount) AS dependentPackagesRate

    ,MIN(packageSpread) AS minPackageSpread
    ,MAX(packageSpread) AS maxPackageSpread
    ,AVG(packageSpread) AS avgPackageSpread
    ,stDev(packageSpread) AS stdPackageSpread
    ,percentileDisc(packageSpread, 0.5) AS per5PackageSpread

    ,MIN(packagesCount) AS minPackageCount
    ,MAX(packagesCount) AS maxPackageCount
    ,AVG(packagesCount) AS avgPackageCount
    ,stDev(packagesCount) AS stdPackageCount
    ,percentileDisc(packagesCount, 0.5) AS per5PackageCount

    ,MIN(typesSpread) AS minTypeSpread
    ,MAX(typesSpread) AS maxTypeSpread
    ,AVG(typesSpread) AS avgTypeSpread
    ,stDev(typesSpread) AS stdTypeSpread
    ,percentileDisc(typesSpread, 0.5) AS per5TypeSpread
ORDER BY toLower(artifactName) ASC
4 changes: 3 additions & 1 deletion cypher/Candidates_for_Interface_Segregation.cypher
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

MATCH (type:Type)-[:DECLARES]->(method:Method)-[:INVOKES]->(dependentMethod:Method)
MATCH (dependentMethod)<-[:DECLARES]-(dependentType:Type)
MATCH (dependentType)-[:IMPLEMENTS*]->(superType:Type)-[:DECLARES]->(inheritedMethod:Method)
MATCH (dependentType)-[:IMPLEMENTS*1..9]->(superType:Type)-[:DECLARES]->(inheritedMethod:Method)
WHERE type.fqn <> dependentType.fqn
AND dependentMethod.name IS NOT NULL
AND inheritedMethod.name IS NOT NULL
Expand All @@ -15,6 +15,8 @@ WHERE type.fqn <> dependentType.fqn
// Count the different signatures without the return type
// of all declared methods including the inherited ones
,count(DISTINCT split(method.signature, ' ')[1]) + count(DISTINCT split(inheritedMethod.signature, ' ')[1]) AS declaredMethods
// Filter out types that declare only a few more methods than those that are actually used.
// A good interface segregation candidate declares a lot of methods where only a few of them are used widely.
WHERE declaredMethods > calledMethods + 2
WITH fullDependentTypeName
,declaredMethods
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//Community Detection 0 Delete Projection
//
// Removes the in-memory graph projection 'package-dependencies' from the GDS graph catalog
// and returns the details of the dropped graph.
// The second argument is the "failIfMissing" flag of gds.graph.drop (see the GDS graph
// catalog documentation); false is passed so that a missing projection should not raise an error.

CALL gds.graph.drop('package-dependencies', false)
YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime
RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//Community Detection 0 Delete Projection
//
// Removes the in-memory graph projection 'artifact-dependencies' from the GDS graph catalog
// (called with false as second argument) and returns the details of the dropped graph.

CALL gds.graph.drop('artifact-dependencies', false)
YIELD graphName,
      nodeCount,
      relationshipCount,
      creationTime,
      modificationTime
RETURN graphName,
       nodeCount,
       relationshipCount,
       creationTime,
       modificationTime
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
//Community Detection 0b Delete Projection
//
// Removes the in-memory graph projection 'artifact-dependencies-without-empty' from the
// GDS graph catalog (called with false as second argument) and returns the details
// of the dropped graph.

CALL gds.graph.drop('artifact-dependencies-without-empty', false)
YIELD graphName,
      nodeCount,
      relationshipCount,
      creationTime,
      modificationTime
RETURN graphName,
       nodeCount,
       relationshipCount,
       creationTime,
       modificationTime
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
//Community Detection 1 Create undirected Projection
//
// Creates the in-memory graph projection 'artifact-dependencies' out of all Artifact nodes
// and their DEPENDS_ON relationships, which are projected as undirected.
// The relationship property "weight" and the node properties "incomingDependencies" and
// "outgoingDependencies" are loaded into the projection as well.

CALL gds.graph.project(
    'artifact-dependencies',
    'Artifact',
    { DEPENDS_ON: { orientation: 'UNDIRECTED' } },
    {
        nodeProperties: ['incomingDependencies', 'outgoingDependencies'],
        relationshipProperties: ['weight']
    }
)
YIELD graphName,
      nodeCount,
      relationshipCount
RETURN graphName,
       nodeCount,
       relationshipCount
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
//Community Detection 1b Create subgraph without empty artifacts
//
// Derives the projection 'artifact-dependencies-without-empty' from 'artifact-dependencies'.
// The node filter keeps nodes with at least one outgoing or incoming dependency;
// the relationship filter '*' doesn't restrict relationships any further.

CALL gds.beta.graph.project.subgraph(
    'artifact-dependencies-without-empty',                        // name of the new projection
    'artifact-dependencies',                                      // projection to take the data from
    'n.outgoingDependencies > 0 OR n.incomingDependencies > 0',   // node filter
    '*'                                                           // relationship filter
)
YIELD graphName,
      fromGraphName,
      nodeCount,
      relationshipCount,
      nodeFilter
RETURN graphName,
       fromGraphName,
       nodeCount,
       relationshipCount,
       nodeFilter
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//Community Detection 2 Leiden Estimate Memory
//
// Calls the estimate procedure of the Leiden "write" mode for the projection
// 'artifact-dependencies-without-empty' and returns the estimated node and relationship
// counts as well as the estimated memory figures.

CALL gds.beta.leiden.write.estimate(
    'artifact-dependencies-without-empty',
    {
        relationshipWeightProperty: 'weight',
        writeProperty: 'leidenCommunityId',
        consecutiveIds: true,
        gamma: 1.11,
        theta: 0.001
    }
)
YIELD nodeCount, relationshipCount,
      bytesMin, bytesMax,
      heapPercentageMin, heapPercentageMax,
      treeView
RETURN nodeCount, relationshipCount,
       bytesMin, bytesMax,
       heapPercentageMin, heapPercentageMax,
       treeView
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//Community Detection 3 Leiden Statistics
//
// Runs Leiden in "stats" mode on the projection 'artifact-dependencies-without-empty'
// and returns the number of communities, the levels that ran, the modularity values and
// selected figures (min, mean, max, percentiles) of the community size distribution.

CALL gds.beta.leiden.stats(
    'artifact-dependencies-without-empty',
    {
        relationshipWeightProperty: 'weight',
        includeIntermediateCommunities: true,
        gamma: 1.11,
        theta: 0.001
    }
)
YIELD communityCount, ranLevels, modularity, modularities, communityDistribution
RETURN communityCount, ranLevels, modularity, modularities
      // Community size distribution, one column per figure
      ,communityDistribution.min
      ,communityDistribution.mean
      ,communityDistribution.max
      ,communityDistribution.p50
      ,communityDistribution.p75
      ,communityDistribution.p90
      ,communityDistribution.p95
      ,communityDistribution.p99
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
//Community Detection 4 Leiden Stream
//
// Runs Leiden in "stream" mode on the projection 'artifact-dependencies-without-empty'
// and returns one row per (first intermediate community, final community) with the number
// of member artifacts and their names (last path segment of the file name without '.jar').
// Largest communities come first.

CALL gds.beta.leiden.stream(
    'artifact-dependencies-without-empty',
    {
        relationshipWeightProperty: 'weight',
        includeIntermediateCommunities: true,
        gamma: 1.11,
        theta: 0.001
    }
)
YIELD nodeId, communityId, intermediateCommunityIds
// Resolve the streamed node id to the actual node to be able to read its properties
WITH gds.util.asNode(nodeId) AS member,
     communityId,
     intermediateCommunityIds
RETURN intermediateCommunityIds[0] AS firstCommunityId,
       communityId AS finalCommunityId,
       count(DISTINCT member) AS countOfMembers,
       collect(DISTINCT replace(last(split(member.fileName, '/')), '.jar', '')) AS artifactNames
ORDER BY countOfMembers DESC, finalCommunityId ASC
Loading