Skip to content

Commit 4779262

Browse files
committed
fixup! Optimize external dependencies report for > scale
1 parent 3db4134 commit 4779262

7 files changed

+139
-62
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// External package usage per artifact and external package
2+
3+
// Get the overall artifact statistics first
4+
MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package)
5+
MATCH (package)-[:CONTAINS]->(type:Type)
6+
WHERE NOT type:ExternalType
7+
WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
8+
,count(DISTINCT package.fqn) AS artifactPackages
9+
,count(DISTINCT type.fqn) AS artifactTypes
10+
,collect(type) AS typeList
11+
// Get the external dependencies for each internal type
12+
UNWIND typeList AS type
13+
MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType)
14+
MATCH (typePackage:Package)-[:CONTAINS]->(type)
15+
// Optionally filter out dependencies to external annotations
16+
// WHERE NOT externalType:ExternalAnnotation
17+
WITH artifactName
18+
,artifactPackages
19+
,artifactTypes
20+
,typePackage.fqn AS packageName
21+
,type.fqn AS fullTypeName
22+
,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName
23+
// Group by artifact and external package
24+
RETURN artifactName
25+
,artifactPackages
26+
,artifactTypes
27+
,externalPackageName
28+
,count(DISTINCT packageName) AS numberOfPackages
29+
,COLLECT(DISTINCT packageName) AS nameOfPackages
30+
,count(DISTINCT fullTypeName) AS numberOfTypes
31+
,COLLECT(DISTINCT fullTypeName) AS nameOfTypes
32+
,100.0 / artifactPackages * count(DISTINCT packageName) AS packagesCallingExernalRate
33+
,100.0 / artifactTypes * count(DISTINCT fullTypeName) AS typesCallingExternalRate
34+
// Order the results by number of packages that use the external package dependency descending
35+
ORDER BY numberOfPackages DESC, artifactName ASC, externalPackageName ASC

cypher/External_Dependencies/External_package_usage_per_artifact_distribution.cypher

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,37 @@
33
MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package)
44
MATCH (package)-[:CONTAINS]->(type:Type)
55
WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
6-
,count(DISTINCT type.fqn) AS artifactTypes
76
,count(DISTINCT package.fqn) AS artifactPackages
7+
,count(DISTINCT type.fqn) AS artifactTypes
88
,collect(type) AS typeList
99
UNWIND typeList AS type
1010
MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType)
1111
MATCH (typePackage:Package)-[:CONTAINS]->(type)
1212
WHERE NOT externalType:ExternalAnnotation
1313
WITH artifactName
14-
,artifactTypes
1514
,artifactPackages
15+
,artifactTypes
1616
,typePackage.fqn AS packageName
1717
,type.fqn AS fullTypeName
1818
,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName
1919
WITH artifactName
2020
,artifactPackages
2121
,artifactTypes
22-
,count(DISTINCT externalPackageName) AS numberOfExternalPackages
23-
,count(DISTINCT packageName) AS numberOfPackages
24-
,count(DISTINCT fullTypeName) AS numberOfTypes
25-
,COLLECT(DISTINCT packageName) AS nameOfPackages
26-
,COLLECT(DISTINCT fullTypeName) AS nameOfTypes
22+
,count(DISTINCT externalPackageName) AS numberOfExternalPackages
23+
,COLLECT(DISTINCT externalPackageName) AS nameOfExternalPackages
24+
,count(DISTINCT packageName) AS numberOfPackages
25+
,COLLECT(DISTINCT packageName) AS nameOfPackages
26+
,count(DISTINCT fullTypeName) AS numberOfTypes
27+
,COLLECT(DISTINCT fullTypeName) AS nameOfTypes
2728
RETURN artifactName
28-
,artifactTypes
2929
,artifactPackages
30+
,artifactTypes
3031
,numberOfExternalPackages
3132
,numberOfPackages
3233
,numberOfTypes
33-
,100.0 / artifactTypes * numberOfTypes AS typesCallingExternalOnesRate
34-
,100.0 / artifactPackages * numberOfPackages AS packagesCallingExernalOnesRate
35-
,nameOfPackages
36-
,nameOfTypes[0..9] AS someTypeNames
37-
ORDER BY numberOfPackages DESC
34+
,100.0 / artifactTypes * numberOfTypes AS typesCallingExternalRate
35+
,100.0 / artifactPackages * numberOfPackages AS packagesCallingExternalRate
36+
,nameOfExternalPackages[0..9] AS someExternalPackageNames
37+
,nameOfPackages[0..9] AS someExternalCallingPackageNames
38+
,nameOfTypes[0..9] AS someExternalCallingTypeNames
39+
ORDER BY numberOfPackages DESC, artifactName ASC
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// External package usage per artifact package aggregated
2+
3+
// Get the overall artifact statistics first
4+
MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package)
5+
MATCH (package)-[:CONTAINS]->(type:Type)
6+
WHERE NOT type:ExternalType
7+
WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
8+
,count(DISTINCT package.fqn) AS artifactPackages
9+
,count(DISTINCT type.fqn) AS artifactTypes
10+
,collect(type) AS typeList
11+
// Get the external dependencies for each internal type
12+
UNWIND typeList AS type
13+
MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType)
14+
MATCH (typePackage:Package)-[:CONTAINS]->(type)
15+
// Filter out dependencies to external annotations
16+
WHERE NOT externalType:ExternalAnnotation
17+
WITH artifactName
18+
,artifactPackages
19+
,artifactTypes
20+
,typePackage.fqn AS packageName
21+
,type.fqn AS fullTypeName
22+
,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName
23+
// Group by artifact and external package
24+
WITH artifactName
25+
,artifactPackages
26+
,artifactTypes
27+
,externalPackageName
28+
,count(DISTINCT packageName) AS numberOfPackages
29+
,COLLECT(DISTINCT packageName) AS nameOfPackages
30+
,count(DISTINCT fullTypeName) AS numberOfTypes
31+
,COLLECT(DISTINCT fullTypeName) AS nameOfTypes
32+
,100.0 / artifactPackages * count(DISTINCT packageName) AS packagesCallingExernalRate
33+
,100.0 / artifactTypes * count(DISTINCT fullTypeName) AS typesCallingExternalRate
34+
// Pre order the results by number of packages that use the external package dependency descending
35+
ORDER BY numberOfPackages DESC, artifactName ASC
36+
// Optionally filter out external package dependencies that are only used by one package
37+
// WHERE numberOfPackages > 1
38+
// Group by artifact, aggregate statistics and return the results
39+
RETURN artifactName
40+
,artifactPackages
41+
,artifactTypes
42+
,count(DISTINCT externalPackageName) AS numberOfExternalPackages
43+
44+
// Statistics about the packages and their external package usage count
45+
,max(numberOfPackages) AS maxNumberOfPackages
46+
,min(numberOfPackages) AS minNumberOfPackages
47+
,percentileCont(numberOfPackages, 0.5) AS medNumberOfPackages
48+
,avg(numberOfPackages) AS avgNumberOfPackages
49+
,stDev(numberOfPackages) AS stdNumberOfPackages
50+
51+
// Statistics about the packages and their external package usage percentage
52+
,max(packagesCallingExernalRate) AS maxNumberOfPackagesPercentage
53+
,min(packagesCallingExernalRate) AS minNumberOfPackagesPercentage
54+
,percentileCont(packagesCallingExernalRate, 0.5) AS medNumberOfPackagesPercentage
55+
,avg(packagesCallingExernalRate) AS avgNumberOfPackagesPercentage
56+
,stDev(packagesCallingExernalRate) AS stdNumberOfPackagesPercentage
57+
58+
// Statistics about the types and their external package usage count
59+
,max(numberOfTypes) AS maxNumberOfTypes
60+
,min(numberOfTypes) AS minNumberOfTypes
61+
,percentileCont(numberOfTypes, 0.5) AS medNumberOfTypes
62+
,avg(numberOfTypes) AS avgNumberOfTypes
63+
,stDev(numberOfTypes) AS stdNumberOfTypes
64+
65+
// Statistics about the types and their external package usage count percentage
66+
,max(typesCallingExternalRate) AS maxNumberOfTypesPercentage
67+
,min(typesCallingExternalRate) AS minNumberOfTypesPercentage
68+
,percentileCont(typesCallingExternalRate, 0.5) AS medNumberOfTypesPercentage
69+
,avg(typesCallingExternalRate) AS avgNumberOfTypesPercentage
70+
,stDev(typesCallingExternalRate) AS stdNumberOfTypesPercentage
71+
72+
// Examples of external packages, caller packages and caller types
73+
,collect(externalPackageName)[0..9] AS top10ExternalPackageNamesByUsageDescending
74+
,COLLECT(nameOfPackages)[0][0..9] AS somePackageNames
75+
,COLLECT(nameOfTypes)[0][0..9] AS someTypeNames
76+
77+
ORDER BY maxNumberOfPackages DESC, artifactName ASC

cypher/External_Dependencies/External_package_usage_per_package.cypher

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package)
44
MATCH (package)-[:CONTAINS]->(type:Type)
55
WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
6-
,count(DISTINCT type.fqn) AS artifactTypes
76
,count(DISTINCT package.fqn) AS artifactPackages
7+
,count(DISTINCT type.fqn) AS artifactTypes
88
,collect(type) AS typeList
99
UNWIND typeList AS type
1010
MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType)
@@ -25,8 +25,8 @@ UNWIND typeList AS type
2525
,count(DISTINCT fullTypeName) AS numberOfTypes
2626
,COLLECT(DISTINCT fullTypeName) AS nameOfTypes
2727
RETURN artifactName
28-
,artifactTypes
2928
,artifactPackages
29+
,artifactTypes
3030
,externalPackageName
3131
,numberOfPackages
3232
,numberOfTypes

cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher

Lines changed: 0 additions & 38 deletions
This file was deleted.

jupyter/ExternalDependencies.ipynb

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -424,9 +424,9 @@
424424
"metadata": {},
425425
"outputs": [],
426426
"source": [
427-
"external_package_usage_per_type_distribution = query_cypher_to_data_frame(\"../cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher\")\n",
428-
"external_package_usage_per_type_distribution_truncated=external_package_usage_per_type_distribution[['artifactName', 'artifactPackages', 'artifactTypes', 'numberOfExternalPackages', 'numberOfTypes', 'typesCallingExternalRate', 'packagesCallingExternalRate']].head(20)\n",
429-
"external_package_usage_per_type_distribution_truncated"
427+
"external_package_usage_per_artifact_distribution = query_cypher_to_data_frame(\"../cypher/External_Dependencies/External_package_usage_per_artifact_distribution.cypher\")\n",
428+
"external_package_usage_per_artifact_distribution_truncated=external_package_usage_per_artifact_distribution[['artifactName', 'artifactPackages', 'artifactTypes', 'numberOfExternalPackages', 'numberOfTypes', 'typesCallingExternalRate', 'packagesCallingExternalRate']].head(20)\n",
429+
"external_package_usage_per_artifact_distribution_truncated"
430430
]
431431
},
432432
{
@@ -450,12 +450,12 @@
450450
"\n",
451451
"# Organize artifacts in columns with the number of packages that call external packages as values using pivot\n",
452452
"# Every row represents the number of external packages\n",
453-
"external_package_usage_per_type_distribution_for_each_artifact=external_package_usage_per_type_distribution.pivot(index='numberOfExternalPackages', columns='artifactName', values='packagesCallingExternalRate')\n",
453+
"external_package_usage_per_artifact_distribution_for_each_artifact=external_package_usage_per_artifact_distribution.pivot(index='numberOfExternalPackages', columns='artifactName', values='packagesCallingExternalRate')\n",
454454
"\n",
455455
"# Fill missing values with zero\n",
456-
"external_package_usage_per_type_distribution_for_each_artifact.fillna(0, inplace=True)\n",
456+
"external_package_usage_per_artifact_distribution_for_each_artifact.fillna(0, inplace=True)\n",
457457
"\n",
458-
"external_package_usage_per_type_distribution_for_each_artifact"
458+
"external_package_usage_per_artifact_distribution_for_each_artifact"
459459
]
460460
},
461461
{
@@ -477,7 +477,7 @@
477477
"outputs": [],
478478
"source": [
479479
"plot.figure();\n",
480-
"axes = external_package_usage_per_type_distribution_for_each_artifact.plot(\n",
480+
"axes = external_package_usage_per_artifact_distribution_for_each_artifact.plot(\n",
481481
" kind='bar', \n",
482482
" grid=True,\n",
483483
" title='Relative External Package Usage', \n",
@@ -507,7 +507,7 @@
507507
"outputs": [],
508508
"source": [
509509
"plot.figure();\n",
510-
"axes = external_package_usage_per_type_distribution_for_each_artifact.transpose().plot(\n",
510+
"axes = external_package_usage_per_artifact_distribution_for_each_artifact.transpose().plot(\n",
511511
" kind='bar', \n",
512512
" grid=True,\n",
513513
" title='Relative External Package Usage', \n",

scripts/reports/ExternalDependenciesCsv.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_overa
4343
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_type.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_type.csv"
4444
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_artifact.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact.csv"
4545
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_artifact_sorted_top.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact_sorted_top.csv"
46-
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_type_distribution.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_type_distribution.csv"
46+
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_artifact_distribution.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact_distribution.csv"
47+
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_package_distribution.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact_package_aggregated.csv"
4748
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_artifact_and_package.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact_and_package.csv"
4849
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/Maven_POMs_and_their_declared_dependencies.cypher" > "${FULL_REPORT_DIRECTORY}/Maven_POM_dependencies.csv"

0 commit comments

Comments
 (0)