Skip to content

Commit 47de7d0

Browse files
committed
Optimize external dependencies report for > scale
1 parent 39f013a commit 47de7d0

File tree

4 files changed

+152
-11
lines changed

4 files changed

+152
-11
lines changed
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// External package usage per artifact sorted by external usage descending. Returns one row per artifact name and external package.
2+
3+
MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(type:Type)
4+
OPTIONAL MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) // OPTIONAL: types without external dependencies stay in the rows so they are counted in numberOfTypesInArtifact
5+
WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName // last path segment of the file name with '.jar' removed; also the grouping key of the counts below
6+
,count(DISTINCT type.fqn) AS numberOfTypesInArtifact
7+
,count(DISTINCT externalType.fqn) AS numberOfExternalTypesInArtifact // null externalType values (no dependency) are not counted
8+
,count(DISTINCT replace(externalType.fqn, '.' + externalType.name, '')) AS numberOfExternalPackagesInArtifact // package name = fqn with '.' + simple type name removed
9+
,collect(DISTINCT type) AS typeList // keeps the type nodes available after the aggregation
10+
UNWIND typeList AS type // back to one row per type, now carrying the per-artifact totals
11+
MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) // non-optional: from here on only types that depend on external types remain
12+
WITH numberOfTypesInArtifact
13+
,numberOfExternalTypesInArtifact
14+
,numberOfExternalPackagesInArtifact
15+
,100.0 / numberOfTypesInArtifact * numberOfExternalTypesInArtifact AS externalTypeRate // external types per 100 types of the artifact
16+
,externalDependency
17+
,artifactName
18+
,type.fqn AS fullTypeName // not referenced by the following clauses
19+
,type.name AS typeName // not referenced by the following clauses
20+
,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName
21+
,externalType.name AS externalTypeName
22+
WITH numberOfTypesInArtifact // the non-aggregated columns of this WITH group the rows per artifact and external package
23+
,numberOfExternalTypesInArtifact
24+
,numberOfExternalPackagesInArtifact
25+
,externalTypeRate
26+
,artifactName
27+
,externalPackageName
28+
,count(externalDependency) AS numberOfExternalTypeCaller // counts the matched DEPENDS_ON relationships of the group
29+
,sum(externalDependency.weight) AS numberOfExternalTypeCalls // sums the weight property of those relationships
30+
,collect(DISTINCT externalTypeName) AS externalTypeNames
31+
RETURN artifactName
32+
,externalPackageName
33+
,numberOfExternalTypeCaller
34+
,numberOfExternalTypeCalls
35+
,numberOfTypesInArtifact
36+
,numberOfExternalTypesInArtifact
37+
,numberOfExternalPackagesInArtifact
38+
,externalTypeRate
39+
,externalTypeNames
40+
ORDER BY externalTypeRate DESC, artifactName ASC, numberOfExternalTypeCaller DESC, externalPackageName ASC
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// External package usage per artifact top externals. Returns one row per artifact name with its top 5 external packages, limited to the 40 artifacts with the highest external type rate.
2+
3+
MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(type:Type)
4+
OPTIONAL MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) // OPTIONAL: types without external dependencies stay in the rows so they are counted in numberOfTypesInArtifact
5+
WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName // last path segment of the file name with '.jar' removed; also the grouping key of the counts below
6+
,count(DISTINCT type.fqn) AS numberOfTypesInArtifact
7+
,count(DISTINCT externalType.fqn) AS numberOfExternalTypesInArtifact
8+
,count(DISTINCT replace(externalType.fqn, '.' + externalType.name, '')) AS numberOfExternalPackagesInArtifact // package name = fqn with '.' + simple type name removed
9+
,collect(DISTINCT type) AS typeList // keeps the type nodes available after the aggregation
10+
UNWIND typeList AS type // back to one row per type, now carrying the per-artifact totals
11+
MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) // non-optional: from here on only types that depend on external types remain
12+
WITH numberOfTypesInArtifact
13+
,numberOfExternalTypesInArtifact
14+
,numberOfExternalPackagesInArtifact
15+
,100.0 / numberOfTypesInArtifact * numberOfExternalTypesInArtifact AS externalTypeRate // external types per 100 types of the artifact
16+
,externalDependency
17+
,artifactName
18+
,type.fqn AS fullTypeName // not referenced by the following clauses
19+
,type.name AS typeName // not referenced by the following clauses
20+
,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName
21+
,externalType.name AS externalTypeName
22+
ORDER BY externalTypeRate DESC, artifactName ASC
23+
WITH numberOfTypesInArtifact // first aggregation: one row per artifact and external package
24+
,numberOfExternalTypesInArtifact
25+
,numberOfExternalPackagesInArtifact
26+
,externalTypeRate
27+
,artifactName
28+
,externalPackageName
29+
,count(externalDependency) AS numberOfExternalTypeCaller // counts the matched DEPENDS_ON relationships of the group
30+
,sum(externalDependency.weight) AS numberOfExternalTypeCalls // sums the weight property of those relationships
31+
,collect(DISTINCT externalTypeName) AS externalTypeNames
32+
ORDER BY externalTypeRate DESC, artifactName ASC, numberOfExternalTypeCaller DESC // most used packages first, so the collect() below gathers them in that order
33+
WITH numberOfTypesInArtifact // second aggregation: one row per artifact
34+
,numberOfExternalTypesInArtifact
35+
,numberOfExternalPackagesInArtifact
36+
,externalTypeRate
37+
,artifactName
38+
,collect(DISTINCT externalPackageName) AS externalPackageNames
39+
,sum(numberOfExternalTypeCaller) AS numberOfExternalTypeCaller
40+
,sum(numberOfExternalTypeCalls) AS numberOfExternalTypeCalls
41+
,collect(externalTypeNames) AS externalTypeNames // list of lists: one list of type names per external package
42+
RETURN artifactName
43+
,numberOfTypesInArtifact
44+
,numberOfExternalTypesInArtifact
45+
,numberOfExternalPackagesInArtifact
46+
,externalTypeRate
47+
,numberOfExternalTypeCaller
48+
,numberOfExternalTypeCalls
49+
,size(externalPackageNames) AS numberOfExternalPackages
50+
,externalPackageNames[0..5] AS top5ExternalPackages // slice end is exclusive: [0..5] yields the first five entries ([0..4] returned only four)
51+
,externalTypeNames[0..1] AS someExternalTypes // only the type name list of the first collected package, as an example
52+
ORDER BY externalTypeRate DESC, artifactName ASC LIMIT 40 // explicit final sort: row order is not guaranteed after the last aggregation, so LIMIT needs it to pick the top artifacts

jupyter/ExternalDependencies.ipynb

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -253,32 +253,75 @@
253253
]
254254
},
255255
{
256-
"attachments": {},
257256
"cell_type": "markdown",
258-
"id": "33c3bb79",
257+
"id": "0bd11586",
259258
"metadata": {},
260259
"source": [
261-
"### Table 3 - External usage per artifact\n",
260+
"### Table 3a - External usage per artifact sorted by highest external type rate descending\n",
261+
"\n",
262+
"The following table shows the most used external packages separately for each artifact including external annotations. The results are sorted by the artifacts with the highest external type usage rate descending. \n",
263+
"\n",
264+
"The intention of this table is to find artifacts that use a lot of external dependencies in relation to their size and get all the external packages and their usage.\n",
262265
"\n",
263-
"The following table shows the most used external packages separately for each artifact including external annotations. \n",
266+
"Here, only the first 40 entries are shown. The whole table can be found in the CSV reports.\n",
264267
"\n",
265268
"**Columns:**\n",
266269
"- *artifactName* is used to group the external package usage per artifact for a more detailed analysis.\n",
267270
"- *externalPackageName* identifies the external package as described above\n",
268271
"- *numberOfExternalTypeCaller* refers to the distinct types that make use of the external package\n",
269272
"- *numberOfExternalTypeCalls* includes every invocation or reference to the types in the external package\n",
270273
"- *numberOfTypesInArtifact* represents the total count of all analyzed types for the artifact\n",
274+
"- *numberOfExternalTypesInArtifact* is the number of all external types that are used by the artifact\n",
275+
"- *numberOfExternalPackagesInArtifact* is the number of all external packages that are used by the artifact\n",
276+
"- *externalTypeRate* is the numberOfExternalTypesInArtifact / numberOfTypesInArtifact * 100\n",
271277
"- *externalTypeNames* contains a list of actually utilized types of the external package"
272278
]
273279
},
280+
{
281+
"cell_type": "code",
282+
"execution_count": null,
283+
"id": "f8459ede",
284+
"metadata": {},
285+
"outputs": [],
286+
"source": [
287+
"query_cypher_to_data_frame(\"../cypher/External_Dependencies/External_package_usage_per_artifact_sorted.cypher\").head(40)"
288+
]
289+
},
290+
{
291+
"attachments": {},
292+
"cell_type": "markdown",
293+
"id": "33c3bb79",
294+
"metadata": {},
295+
"source": [
296+
"### Table 3b - External usage per artifact\n",
297+
"\n",
298+
"The following table shows the most used external packages separately for each artifact including external annotations. The results are grouped per artifact and sorted by the artifacts with the highest external type usage rate descending. Additionally, for each artifact the top 5 used external packages are listed in the top5ExternalPackages column. \n",
299+
"\n",
300+
"The intention of this table is to find artifacts that use a lot of external dependencies in relation to their size and get an overview per artifact with the top 5 used external packages, the number of external types and packages used, etc.\n",
301+
"\n",
302+
"Only the first 40 entries are shown. The whole table can be found in the CSV reports.\n",
303+
"\n",
304+
"**Columns:**\n",
305+
"- *artifactName* is used to group the external package usage per artifact for a more detailed analysis.\n",
306+
"- *numberOfTypesInArtifact* represents the total count of all analyzed types for the artifact\n",
307+
"- *numberOfExternalTypesInArtifact* is the number of all external types that are used by the artifact\n",
308+
"- *numberOfExternalPackagesInArtifact* is the number of all external packages that are used by the artifact\n",
309+
"- *externalTypeRate* is the numberOfExternalTypesInArtifact / numberOfTypesInArtifact * 100\n",
310+
"- *numberOfExternalTypeCaller* refers to the distinct types that make use of the external package\n",
311+
"- *numberOfExternalTypeCalls* includes every invocation or reference to the types in the external package\n",
312+
"- *numberOfExternalPackages* is the number of distinct external packages used by the artifact\n",
313+
"- *top5ExternalPackages* contains a list of the top 5 most used external packages of the artifact\n",
314+
"- *someExternalTypes* contains a list of lists and is also meant to provide some examples of external types used"
315+
]
316+
},
274317
{
275318
"cell_type": "code",
276319
"execution_count": null,
277320
"id": "1637f8ee",
278321
"metadata": {},
279322
"outputs": [],
280323
"source": [
281-
"query_cypher_to_data_frame(\"../cypher/External_Dependencies/External_package_usage_per_artifact.cypher\")"
324+
"query_cypher_to_data_frame(\"../cypher/External_Dependencies/External_package_usage_per_artifact_sorted_top.cypher\").head(40)"
282325
]
283326
},
284327
{
@@ -289,7 +332,9 @@
289332
"source": [
290333
"### Table 4 - External usage per artifact and package\n",
291334
"\n",
292-
"The next table lists internal packages and the artifacts they belong to that use many different external types of a specific external package without taken external annotations into account. Only the first 30 rows are shown.\n",
335+
"This table lists internal packages and the artifacts they belong to that use many different external types of a specific external package without taking external annotations into account. \n",
336+
"\n",
337+
"Only the first 40 entries are shown. The whole table can be found in the CSV reports.\n",
293338
"\n",
294339
"**Columns:**\n",
295340
"- *artifactName* that contains the type that calls the external package\n",
@@ -310,7 +355,7 @@
310355
"outputs": [],
311356
"source": [
312357
"external_package_usage_per_package = query_cypher_to_data_frame(\"../cypher/External_Dependencies/External_package_usage_per_artifact_and_package.cypher\")\n",
313-
"external_package_usage_per_package.head(30)"
358+
"external_package_usage_per_package.head(40)"
314359
]
315360
},
316361
{
@@ -321,7 +366,9 @@
321366
"source": [
322367
"### Table 5 - Top 20 external package usage per type\n",
323368
"\n",
324-
"This table lists the internal types that utilize the most different external types and packages. These have the highest probability of change depending on external libraries. A case-by-case approach is also advisable here because there could for example also be code units that encapsulate an external library and have this high count of external dependencies on purpose.\n",
369+
"This table shows internal types that utilize the most different external types and packages. These have the highest probability of change depending on external libraries. A case-by-case approach is also advisable here because there could for example also be code units that encapsulate an external library and have this high count of external dependencies on purpose.\n",
370+
"\n",
371+
"Only the top 20 entries are shown. The whole table can be found in the CSV reports.\n",
325372
"\n",
326373
"**Columns:**\n",
327374
"- *artifactName* that contains the type that calls the external package\n",
@@ -355,11 +402,12 @@
355402
"source": [
356403
"### Table 6 - External package usage distribution per type\n",
357404
"\n",
358-
"The next table shown here only includes the first 20 rows.\n",
359-
"It shows how many types use one external package, how many use two, etc. .\n",
405+
"This table shows how many types use one external package, how many use two, etc.\n",
360406
"This gives an overview of the distribution of external package calls and the overall coupling to external libraries. The higher the count of distinct external packages the lower should be the count of types that use them. Dependencies to external annotations are left out here.\n",
361407
"\n",
362-
"Have a look above to find out which types have the highest external package dependency usage.\n",
408+
"More details about which types have the highest external package dependency usage can be found in tables 4 and 5 above.\n",
409+
"\n",
410+
"Only the first 20 entries are shown. The whole table can be found in the CSV reports.\n",
363411
"\n",
364412
"**Columns:**\n",
365413
"- *artifactName* that contains the type that calls the external package\n",

scripts/reports/ExternalDependenciesCsv.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ fi
4242
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_overall.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_overall.csv"
4343
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_type.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_type.csv"
4444
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_artifact.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact.csv"
45+
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_artifact_sorted_top.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact_sorted_top.csv"
4546
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_type_distribution.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_type_distribution.csv"
4647
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_artifact_and_package.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact_and_package.csv"
4748
execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/Maven_POMs_and_their_declared_dependencies.cypher" > "${FULL_REPORT_DIRECTORY}/Maven_POM_dependencies.csv"

0 commit comments

Comments
 (0)