diff --git a/.github/workflows/code-reports.yml b/.github/workflows/code-reports.yml index 98afce860..e6f0dba9f 100644 --- a/.github/workflows/code-reports.yml +++ b/.github/workflows/code-reports.yml @@ -118,8 +118,20 @@ jobs: working-directory: temp run: ./../scripts/copyReportsIntoResults.sh - # Upload the results in case they are needed for troubleshooting for a couple of days - - name: Archive results + # Upload logs and unfinished reports in case of an error for troubleshooting + - name: Archive failed run with logs and unfinished results + if: failure() + uses: actions/upload-artifact@v3 + with: + name: code-analysis-logs-java-${{ matrix.java }}-python-${{ matrix.python }}-mambaforge-${{ matrix.mambaforge }} + path: | + ./temp/**/runtime/* + ./results + retention-days: 5 + + # Upload successful results in case they are needed for troubleshooting + - name: Archive successful results + if: success() uses: actions/upload-artifact@v3 with: name: code-report-results-java-${{ matrix.java }}-python-${{ matrix.python }}-mambaforge-${{ matrix.mambaforge }} diff --git a/COMMANDS.md b/COMMANDS.md index 1b0db2022..ed0700162 100644 --- a/COMMANDS.md +++ b/COMMANDS.md @@ -64,6 +64,42 @@ a profile, the newest versions will be used. Profiles are scripts that can be fo - Use your own initial Neo4j password - For more details have a look at the script [analyze.sh](./scripts/analysis/analyze.sh) +### Examples + +#### Start an analysis with CSV reports only + +If only the CSV reports are needed, that are the result of Cypher queries and don't need any further dependencies (like Python) +the analysis can be speeded up with: + +```shell +./../../scripts/analysis/analyze.sh --report Csv +``` + +#### Start an analysis with Jupyter reports only + +If only the Jupyter reports are needed e.g. 
when the CSV reports had already been generated, this can be done with: + +```shell +./../../scripts/analysis/analyze.sh --report Jupyter +``` + +#### Start an analysis without PDF generation + +Generating a PDF from a Jupyter notebook using [nbconvert](https://nbconvert.readthedocs.io) might take a while or even fail due to a timeout error. Here is an example of how to skip PDF generation: + +```shell +SKIP_JUPYTER_NOTEBOOK_PDF_GENERATION=true ./../../scripts/analysis/analyze.sh +``` + +#### Setup everything to explore the graph manually + +To prepare everything for analysis including installation, configuration and preparation queries to explore the graph manually +without report generation use this command: + +```shell +./../../scripts/analysis/analyze.sh --explore +``` + ## Generate Markdown References ### Update Cypher Reference diff --git a/cypher/CYPHER.md b/cypher/CYPHER.md index a64d4ba10..4fe5584b8 100644 --- a/cypher/CYPHER.md +++ b/cypher/CYPHER.md @@ -67,18 +67,29 @@ Script | Directory | Description | [Community_Detection_for_Types_6_Leiden_Delete_Labels.cypher](./Community_Detection_for_Types/Community_Detection_for_Types_6_Leiden_Delete_Labels.cypher) | Community_Detection_for_Types | Community Detection for Types 6 Leiden Delete Labels | | [Community_Detection_for_Types_7_Add_LeidenTypeCommunity_Id_label_to_types.cypher](./Community_Detection_for_Types/Community_Detection_for_Types_7_Add_LeidenTypeCommunity_Id_label_to_types.cypher) | Community_Detection_for_Types | Community Detection for Types 7 Add LeidenTypeCommunity+Id label to types with more than one member | | [Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher](./Community_Detection_for_Types/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher) | Community_Detection_for_Types | Which type community spans several artifacts and how are the types distributed? 
| +| [Count_nodes_and_relationships.cypher](./Count_nodes_and_relationships.cypher) | | Count nodes and relationships | +| [Create_a_DEPENDS_ON_relationship_for_every_DEPENDS_ON_ARTIFACT.cypher](./Create_a_DEPENDS_ON_relationship_for_every_DEPENDS_ON_ARTIFACT.cypher) | | Create a DEPENDS_ON relationship for every DEPENDS_ON_ARTIFACT | +| [Create_a_DEPENDS_ON_relationship_for_every_DEPENDS_ON_PACKAGE.cypher](./Create_a_DEPENDS_ON_relationship_for_every_DEPENDS_ON_PACKAGE.cypher) | | Create a DEPENDS_ON relationship for every DEPENDS_ON_PACKAGE | +| [Create_index_for_full_qualified_type_name.cypher](./Create_index_for_full_qualified_type_name.cypher) | | Create index for the full qualified type name | | [Cyclic_Dependencies.cypher](./Cyclic_Dependencies/Cyclic_Dependencies.cypher) | Cyclic_Dependencies | Cyclic Dependencies | | [Cyclic_Dependencies_Concatenated.cypher](./Cyclic_Dependencies/Cyclic_Dependencies_Concatenated.cypher) | Cyclic_Dependencies | Cyclic Dependencies Concatenated | | [Cyclic_Dependencies_as_List.cypher](./Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher) | Cyclic_Dependencies | Cyclic Dependencies as List | | [Cyclic_Dependencies_as_unwinded_List.cypher](./Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher) | Cyclic_Dependencies | Cyclic Dependencies as unwinded List | +| [Cyclic_Dependencies_between_Artrifacts_as_unwinded_List.cypher](./Cyclic_Dependencies/Cyclic_Dependencies_between_Artrifacts_as_unwinded_List.cypher) | Cyclic_Dependencies | Cyclic Dependencies between Artifacts as unwinded List | | [Export_the_whole_database_as_CSV.cypher](./Export_the_whole_database_as_CSV.cypher) | | Export the whole database as CSV | -| [External_package_usage_overall.cypher](./External_Dependencies/External_package_usage_overall.cypher) | External_Dependencies | External package usage overall | -| [External_package_usage_per_artifact.cypher](./External_Dependencies/External_package_usage_per_artifact.cypher) | External_Dependencies | 
External package usage per artifact | +| [External_package_usage_overall.cypher](./External_Dependencies/External_package_usage_overall.cypher) | External_Dependencies | External package usage overall tuned | +| [External_package_usage_per_artifact.cypher](./External_Dependencies/External_package_usage_per_artifact.cypher) | External_Dependencies | External package usage per artifact tuned | | [External_package_usage_per_artifact_and_package.cypher](./External_Dependencies/External_package_usage_per_artifact_and_package.cypher) | External_Dependencies | External package usage per artifact and package | -| [External_package_usage_per_type.cypher](./External_Dependencies/External_package_usage_per_type.cypher) | External_Dependencies | External package usage per type | +| [External_package_usage_per_artifact_and_package_tuned.cypher](./External_Dependencies/External_package_usage_per_artifact_and_package_tuned.cypher) | External_Dependencies | External package usage per artifact and package tuned | +| [External_package_usage_per_artifact_and_package_without_annotations.cypher](./External_Dependencies/External_package_usage_per_artifact_and_package_without_annotations.cypher) | External_Dependencies | External package usage per artifact and package without annotations Note: The exists operation for "isAnnotation" is inefficient for large graphs. 
| +| [External_package_usage_per_type.cypher](./External_Dependencies/External_package_usage_per_type.cypher) | External_Dependencies | External package usage per type tuned | | [External_package_usage_per_type_distribution.cypher](./External_Dependencies/External_package_usage_per_type_distribution.cypher) | External_Dependencies | External package usage per type distribution | +| [External_package_usage_per_type_distribution_without_annotations.cypher](./External_Dependencies/External_package_usage_per_type_distribution_without_annotations.cypher) | External_Dependencies | External package usage per type distribution without annotations | | [External_types_per_artifact_using_requires.cypher](./External_Dependencies/External_types_per_artifact_using_requires.cypher) | External_Dependencies | External types per artifact using requires | +| [Label_external_types_and_annotations.cypher](./External_Dependencies/Label_external_types_and_annotations.cypher) | External_Dependencies | Label external types and annotations | +| [List_external_types_used.cypher](./External_Dependencies/List_external_types_used.cypher) | External_Dependencies | List external types used | | [Maven_POMs_and_their_declared_dependencies.cypher](./External_Dependencies/Maven_POMs_and_their_declared_dependencies.cypher) | External_Dependencies | Maven POMs and their declared dependencies | +| [Remove_external_type_and_annotation_labels.cypher](./External_Dependencies/Remove_external_type_and_annotation_labels.cypher) | External_Dependencies | Remove external type and annotation labels | | [Extract_Custom_Manifest_Entries.cypher](./Extract_Custom_Manifest_Entries.cypher) | | Extract Custom Manifest Entries | | [Get_Awesome_Procedures_On_Cypher_APOC_Version.cypher](./Get_Awesome_Procedures_On_Cypher_APOC_Version.cypher) | | Get Awesome Procedures On Cypher APOC Version | | [Get_Graph_Data_Science_Library_Version.cypher](./Get_Graph_Data_Science_Library_Version.cypher) | | Get Graph Data Science 
Library Version | diff --git a/cypher/Candidates_for_Interface_Segregation.cypher b/cypher/Candidates_for_Interface_Segregation.cypher index 616a23409..2e9157f07 100644 --- a/cypher/Candidates_for_Interface_Segregation.cypher +++ b/cypher/Candidates_for_Interface_Segregation.cypher @@ -1,25 +1,25 @@ -// Candidates for Interface Segregation +// Candidates for Interface Segregation -MATCH (type:Type)-[:DECLARES]->(method:Method)-[:INVOKES]->(dependentMethod:Method)<-[:DECLARES]-(dependentType:Type) -MATCH (dependentType)-[:DECLARES]->(declaredMethod:Method) +MATCH (type:Type)-[:DECLARES]->(method:Method)-[:INVOKES]->(dependentMethod:Method) +MATCH (dependentMethod)<-[:DECLARES]-(dependentType:Type) MATCH (dependentType)-[:IMPLEMENTS*]->(superType:Type)-[:DECLARES]->(inheritedMethod:Method) -WHERE type.fqn <> dependentType +WHERE type.fqn <> dependentType.fqn AND dependentMethod.name IS NOT NULL AND inheritedMethod.name IS NOT NULL AND dependentMethod.name <> '' // ignore constructors AND inheritedMethod.name <> '' // ignore constructors - WITH type - ,dependentType + WITH type.fqn AS fullTypeName + ,dependentType.fqn AS fullDependentTypeName ,collect(DISTINCT dependentMethod.name) AS calledMethodNames - ,count(DISTINCT dependentMethod) AS calledMethods + ,count(DISTINCT dependentMethod) AS calledMethods // Count the different signatures without the return type // of all declared methods including the inherited ones ,count(DISTINCT split(method.signature, ' ')[1]) + count(DISTINCT split(inheritedMethod.signature, ' ')[1]) AS declaredMethods WHERE declaredMethods > calledMethods + 2 - WITH dependentType + WITH fullDependentTypeName ,declaredMethods ,calledMethodNames ,calledMethods - ,count(DISTINCT type.fqn) AS callerTypes - RETURN dependentType.fqn, declaredMethods, calledMethodNames, calledMethods, callerTypes - ORDER BY callerTypes DESC, declaredMethods DESC, dependentType.fqn \ No newline at end of file + ,count(DISTINCT fullTypeName) AS callerTypes + RETURN 
fullDependentTypeName, declaredMethods, calledMethodNames, calledMethods, callerTypes + ORDER BY callerTypes DESC, declaredMethods DESC, fullDependentTypeName \ No newline at end of file diff --git a/cypher/Create_index_for_full_qualified_type_name.cypher b/cypher/Create_index_for_full_qualified_type_name.cypher new file mode 100644 index 000000000..c7e12aa3e --- /dev/null +++ b/cypher/Create_index_for_full_qualified_type_name.cypher @@ -0,0 +1,3 @@ +// Create index for the full qualified type name + +CREATE INDEX INDEX_FULL_QUALIFIED_TYPE_NAME IF NOT EXISTS FOR (t:Type) ON (t.fqn) \ No newline at end of file diff --git a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher index a1aa0ecff..4e09fa3b7 100644 --- a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher +++ b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher @@ -2,6 +2,7 @@ MATCH (package:Package)-[:CONTAINS]->(forwardSource:Type)-[:DEPENDS_ON]->(forwardTarget:Type)<-[:CONTAINS]-(dependentPackage:Package) MATCH (dependentPackage)-[:CONTAINS]->(backwardSource:Type)-[:DEPENDS_ON]->(backwardTarget:Type)<-[:CONTAINS]-(package) +WHERE package <> dependentPackage WITH package ,dependentPackage ,collect(DISTINCT forwardSource.name + '->' + forwardTarget.name) AS forwardDependencies @@ -13,8 +14,7 @@ MATCH (dependentPackage)-[:CONTAINS]->(backwardSource:Type)-[:DEPENDS_ON]->(back ,size(forwardDependencies) AS numberOfForwardDependencies ,size(backwardDependencies) AS numberOfBackwardDependencies ,size(forwardDependencies) + size(backwardDependencies) AS numberOfAllCyclicDependencies -WHERE package <> dependentPackage - AND (size(forwardDependencies) > size(backwardDependencies) +WHERE (size(forwardDependencies) > size(backwardDependencies) OR (size(forwardDependencies) = size(backwardDependencies) AND size(package.fqn) >= size(dependentPackage.fqn))) RETURN package.fqn AS packageName diff --git 
a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher index d4c22c35a..dd52e9f77 100644 --- a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher +++ b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher @@ -2,6 +2,7 @@ MATCH (package:Package)-[:CONTAINS]->(forwardSource:Type)-[:DEPENDS_ON]->(forwardTarget:Type)<-[:CONTAINS]-(dependentPackage:Package) MATCH (dependentPackage)-[:CONTAINS]->(backwardSource:Type)-[:DEPENDS_ON]->(backwardTarget:Type)<-[:CONTAINS]-(package) +WHERE package <> dependentPackage WITH package ,dependentPackage ,collect(DISTINCT forwardSource.name + '->' + forwardTarget.name) AS forwardDependencies @@ -13,8 +14,7 @@ MATCH (dependentPackage)-[:CONTAINS]->(backwardSource:Type)-[:DEPENDS_ON]->(back ,size(forwardDependencies) AS numberOfForwardDependencies ,size(backwardDependencies) AS numberOfBackwardDependencies ,size(forwardDependencies) + size(backwardDependencies) AS numberOfAllCyclicDependencies -WHERE package <> dependentPackage - AND (size(forwardDependencies) > size(backwardDependencies) +WHERE (size(forwardDependencies) > size(backwardDependencies) OR (size(forwardDependencies) = size(backwardDependencies) AND size(package.fqn) >= size(dependentPackage.fqn))) UNWIND (backwardDependencies + forwardDependencies) AS dependency diff --git a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_between_Artrifacts_as_unwinded_List.cypher b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_between_Artrifacts_as_unwinded_List.cypher index 2da2fc856..0d4ceb441 100644 --- a/cypher/Cyclic_Dependencies/Cyclic_Dependencies_between_Artrifacts_as_unwinded_List.cypher +++ b/cypher/Cyclic_Dependencies/Cyclic_Dependencies_between_Artrifacts_as_unwinded_List.cypher @@ -4,6 +4,8 @@ MATCH (package:Package)-[:CONTAINS]->(forwardSource:Type)-[:DEPENDS_ON]->(forwar MATCH 
(dependentPackage)-[:CONTAINS]->(backwardSource:Type)-[:DEPENDS_ON]->(backwardTarget:Type)<-[:CONTAINS]-(package) MATCH (artifact:Artifact)-[:CONTAINS]->(package) MATCH (dependentArtifact:Artifact)-[:CONTAINS]->(dependentPackage) +WHERE artifact <> dependentArtifact + AND package <> dependentPackage WITH artifact ,dependentArtifact ,package @@ -19,9 +21,7 @@ MATCH (dependentArtifact:Artifact)-[:CONTAINS]->(dependentPackage) ,size(forwardDependencies) AS numberOfForwardDependencies ,size(backwardDependencies) AS numberOfBackwardDependencies ,size(forwardDependencies) + size(backwardDependencies) AS numberOfAllCyclicDependencies -WHERE artifact <> dependentArtifact - AND package <> dependentPackage - AND (size(forwardDependencies) > size(backwardDependencies) +WHERE (size(forwardDependencies) > size(backwardDependencies) OR (size(forwardDependencies) = size(backwardDependencies) AND size(package.fqn) >= size(dependentPackage.fqn))) UNWIND (backwardDependencies + forwardDependencies) AS dependency diff --git a/cypher/External_Dependencies/External_package_usage_overall.cypher b/cypher/External_Dependencies/External_package_usage_overall.cypher index 4864430ff..35835e2c5 100644 --- a/cypher/External_Dependencies/External_package_usage_overall.cypher +++ b/cypher/External_Dependencies/External_package_usage_overall.cypher @@ -3,27 +3,15 @@ MATCH (type:Type) WITH count(type) as allTypes, collect(type) as typeList UNWIND typeList AS type - MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:Type) + MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) WITH allTypes ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName - ,externalType.name AS externalTypeName - ,externalDependency - ,(NOT externalType.fqn CONTAINS '.') AS isPrimitiveType - ,(externalType.fqn STARTS WITH 'java.') AS isJavaType - ,exists((externalType)-[:RESOLVES_TO]->(:Type)) AS isAlsoInternalType - ,(externalType.byteCodeVersion IS NULL) AS isExternalType - WHERE isPrimitiveType = false - AND isJavaType = false - AND isAlsoInternalType = false - AND isExternalType = true - WITH allTypes - ,externalPackageName - ,count(externalDependency) AS numberOfExternalTypeCaller - ,sum(externalDependency.weight) AS numberOfExternalTypeCalls - ,collect(DISTINCT externalTypeName) AS externalTypeNames + ,count(externalDependency) AS numberOfExternalTypeCaller + ,sum(externalDependency.weight) AS numberOfExternalTypeCalls + ,collect(DISTINCT externalType.name) AS externalTypeNames RETURN externalPackageName ,numberOfExternalTypeCaller ,numberOfExternalTypeCalls ,allTypes ,externalTypeNames - ORDER BY numberOfExternalTypeCaller DESC, externalTypeNames ASC \ No newline at end of file + ORDER BY numberOfExternalTypeCaller DESC, externalPackageName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact.cypher index 2848d78e7..ab54317fb 100644 --- a/cypher/External_Dependencies/External_package_usage_per_artifact.cypher +++ b/cypher/External_Dependencies/External_package_usage_per_artifact.cypher @@ -1,23 +1,18 @@ -// External package usage per artifact +// External package usage per artifact - MATCH (artifact:Artifact)-[:CONTAINS]->(type:Type) - WITH artifact, count(type) as numberOfTypesInArtifact, collect(type) as typeList -UNWIND typeList as type - MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:Type) - WHERE externalType.byteCodeVersion IS NULL + MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(type:Type) + WITH 
replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(type) AS numberOfTypesInArtifact + ,collect(type) AS typeList +UNWIND typeList AS type + MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) WITH numberOfTypesInArtifact ,externalDependency - ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,artifactName ,type.fqn AS fullTypeName ,type.name AS typeName ,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName ,externalType.name AS externalTypeName - ,(NOT externalType.fqn CONTAINS '.') AS isPrimitiveType - ,(externalType.fqn STARTS WITH 'java.') AS isJavaType - ,exists((externalType)-[:RESOLVES_TO]->(:Type)) AS isAlsoInternalType - WHERE isPrimitiveType = false - AND isJavaType = false - AND isAlsoInternalType = false WITH numberOfTypesInArtifact ,artifactName ,externalPackageName @@ -30,4 +25,4 @@ RETURN artifactName ,numberOfExternalTypeCalls ,numberOfTypesInArtifact ,externalTypeNames -ORDER BY artifactName ASC, numberOfExternalTypeCaller DESC \ No newline at end of file +ORDER BY artifactName ASC, numberOfExternalTypeCaller DESC, externalPackageName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact_and_package.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact_and_package.cypher index 0a74f6b9b..9ea0059ea 100644 --- a/cypher/External_Dependencies/External_package_usage_per_artifact_and_package.cypher +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_and_package.cypher @@ -1,28 +1,24 @@ -// External package usage per artifact and package +// External package usage per artifact and package MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) MATCH (package)-[:CONTAINS]->(type:Type) - WITH artifact, package, count(type) AS numberOfTypesInPackage, collect(type) as typeList + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + 
,package.fqn AS fullPackageName + ,package.name AS packageName + ,count(type) AS numberOfTypesInPackage + ,collect(type) AS typeList UNWIND typeList AS type - MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:Type) - WHERE externalType.byteCodeVersion IS NULL + MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) + WHERE NOT externalType:ExternalAnnotation WITH numberOfTypesInPackage ,externalDependency - ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName - ,package.fqn AS fullPackageName - ,package.name AS packageName + ,artifactName + ,fullPackageName + ,packageName ,type.fqn AS fullTypeName ,type.name AS typeName ,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName ,externalType.name AS externalTypeName - ,(NOT externalType.fqn CONTAINS '.') AS isPrimitiveType - ,(externalType.fqn STARTS WITH 'java.') AS isJavaType - ,exists((externalType)-[:RESOLVES_TO]->(:Type)) AS isAlsoInternalType - ,exists((externalType)<-[:OF_TYPE]-()<-[:ANNOTATED_BY]-()) AS isAnnotation - WHERE isPrimitiveType = false - AND isJavaType = false - AND isAlsoInternalType = false - AND isAnnotation = false WITH numberOfTypesInPackage ,artifactName ,fullPackageName diff --git a/cypher/External_Dependencies/External_package_usage_per_artifact_and_package_with_annotations.cypher b/cypher/External_Dependencies/External_package_usage_per_artifact_and_package_with_annotations.cypher new file mode 100644 index 000000000..fb0f7c799 --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_per_artifact_and_package_with_annotations.cypher @@ -0,0 +1,34 @@ +// External package usage per artifact and package with external annotations + + MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) + MATCH (package)-[:CONTAINS]->(type:Type) + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,package.fqn AS fullPackageName + ,package.name AS packageName + ,count(type) AS 
numberOfTypesInPackage + ,collect(type) AS typeList +UNWIND typeList AS type + MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) + WITH numberOfTypesInPackage + ,externalDependency + ,artifactName + ,fullPackageName + ,packageName + ,type.fqn AS fullTypeName + ,type.name AS typeName + ,replace(externalType.fqn, '.' + externalType.name, '') AS externalPackageName + ,externalType.name AS externalTypeName + WITH numberOfTypesInPackage + ,artifactName + ,fullPackageName + ,packageName + ,externalPackageName + ,count(externalDependency) AS numberOfExternalTypeCaller + ,sum(externalDependency.weight) AS numberOfExternalTypeCalls + ,collect(DISTINCT externalTypeName) AS externalTypeNames +RETURN artifactName, fullPackageName + ,externalPackageName + ,numberOfExternalTypeCaller, numberOfExternalTypeCalls, numberOfTypesInPackage + ,externalTypeNames + ,packageName +ORDER BY numberOfExternalTypeCaller DESC, artifactName ASC, fullPackageName ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_package_usage_per_type.cypher b/cypher/External_Dependencies/External_package_usage_per_type.cypher index a3c5593e1..1b8f9aedf 100644 --- a/cypher/External_Dependencies/External_package_usage_per_type.cypher +++ b/cypher/External_Dependencies/External_package_usage_per_type.cypher @@ -2,8 +2,7 @@ MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) MATCH (package)-[:CONTAINS]->(type:Type) - MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:Type) - WHERE externalType.byteCodeVersion IS NULL + MATCH (type)-[externalDependency:DEPENDS_ON]->(externalType:ExternalType) WITH externalDependency ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName ,package.fqn AS fullPackageName @@ -12,12 +11,6 @@ ,type.name AS typeName ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName ,externalType.name AS externalTypeName - ,(NOT externalType.fqn CONTAINS '.') AS isPrimitiveType - ,(externalType.fqn STARTS WITH 'java.') AS isJavaType - ,exists((externalType)-[:RESOLVES_TO]->(:Type)) AS isAlsoInternalType - WHERE isPrimitiveType = false - AND isJavaType = false - AND isAlsoInternalType = false WITH artifactName ,fullPackageName ,packageName diff --git a/cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher b/cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher index 199fd9c60..7b7445d24 100644 --- a/cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher +++ b/cypher/External_Dependencies/External_package_usage_per_type_distribution.cypher @@ -5,20 +5,12 @@ ,count(type) AS artifactTypes ,collect(type) AS typeList UNWIND typeList AS type - MATCH (type)-[:DEPENDS_ON]->(externalType:Type) - WHERE externalType.byteCodeVersion IS NULL + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + WHERE NOT externalType:ExternalAnnotation WITH artifactName ,artifactTypes ,type.fqn AS fullTypeName ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName - ,(NOT externalType.fqn CONTAINS '.') AS isPrimitiveType - ,(externalType.fqn STARTS WITH 'java.') AS isJavaType - ,exists((externalType)-[:RESOLVES_TO]->(:Type)) AS isAlsoInternalType - ,exists((externalType)<-[:OF_TYPE]-()<-[:ANNOTATED_BY]-()) AS isAnnotation - WHERE isPrimitiveType = false - AND isJavaType = false - AND isAlsoInternalType = false - AND isAnnotation = false WITH artifactName ,artifactTypes ,fullTypeName diff --git a/cypher/External_Dependencies/External_package_usage_per_type_distribution_with_annotations.cypher b/cypher/External_Dependencies/External_package_usage_per_type_distribution_with_annotations.cypher new file mode 100644 index 000000000..20d0fbc2f --- /dev/null +++ b/cypher/External_Dependencies/External_package_usage_per_type_distribution_with_annotations.cypher @@ -0,0 +1,28 @@ +// External package usage per type distribution with external annotations + + MATCH (artifact:Artifact)-[:CONTAINS]->(type:Type) + WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName + ,count(type) AS artifactTypes + ,collect(type) AS typeList +UNWIND typeList AS type + MATCH (type)-[:DEPENDS_ON]->(externalType:ExternalType) + WITH artifactName + ,artifactTypes + ,type.fqn AS fullTypeName + ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalPackageName + WITH artifactName + ,artifactTypes + ,fullTypeName + ,count(DISTINCT externalPackageName) AS numberOfExternalPackages + WITH artifactName + ,artifactTypes + ,numberOfExternalPackages + ,count(DISTINCT fullTypeName) AS numberOfTypes + ,COLLECT(DISTINCT fullTypeName) AS nameOfTypes +RETURN artifactName + ,artifactTypes + ,numberOfExternalPackages + ,numberOfTypes + ,100.0 / artifactTypes * numberOfTypes AS numberOfTypesPercentage + ,nameOfTypes +ORDER BY artifactName ASC, numberOfExternalPackages ASC \ No newline at end of file diff --git a/cypher/External_Dependencies/External_types_per_artifact_using_requires.cypher b/cypher/External_Dependencies/External_types_per_artifact_using_requires.cypher index c17130c93..afc6fadcc 100644 --- a/cypher/External_Dependencies/External_types_per_artifact_using_requires.cypher +++ b/cypher/External_Dependencies/External_types_per_artifact_using_requires.cypher @@ -1,11 +1,8 @@ // External types per artifact using requires -MATCH (artifact:Artifact)-[:REQUIRES]->(externalType:Type) +MATCH (artifact:Artifact)-[:REQUIRES]->(externalType:ExternalType) MATCH (artifact)-[:CONTAINS]->(caller:Type) OPTIONAL MATCH (caller)-[callerDependency:DEPENDS_ON]->(externalType) -WHERE NOT externalType.fqn STARTS WITH 'java.' // ignore - AND externalType.fqn CONTAINS '.' // ignore primitives - AND NOT EXISTS((externalType)-[:RESOLVES_TO]->(:Type)) WITH replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName ,replace(externalType.fqn, '.' 
+ externalType.name, '') AS externalTypePackage ,COLLECT(DISTINCT externalType.name) AS externalTypeNames diff --git a/cypher/External_Dependencies/Label_external_types_and_annotations.cypher b/cypher/External_Dependencies/Label_external_types_and_annotations.cypher new file mode 100644 index 000000000..e6de4a56f --- /dev/null +++ b/cypher/External_Dependencies/Label_external_types_and_annotations.cypher @@ -0,0 +1,17 @@ +// Label external types and annotations + + MATCH (externalType:Type) + WHERE externalType.byteCodeVersion IS NULL // byte code not available -> external dependency + WITH externalType + ,(NOT externalType.fqn CONTAINS '.') AS isPrimitiveType + ,(externalType.fqn STARTS WITH 'java.') AS isJavaType + ,exists((externalType)-[:RESOLVES_TO]->(:Type)) AS isAlsoInternalType + ,exists((externalType)<-[:OF_TYPE]-()<-[:ANNOTATED_BY]-()) AS isAnnotation + WHERE isPrimitiveType = false + AND isJavaType = false + AND isAlsoInternalType = false + WITH externalType + ,CASE WHEN isAnnotation THEN [1] ELSE [] END AS annotated +FOREACH (x in annotated | SET externalType:ExternalAnnotation) + SET externalType:ExternalType + RETURN labels(externalType), count(externalType) as numberOfExternalTypes \ No newline at end of file diff --git a/cypher/External_Dependencies/List_external_types_used.cypher b/cypher/External_Dependencies/List_external_types_used.cypher new file mode 100644 index 000000000..4b9dfac3a --- /dev/null +++ b/cypher/External_Dependencies/List_external_types_used.cypher @@ -0,0 +1,3 @@ +// List external types used + +MATCH (external:ExternalType) RETURN external.fqn \ No newline at end of file diff --git a/cypher/External_Dependencies/Remove_external_type_and_annotation_labels.cypher b/cypher/External_Dependencies/Remove_external_type_and_annotation_labels.cypher new file mode 100644 index 000000000..b4c4e69ec --- /dev/null +++ b/cypher/External_Dependencies/Remove_external_type_and_annotation_labels.cypher @@ -0,0 +1,4 @@ +// Remove external type 
and annotation labels + + MATCH (externalType:ExternalType) + REMOVE externalType:ExternalType:ExternalAnnotation \ No newline at end of file diff --git a/scripts/executeJupyterNotebook.sh b/scripts/executeJupyterNotebook.sh index 1d2198d24..ee5dd6748 100755 --- a/scripts/executeJupyterNotebook.sh +++ b/scripts/executeJupyterNotebook.sh @@ -113,7 +113,7 @@ jupyter nbconvert --to notebook \ --execute "${jupyter_notebook_file}" \ --output "$jupyter_notebook_output_file_name" \ --output-dir="./" \ - --ExecutePreprocessor.timeout=120 \ + --ExecutePreprocessor.timeout=480 \ || exit 5 # Convert the Jupyter Notebook to Markdown diff --git a/scripts/executeQuery.sh b/scripts/executeQuery.sh index 584427de2..55bb2cf1f 100755 --- a/scripts/executeQuery.sh +++ b/scripts/executeQuery.sh @@ -95,7 +95,7 @@ error_message=$( echo "${cyper_query_result}" | jq -r '.errors[0] // empty' ) if [[ -n "${error_message}" ]]; then redColor='\033[0;31m' noColor='\033[0m' # No Color - echo -e "${redColor}${error_message}${noColor}" >&2 + echo -e "${redColor}${cypher_query_file_name}: ${error_message}${noColor}" >&2 fi # Output results in CSV format diff --git a/scripts/prepareAnalysis.sh b/scripts/prepareAnalysis.sh index fbe999375..38a5c03ed 100644 --- a/scripts/prepareAnalysis.sh +++ b/scripts/prepareAnalysis.sh @@ -27,6 +27,10 @@ source "${SCRIPTS_DIR}/executeQueryFunctions.sh" # Local Constants PACKAGE_WEIGHTS_CYPHER_DIR="$CYPHER_DIR/Package_Relationship_Weights" PACKAGE_METRICS_CYPHER_DIR="$CYPHER_DIR/Metrics" +EXTERNAL_DEPENDENCIES_CYPHER_DIR="$CYPHER_DIR/External_Dependencies" + +# Preparation - Create indexes +execute_cypher "${CYPHER_DIR}/Create_index_for_full_qualified_type_name.cypher" || exit 1 # Preparation - Create DEPENDS_ON for every DEPENDS_ON_PACKAGE relationship execute_cypher_expect_results "${CYPHER_DIR}/Create_a_DEPENDS_ON_relationship_for_every_DEPENDS_ON_PACKAGE.cypher" || exit 1 @@ -41,3 +45,9 @@ execute_cypher_expect_results 
"${PACKAGE_WEIGHTS_CYPHER_DIR}/Add_weight10Percent # Preparation - Add Package node properties "incomingDependencies" and "outgoingDependencies" execute_cypher_expect_results "${PACKAGE_METRICS_CYPHER_DIR}/Set_Incoming_Package_Dependencies.cypher" || exit 1 execute_cypher_expect_results "${PACKAGE_METRICS_CYPHER_DIR}/Set_Outgoing_Package_Dependencies.cypher" || exit 1 + +# Preparation - Label external types and annotations +# "external" means that the type has no byte code available, is not a primitive type and is not a java type +# "annotation" means that there is an ANNOTATED_BY to that external type +execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/Remove_external_type_and_annotation_labels.cypher" || exit 1 +execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/Label_external_types_and_annotations.cypher" || exit 1 diff --git a/scripts/reports/ExternalDependenciesCsv.sh b/scripts/reports/ExternalDependenciesCsv.sh index 71a8f43ed..90b33ce98 100755 --- a/scripts/reports/ExternalDependenciesCsv.sh +++ b/scripts/reports/ExternalDependenciesCsv.sh @@ -34,6 +34,11 @@ mkdir -p "${FULL_REPORT_DIRECTORY}" # Local Constants EXTERNAL_DEPENDENCIES_CYPHER_DIR="${CYPHER_DIR}/External_Dependencies" +if ! execute_cypher_expect_results "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/List_external_types_used.cypher"; then + echo "Please execute 'prepareAnalysis.sh' with 'Label_external_types_and_annotations.cypher' first." 
+ exit 1 +fi + execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_overall.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_overall.csv" execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_type.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_type.csv" execute_cypher "${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/External_package_usage_per_artifact.cypher" > "${FULL_REPORT_DIRECTORY}/External_package_usage_per_artifact.csv" diff --git a/scripts/templates/template-neo4j-v4.conf b/scripts/templates/template-neo4j-v4.conf index ce0b02661..5ba512e4e 100644 --- a/scripts/templates/template-neo4j-v4.conf +++ b/scripts/templates/template-neo4j-v4.conf @@ -6,17 +6,17 @@ dbms.security.procedures.unrestricted=apoc.*,gds.* # Memory: Java Heap Size -dbms.memory.heap.initial_size=1024m -dbms.memory.heap.max_size=1024m +dbms.memory.heap.initial_size=4g +dbms.memory.heap.max_size=4g # Memory: The amount of memory to use for mapping the store files. -dbms.memory.pagecache.size=16m +dbms.memory.pagecache.size=1g # Memory: Exits JVM on the first occurrence of an out-of-memory error. dbms.jvm.additional=-XX:+ExitOnOutOfMemoryError # Memory: Limit the amount of memory that all of the running transaction can consume. -dbms.memory.transaction.global_max_size=384m +dbms.memory.transaction.global_max_size=2g # Memory: Limit the amount of memory that a single transaction can consume. 
-dbms.memory.transaction.max_size=256m \ No newline at end of file +dbms.memory.transaction.max_size=2g \ No newline at end of file diff --git a/scripts/templates/template-neo4j.conf b/scripts/templates/template-neo4j.conf index fb7a764e4..201a9acd8 100644 --- a/scripts/templates/template-neo4j.conf +++ b/scripts/templates/template-neo4j.conf @@ -6,17 +6,17 @@ dbms.security.procedures.unrestricted=apoc.*,gds.* # Memory: Java Heap Size -server.memory.heap.initial_size=1024m -server.memory.heap.max_size=1024m +server.memory.heap.initial_size=4g +server.memory.heap.max_size=4g # Memory: The amount of memory to use for mapping the store files. -server.memory.pagecache.size=16m +server.memory.pagecache.size=1g # Memory: Exits JVM on the first occurrence of an out-of-memory error. server.jvm.additional=-XX:+ExitOnOutOfMemoryError # Memory: Limit the amount of memory that all of the running transaction can consume. -db.memory.transaction.total.max=384m +db.memory.transaction.total.max=2g # Memory: Limit the amount of memory that a single transaction can consume. -db.memory.transaction.max=256m \ No newline at end of file +db.memory.transaction.max=2g \ No newline at end of file