JohT · JohT · Jul 13, 2024 · Jul 8, 2024 · Jul 10, 2024 · Jul 11, 2024
diff --git a/COMMANDS.md b/COMMANDS.md
@@ -233,7 +233,7 @@ Use [importGitLog.sh](./scripts/importGitLog.sh) to import git log data into the
 It uses `git log` to extract commits, their authors and the names of the files changed with them. These are stored in an intermediate CSV file and are then imported into Neo4j with the following schema:
 
 ```Cypher
-(Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)->[:CONTAINS]->(Git:Log:File)
+(Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)-[:CONTAINS_CHANGED]->(Git:Log:File)
 (Git:Log:Commit)->[:HAS_PARENT]-(Git:Log:Commit)
 ```
 
@@ -254,7 +254,7 @@ You can use [List_unresolved_git_files.cypher](./cypher/GitLog/List_unresolved_g
 Use [importAggregatedGitLog.sh](./scripts/importAggregatedGitLog.sh) to import git log data in an aggregated form into the Graph. It works similar to the [full git log version above](#import-git-log). The only difference is that not every single commit is imported. Instead, changes are grouped per month including their commit count. This is in many cases sufficient and reduces data size and processing time significantly. Here is the resulting schema:
 
 ```Cypher
-(Git:Log:Author)-[:AUTHORED]->(Git:Log:ChangeSpan)-[:CONTAINS]->(Git:Log:File)
+(Git:Log:Author)-[:AUTHORED]->(Git:Log:ChangeSpan)-[:CONTAINS_CHANGED]->(Git:Log:File)
 ```
 
 ## Database Queries

diff --git a/cypher/Centrality/Centrality_1c_Label_Delete.cypher b/cypher/Centrality/Centrality_1c_Label_Delete.cypher
@@ -1,7 +1,7 @@
 // Centrality Label Delete
 
   CALL db.labels() YIELD label
- WHERE label = 'Top' + apoc.text.capitalize($dependencies_projection_write_property)
+ WHERE label = 'Mark4Top' + apoc.text.capitalize($dependencies_projection_write_property)
   WITH collect(label) AS selectedLabels
  MATCH (member)
  WHERE $dependencies_projection_node IN LABELS(member) 

diff --git a/cypher/Centrality/Centrality_1d_Label_Add.cypher b/cypher/Centrality/Centrality_1d_Label_Add.cypher
@@ -10,7 +10,7 @@ UNWIND members AS member
  ORDER BY member[$dependencies_projection_write_property] DESCENDING    
   WITH memberCount2Percent
       ,collect(DISTINCT member)[0..memberCount2Percent] AS topMembers
-      ,'Top' + apoc.text.capitalize($dependencies_projection_write_property) AS labelName
+      ,'Mark4Top' + apoc.text.capitalize($dependencies_projection_write_property) AS labelName
 UNWIND topMembers AS topMember
   CALL apoc.create.addLabels(topMember, [labelName]) YIELD node
 RETURN count(node) AS nodesCount
diff --git a/...er/Community_Detection/Community_Detection_10a_LocalClusteringCoefficient_Estimate.cypher b/...er/Community_Detection/Community_Detection_10a_LocalClusteringCoefficient_Estimate.cypher
@@ -0,0 +1,23 @@
+// Community Detection - Local Clustering Coefficient - Estimate
+// Calls the memory estimation procedure of the write mode for the "-cleaned" projection.
+CALL gds.localClusteringCoefficient.write.estimate(
+    $dependencies_projection + '-cleaned',
+    { writeProperty: $dependencies_projection_write_property }
+)
+YIELD requiredMemory,
+      nodeCount,
+      relationshipCount,
+      bytesMin,
+      bytesMax,
+      heapPercentageMin,
+      heapPercentageMax,
+      treeView,
+      mapView // NOTE(review): yielded but not part of the RETURN below - confirm that this is intended
+RETURN requiredMemory,
+       nodeCount,
+       relationshipCount,
+       bytesMin,
+       bytesMax,
+       heapPercentageMin,
+       heapPercentageMax,
+       treeView
diff --git a/.../Community_Detection/Community_Detection_10b_LocalClusteringCoefficient_Statistics.cypher b/.../Community_Detection/Community_Detection_10b_LocalClusteringCoefficient_Statistics.cypher
@@ -0,0 +1,7 @@
+// Community Detection - Local Clustering Coefficient - Statistics
+// Stats mode with an empty configuration on the "-cleaned" projection; the yielded summary columns are returned unchanged.
+
+CALL gds.localClusteringCoefficient.stats($dependencies_projection + '-cleaned', {})
+YIELD averageClusteringCoefficient, nodeCount,
+      preProcessingMillis, computeMillis, postProcessingMillis
+RETURN averageClusteringCoefficient, nodeCount, preProcessingMillis, computeMillis, postProcessingMillis
diff --git a/cypher/Community_Detection/Community_Detection_10c_LocalClusteringCoefficient_Mutate.cypher b/cypher/Community_Detection/Community_Detection_10c_LocalClusteringCoefficient_Mutate.cypher
@@ -0,0 +1,8 @@
+// Community Detection - Local Clustering Coefficient - Mutate
+// Mutate mode: the property named by $dependencies_projection_write_property is passed as mutateProperty.
+CALL gds.localClusteringCoefficient.mutate(
+    $dependencies_projection + '-cleaned',
+    { mutateProperty: $dependencies_projection_write_property }
+)
+YIELD averageClusteringCoefficient, nodeCount, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, mutateMillis
+RETURN averageClusteringCoefficient, nodeCount, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, mutateMillis
diff --git a/cypher/Community_Detection/Community_Detection_10d_LocalClusteringCoefficient_Stream.cypher b/cypher/Community_Detection/Community_Detection_10d_LocalClusteringCoefficient_Stream.cypher
@@ -0,0 +1,13 @@
+// Community Detection - Local Clustering Coefficient - Stream
+// Streams one row per node with its coefficient and a display name, highest coefficient first.
+
+CALL gds.localClusteringCoefficient.stream($dependencies_projection + '-cleaned', {})
+YIELD nodeId, localClusteringCoefficient
+WITH localClusteringCoefficient
+    ,gds.util.asNode(nodeId) AS streamedNode
+// The display name is the first non-null of fqn, fileName and name.
+WITH localClusteringCoefficient
+    ,coalesce(streamedNode.fqn, streamedNode.fileName, streamedNode.name) AS memberName
+RETURN localClusteringCoefficient
+      ,memberName
+ORDER BY localClusteringCoefficient DESC, memberName ASC
diff --git a/...ity_Detection/Community_Detection_10d_LocalClusteringCoefficient_Stream_Aggregated.cypher b/...ity_Detection/Community_Detection_10d_LocalClusteringCoefficient_Stream_Aggregated.cypher
@@ -0,0 +1,17 @@
+// Community Detection - Local Clustering Coefficient - Stream Aggregated
+// Groups the streamed nodes by their coefficient rounded to two decimals and
+// reports per group the number of distinct member names and up to nine of them.
+
+CALL gds.localClusteringCoefficient.stream($dependencies_projection + '-cleaned', {})
+YIELD nodeId, localClusteringCoefficient
+WITH localClusteringCoefficient
+    ,gds.util.asNode(nodeId) AS streamedNode
+WITH localClusteringCoefficient
+    ,coalesce(streamedNode.fqn, streamedNode.fileName, streamedNode.name) AS memberName
+WITH round(localClusteringCoefficient, 2) AS localClusteringCoefficient
+    ,count(DISTINCT memberName)           AS memberCount
+    ,collect(DISTINCT memberName)[0..9]   AS memberNameExamples
+RETURN localClusteringCoefficient
+      ,memberCount
+      ,memberNameExamples
+ORDER BY localClusteringCoefficient DESC, memberCount DESC
diff --git a/cypher/Community_Detection/Community_Detection_10e_LocalClusteringCoefficient_Write.cypher b/cypher/Community_Detection/Community_Detection_10e_LocalClusteringCoefficient_Write.cypher
@@ -0,0 +1,8 @@
+// Community Detection - Local Clustering Coefficient - Write
+// Write mode: the property named by $dependencies_projection_write_property is passed as writeProperty.
+CALL gds.localClusteringCoefficient.write(
+    $dependencies_projection + '-cleaned',
+    { writeProperty: $dependencies_projection_write_property }
+)
+YIELD averageClusteringCoefficient, nodeCount, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
+RETURN averageClusteringCoefficient, nodeCount, nodePropertiesWritten, preProcessingMillis, computeMillis, postProcessingMillis, writeMillis
diff --git a/cypher/Dependencies_Projection/Dependencies_10_Delete_Label.cypher b/cypher/Dependencies_Projection/Dependencies_10_Delete_Label.cypher
@@ -1,7 +1,7 @@
 // Community Detection Label Propagation Label Delete
 
   CALL db.labels() YIELD label
- WHERE label STARTS WITH $dependencies_projection_node + $dependencies_projection_write_label
+ WHERE label STARTS WITH 'Mark4' + $dependencies_projection_node + $dependencies_projection_write_label
   WITH collect(label) AS selectedLabels
  MATCH (member)
  WHERE $dependencies_projection_node IN labels(member) 

diff --git a/cypher/Dependencies_Projection/Dependencies_11_Add_Label.cypher b/cypher/Dependencies_Projection/Dependencies_11_Add_Label.cypher
@@ -5,7 +5,7 @@
    AND $dependencies_projection_node IN LABELS(member) 
   WITH collect(member)            AS members
       ,count(DISTINCT member)     AS memberCount
-      ,$dependencies_projection_node + $dependencies_projection_write_label + toString(member[$dependencies_projection_write_property]) AS labelName
+      ,'Mark4' + $dependencies_projection_node + $dependencies_projection_write_label + toString(member[$dependencies_projection_write_property]) AS labelName
  WHERE memberCount > 1
 UNWIND members AS member
   CALL apoc.create.addLabels(member, [labelName]) YIELD node

diff --git a/cypher/GitLog/Import_aggregated_git_log_csv_data.cypher b/cypher/GitLog/Import_aggregated_git_log_csv_data.cypher
@@ -10,7 +10,7 @@ CALL { WITH row
     })
     MERGE (git_file:Git:Log:File {fileName: row.filename})
     MERGE (git_author)-[:AUTHORED]->(git_change_span)
-    MERGE (git_change_span)-[:CONTAINS]->(git_file)
+    MERGE (git_change_span)-[:CONTAINS_CHANGED]->(git_file)
 } IN TRANSACTIONS OF 1000 ROWS
 RETURN count(DISTINCT row.author)   AS numberOfAuthors
       ,count(DISTINCT row.filename) AS numberOfFiles

diff --git a/cypher/GitLog/Import_git_log_csv_data.cypher b/cypher/GitLog/Import_git_log_csv_data.cypher
@@ -12,7 +12,7 @@ CALL { WITH row
     })
     MERGE (git_file:Git:Log:File {fileName: row.filename})
     MERGE (git_author)-[:AUTHORED]->(git_commit)
-    MERGE (git_commit)-[:CONTAINS]->(git_file)
+    MERGE (git_commit)-[:CONTAINS_CHANGED]->(git_file)
 } IN TRANSACTIONS OF 1000 ROWS
 RETURN count(DISTINCT row.author)   AS numberOfAuthors
       ,count(DISTINCT row.filename) AS numberOfFiles

diff --git a/cypher/GitLog/List_ambiguous_git_files.cypher b/cypher/GitLog/List_ambiguous_git_files.cypher
@@ -1,7 +1,7 @@
 // List ambigiously resolved git files where a single git file is attached to more than one code file for troubleshooting/testing.
 
 MATCH (file:File&!Git)<-[:RESOLVES_TO]-(git_file:File&Git)
-OPTIONAL MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(file)
+OPTIONAL MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(file) // archive containment keeps CONTAINS; CONTAINS_CHANGED is only created from git commits/change spans to git files
  WITH file.fileName                                           AS fileName
      ,reverse(split(reverse(file.fileName),'.')[0])           AS fileExtension
      ,count(DISTINCT git_file.fileName)                       AS gitFilesCount

diff --git a/cypher/GitLog/Set_number_of_aggregated_git_commits.cypher b/cypher/GitLog/Set_number_of_aggregated_git_commits.cypher
@@ -1,7 +1,7 @@
 // Set numberOfGitCommits property on code File nodes when aggregated change spans with grouped commits are present.
 
 MATCH (code_file:File&!Git)<-[:RESOLVES_TO]-(git_file:File&Git)
-MATCH (git_file)<-[:CONTAINS]-(git_changespan:Git:ChangeSpan)
+MATCH (git_file)<-[:CONTAINS_CHANGED]-(git_changespan:Git:ChangeSpan)
  WITH code_file, sum(git_changespan.commits) AS numberOfGitCommits
   SET code_file.numberOfGitCommits = numberOfGitCommits
 RETURN count(DISTINCT coalesce(code_file.absoluteFileName, code_file.fileName)) AS changedCodeFiles
diff --git a/cypher/GitLog/Set_number_of_git_commits.cypher b/cypher/GitLog/Set_number_of_git_commits.cypher
@@ -1,7 +1,7 @@
 // Set numberOfGitCommits property on code File nodes when git commits are present
 
 MATCH (code_file:File&!Git)<-[:RESOLVES_TO]-(git_file:File&Git)
-MATCH (git_file)<-[:CONTAINS]-(git_commit:Git:Commit)
+MATCH (git_file)<-[:CONTAINS_CHANGED]-(git_commit:Git:Commit)
  WITH code_file, count(DISTINCT git_commit.hash) AS numberOfGitCommits
   SET code_file.numberOfGitCommits = numberOfGitCommits
 RETURN count(DISTINCT coalesce(code_file.absoluteFileName, code_file.fileName)) AS changedCodeFiles
diff --git a/cypher/Overview/Node_label_combination_count.cypher b/cypher/Overview/Node_label_combination_count.cypher
@@ -0,0 +1,22 @@
+// Node count for each label combination. Sums up to the total number of nodes.
+// Labels starting with 'Mark4' are ignored when building the combinations.
+// The labels are filtered with a list comprehension instead of UNWIND + collect:
+// UNWIND emits no row for an empty list and the filter could remove every label,
+// so such nodes would be dropped and the percentages would not add up to 100.
+// They are now counted under the empty combination [].
+
+ MATCH (allNodes)
+  WITH count(allNodes) AS totalNodeCount
+ MATCH (nodesAndTheirLabels)
+  WITH totalNodeCount
+      ,nodesAndTheirLabels
+      ,[nodeLabel IN labels(nodesAndTheirLabels)
+         WHERE NOT nodeLabel STARTS WITH 'Mark4'] AS nodeLabels
+  WITH totalNodeCount
+      ,nodeLabels
+      ,count(nodesAndTheirLabels) AS nodesWithThatLabels
+  WITH nodeLabels
+      ,nodesWithThatLabels
+      ,toFloat(nodesWithThatLabels) / totalNodeCount * 100.0 AS nodesWithThatLabelsPercent
+RETURN nodeLabels, nodesWithThatLabels, nodesWithThatLabelsPercent
+ORDER BY nodesWithThatLabels DESC
diff --git a/cypher/Overview/Node_label_count.cypher b/cypher/Overview/Node_label_count.cypher
@@ -0,0 +1,16 @@
+// Node count for each label separately. Doesn't sum up to the total number of nodes since one node can have multiple labels.
+
+ MATCH (everyNode)
+  WITH count(everyNode) AS totalNodeCount
+ MATCH (labeledNode)
+UNWIND labels(labeledNode) AS nodeLabel
+  WITH totalNodeCount, nodeLabel, labeledNode
+ WHERE NOT nodeLabel STARTS WITH 'Mark4' // labels with the 'Mark4' prefix are left out of the result
+  WITH totalNodeCount
+      ,nodeLabel
+      ,count(labeledNode) AS nodesWithThatLabel
+  WITH nodeLabel
+      ,nodesWithThatLabel
+      ,toFloat(nodesWithThatLabel) / totalNodeCount * 100.0 AS nodesWithThatLabelPercent
+RETURN nodeLabel, nodesWithThatLabel, nodesWithThatLabelPercent
+ORDER BY nodesWithThatLabel DESC, nodeLabel ASC
diff --git a/cypher/Overview/Node_labels_and_their_relationships.cypher b/cypher/Overview/Node_labels_and_their_relationships.cypher
@@ -0,0 +1,29 @@
+// Lists for every combination of source labels, relationship type and target labels
+// the number of relationships, the node counts on both sides and the density in percent.
+ MATCH (sourceNode)
+  WITH labels(sourceNode)  AS sourceLabels
+      ,collect(sourceNode) AS sourceNodes
+      ,count(sourceNode)   AS numberOfNodesWithSameLabelsAsSource
+UNWIND sourceNodes AS sourceNode
+ MATCH (sourceNode)-[outgoing]->(targetNode)
+  WITH sourceLabels
+      ,numberOfNodesWithSameLabelsAsSource
+      ,type(outgoing)           AS relationType
+      ,labels(targetNode)       AS targetLabels
+      ,count(DISTINCT outgoing) AS numberOfRelationships
+// Count all nodes that carry exactly the same label list as the targets of this row.
+  WITH sourceLabels
+      ,relationType
+      ,targetLabels
+      ,numberOfRelationships
+      ,numberOfNodesWithSameLabelsAsSource
+      ,count{ MATCH (candidate) WHERE labels(candidate) = targetLabels } AS numberOfNodesWithSameLabelsAsTarget
+// Density = relationships in relation to all possible source/target node pairs.
+RETURN sourceLabels
+      ,relationType
+      ,targetLabels
+      ,numberOfRelationships
+      ,numberOfNodesWithSameLabelsAsSource
+      ,numberOfNodesWithSameLabelsAsTarget
+      ,toFloat(numberOfRelationships) / (numberOfNodesWithSameLabelsAsSource * numberOfNodesWithSameLabelsAsTarget) * 100 AS densityInPercent
+ORDER BY numberOfRelationships DESC
diff --git a/cypher/Overview/Number_of_elements_per_module_for_Typescript.cypher b/cypher/Overview/Number_of_elements_per_module_for_Typescript.cypher
@@ -0,0 +1,33 @@
+// Number of elements per module for Typescript
+// Returns one row per module and element label with the number of exported elements
+// carrying that label, next to the total number of exported elements of the module.
+
+ MATCH (module:TS:Module)-[:EXPORTS]->(exported:TS)
+OPTIONAL MATCH (module)<-[:CONTAINS]-(project:TS:Project)-[:HAS_ROOT]->(projectRoot:Directory)
+// The module path is the global name without the project root directory prefix (if there is one).
+  WITH module.name                AS moduleName
+      ,replace(module.globalFqn,
+               coalesce(projectRoot.absoluteFileName + '/', ''),
+               '')                AS modulePath
+      ,module.globalFqn           AS fullQualifiedModuleName
+      ,count(DISTINCT exported)   AS numberOfModuleElements
+      ,collect(DISTINCT exported) AS moduleElements
+// Fan out to one row per exported element and label.
+UNWIND moduleElements AS element
+UNWIND labels(element) AS typeLabel
+// 'TS' and 'ExternalDeclaration' are not reported as language elements.
+  WITH moduleName
+      ,modulePath
+      ,fullQualifiedModuleName
+      ,numberOfModuleElements
+      ,element
+      ,typeLabel
+ WHERE NOT typeLabel IN ['TS', 'ExternalDeclaration']
+// count(element) is grouped by all other returned columns.
+RETURN moduleName
+      ,modulePath
+      ,fullQualifiedModuleName
+      ,numberOfModuleElements
+      ,typeLabel       AS languageElement
+      ,count(element)  AS numberOfElements
+ ORDER BY numberOfModuleElements DESC, moduleName ASC
diff --git a/cypher/Overview/Overview_size_for_Typescript.cypher b/cypher/Overview/Overview_size_for_Typescript.cypher
@@ -0,0 +1,77 @@
+// Overview size for Typescript
+// Counts use OPTIONAL MATCH: with a plain MATCH a single missing element kind (e.g. no classes) would leave the whole result empty instead of reporting 0.
+ MATCH (n)
+  WITH count(n) AS nodeCount
+OPTIONAL MATCH ()-[anyRelationship]->()
+  WITH nodeCount
+      ,count(anyRelationship) AS relationshipCount
+OPTIONAL MATCH (project:TS&Project)
+  WITH nodeCount
+      ,relationshipCount
+      ,count(DISTINCT project) AS projectCount
+OPTIONAL MATCH (module:TS&Module)
+  WITH nodeCount
+      ,relationshipCount
+      ,projectCount
+      ,count(DISTINCT module.globalFqn) AS moduleCount
+OPTIONAL MATCH (function:TS&Function)
+  WITH nodeCount
+      ,relationshipCount
+      ,projectCount
+      ,moduleCount
+      ,count(DISTINCT function) AS functionCount
+OPTIONAL MATCH (object:TS&Object)
+  WITH nodeCount
+      ,relationshipCount
+      ,projectCount
+      ,moduleCount
+      ,functionCount
+      ,count(DISTINCT object) AS objectCount
+OPTIONAL MATCH (typeAlias:TS&TypeAlias)
+  WITH nodeCount
+      ,relationshipCount
+      ,projectCount
+      ,moduleCount
+      ,functionCount
+      ,objectCount
+      ,count(DISTINCT typeAlias) AS typeAliasCount
+OPTIONAL MATCH (interface:TS&Interface)
+  WITH nodeCount
+      ,relationshipCount
+      ,projectCount
+      ,moduleCount
+      ,functionCount
+      ,objectCount
+      ,typeAliasCount
+      ,count(DISTINCT interface) AS interfaceCount
+OPTIONAL MATCH (method:TS&Method)
+  WITH nodeCount
+      ,relationshipCount
+      ,projectCount
+      ,moduleCount
+      ,functionCount
+      ,objectCount
+      ,typeAliasCount
+      ,interfaceCount
+      ,count(DISTINCT method) AS methodCount
+OPTIONAL MATCH (class:TS&Class)
+  WITH nodeCount
+      ,relationshipCount
+      ,projectCount
+      ,moduleCount
+      ,functionCount
+      ,objectCount
+      ,typeAliasCount
+      ,interfaceCount
+      ,methodCount
+      ,count(DISTINCT class) AS classCount
+RETURN nodeCount
+      ,relationshipCount
+      ,projectCount
+      ,moduleCount
+      ,functionCount
+      ,objectCount
+      ,typeAliasCount
+      ,interfaceCount
+      ,classCount
+      ,methodCount
diff --git a/cypher/Overview/Relationship_type_count.cypher b/cypher/Overview/Relationship_type_count.cypher
@@ -0,0 +1,15 @@
+// Relationship count for each type separately. Sums up to the total number of relationships (100%).
+// A directed pattern matches each relationship exactly once, so the counts need no DISTINCT (undirected matching finds it from both ends).
+ MATCH ()-[allRelationships]->()
+  WITH count(allRelationships) AS totalRelationshipCount
+ MATCH ()-[relationshipsAndTheirTypes]->()
+  WITH totalRelationshipCount
+      ,type(relationshipsAndTheirTypes)  AS relationshipType
+      ,count(relationshipsAndTheirTypes) AS nodesWithThatRelationshipType
+  WITH relationshipType
+      ,nodesWithThatRelationshipType
+      ,toFloat(nodesWithThatRelationshipType) / totalRelationshipCount * 100.0 AS nodesWithThatRelationshipTypePercent
+RETURN relationshipType
+      ,nodesWithThatRelationshipType
+      ,nodesWithThatRelationshipTypePercent
+ORDER BY nodesWithThatRelationshipType DESC, relationshipType ASC