Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions COMMANDS.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ Use [importGitLog.sh](./scripts/importGitLog.sh) to import git log data into the
It uses `git log` to extract commits, their authors and the names of the files changed with them. These are stored in an intermediate CSV file and are then imported into Neo4j with the following schema:

```Cypher
(Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)->[:CONTAINS]->(Git:Log:File)
(Git:Log:Author)-[:AUTHORED]->(Git:Log:Commit)-[:CONTAINS_CHANGED]->(Git:Log:File)
(Git:Log:Commit)-[:HAS_PARENT]->(Git:Log:Commit)
```

Expand All @@ -254,7 +254,7 @@ You can use [List_unresolved_git_files.cypher](./cypher/GitLog/List_unresolved_g
Use [importAggregatedGitLog.sh](./scripts/importAggregatedGitLog.sh) to import git log data in an aggregated form into the Graph. It works similarly to the [full git log version above](#import-git-log). The only difference is that not every single commit is imported. Instead, changes are grouped per month including their commit count. This is in many cases sufficient and reduces data size and processing time significantly. Here is the resulting schema:

```Cypher
(Git:Log:Author)-[:AUTHORED]->(Git:Log:ChangeSpan)-[:CONTAINS]->(Git:Log:File)
(Git:Log:Author)-[:AUTHORED]->(Git:Log:ChangeSpan)-[:CONTAINS_CHANGED]->(Git:Log:File)
```

## Database Queries
Expand Down
2 changes: 1 addition & 1 deletion cypher/Centrality/Centrality_1c_Label_Delete.cypher
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Centrality Label Delete

CALL db.labels() YIELD label
WHERE label = 'Top' + apoc.text.capitalize($dependencies_projection_write_property)
WHERE label = 'Mark4Top' + apoc.text.capitalize($dependencies_projection_write_property)
WITH collect(label) AS selectedLabels
MATCH (member)
WHERE $dependencies_projection_node IN LABELS(member)
Expand Down
2 changes: 1 addition & 1 deletion cypher/Centrality/Centrality_1d_Label_Add.cypher
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ UNWIND members AS member
ORDER BY member[$dependencies_projection_write_property] DESCENDING
WITH memberCount2Percent
,collect(DISTINCT member)[0..memberCount2Percent] AS topMembers
,'Top' + apoc.text.capitalize($dependencies_projection_write_property) AS labelName
,'Mark4Top' + apoc.text.capitalize($dependencies_projection_write_property) AS labelName
UNWIND topMembers AS topMember
CALL apoc.create.addLabels(topMember, [labelName]) YIELD node
RETURN count(node) AS nodesCount
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Community Detection - Local Clustering Coefficient - Estimate
// Calls the "write.estimate" procedure for the projection named
// $dependencies_projection + '-cleaned' and returns the reported memory figures.

CALL gds.localClusteringCoefficient.write.estimate(
    $dependencies_projection + '-cleaned',
    { writeProperty: $dependencies_projection_write_property }
)
YIELD requiredMemory,
      nodeCount,
      relationshipCount,
      bytesMin,
      bytesMax,
      heapPercentageMin,
      heapPercentageMax,
      treeView,
      mapView
// mapView is yielded but intentionally not part of the result columns.
RETURN requiredMemory,
       nodeCount,
       relationshipCount,
       bytesMin,
       bytesMax,
       heapPercentageMin,
       heapPercentageMax,
       treeView
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Community Detection - Local Clustering Coefficient - Statistics
// Runs the "stats" mode on the projection named $dependencies_projection + '-cleaned'
// with an empty configuration and returns the summary columns it yields.

CALL gds.localClusteringCoefficient.stats($dependencies_projection + '-cleaned', {})
YIELD averageClusteringCoefficient,
      nodeCount,
      preProcessingMillis,
      computeMillis,
      postProcessingMillis
RETURN averageClusteringCoefficient,
       nodeCount,
       preProcessingMillis,
       computeMillis,
       postProcessingMillis
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Community Detection - Local Clustering Coefficient - Mutate
// Runs the "mutate" mode on the projection named $dependencies_projection + '-cleaned'.
// The property name is taken from $dependencies_projection_write_property.

CALL gds.localClusteringCoefficient.mutate(
    $dependencies_projection + '-cleaned',
    { mutateProperty: $dependencies_projection_write_property }
)
YIELD averageClusteringCoefficient,
      nodeCount,
      nodePropertiesWritten,
      preProcessingMillis,
      computeMillis,
      postProcessingMillis,
      mutateMillis
RETURN averageClusteringCoefficient,
       nodeCount,
       nodePropertiesWritten,
       preProcessingMillis,
       computeMillis,
       postProcessingMillis,
       mutateMillis
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Community Detection - Local Clustering Coefficient - Stream
// Streams the coefficient per node of the projection named $dependencies_projection + '-cleaned'
// and lists it together with a display name of the node.

CALL gds.localClusteringCoefficient.stream($dependencies_projection + '-cleaned', {})
YIELD nodeId, localClusteringCoefficient
WITH localClusteringCoefficient
    ,gds.util.asNode(nodeId) AS projectedNode
// Display name: first non-null of fqn, fileName and name.
RETURN localClusteringCoefficient
      ,coalesce(projectedNode.fqn, projectedNode.fileName, projectedNode.name) AS memberName
ORDER BY localClusteringCoefficient DESC, memberName ASC
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Community Detection - Local Clustering Coefficient - Stream Aggregated
// Streams the coefficient per node, rounds it to two decimals and reports per rounded value
// how many distinct members share it, plus a few example names.

CALL gds.localClusteringCoefficient.stream($dependencies_projection + '-cleaned', {})
YIELD nodeId, localClusteringCoefficient
WITH localClusteringCoefficient
    ,gds.util.asNode(nodeId) AS projectedNode
WITH round(localClusteringCoefficient, 2) AS localClusteringCoefficient
    ,coalesce(projectedNode.fqn, projectedNode.fileName, projectedNode.name) AS memberName
// The rounded coefficient is the grouping key; [0..9] limits the examples to the first nine names.
RETURN localClusteringCoefficient
      ,count(DISTINCT memberName) AS memberCount
      ,collect(DISTINCT memberName)[0..9] AS memberNameExamples
ORDER BY localClusteringCoefficient DESC, memberCount DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Community Detection - Local Clustering Coefficient - Write
// Runs the "write" mode on the projection named $dependencies_projection + '-cleaned'.
// The property name is taken from $dependencies_projection_write_property.

CALL gds.localClusteringCoefficient.write(
    $dependencies_projection + '-cleaned',
    { writeProperty: $dependencies_projection_write_property }
)
YIELD averageClusteringCoefficient,
      nodeCount,
      nodePropertiesWritten,
      preProcessingMillis,
      computeMillis,
      postProcessingMillis,
      writeMillis
RETURN averageClusteringCoefficient,
       nodeCount,
       nodePropertiesWritten,
       preProcessingMillis,
       computeMillis,
       postProcessingMillis,
       writeMillis
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Community Detection Label Propagation Label Delete

CALL db.labels() YIELD label
WHERE label STARTS WITH $dependencies_projection_node + $dependencies_projection_write_label
WHERE label STARTS WITH 'Mark4' + $dependencies_projection_node + $dependencies_projection_write_label
WITH collect(label) AS selectedLabels
MATCH (member)
WHERE $dependencies_projection_node IN labels(member)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
AND $dependencies_projection_node IN LABELS(member)
WITH collect(member) AS members
,count(DISTINCT member) AS memberCount
,$dependencies_projection_node + $dependencies_projection_write_label + toString(member[$dependencies_projection_write_property]) AS labelName
,'Mark4' + $dependencies_projection_node + $dependencies_projection_write_label + toString(member[$dependencies_projection_write_property]) AS labelName
WHERE memberCount > 1
UNWIND members AS member
CALL apoc.create.addLabels(member, [labelName]) YIELD node
Expand Down
2 changes: 1 addition & 1 deletion cypher/GitLog/Import_aggregated_git_log_csv_data.cypher
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ CALL { WITH row
})
MERGE (git_file:Git:Log:File {fileName: row.filename})
MERGE (git_author)-[:AUTHORED]->(git_change_span)
MERGE (git_change_span)-[:CONTAINS]->(git_file)
MERGE (git_change_span)-[:CONTAINS_CHANGED]->(git_file)
} IN TRANSACTIONS OF 1000 ROWS
RETURN count(DISTINCT row.author) AS numberOfAuthors
,count(DISTINCT row.filename) AS numberOfFiles
Expand Down
2 changes: 1 addition & 1 deletion cypher/GitLog/Import_git_log_csv_data.cypher
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ CALL { WITH row
})
MERGE (git_file:Git:Log:File {fileName: row.filename})
MERGE (git_author)-[:AUTHORED]->(git_commit)
MERGE (git_commit)-[:CONTAINS]->(git_file)
MERGE (git_commit)-[:CONTAINS_CHANGED]->(git_file)
} IN TRANSACTIONS OF 1000 ROWS
RETURN count(DISTINCT row.author) AS numberOfAuthors
,count(DISTINCT row.filename) AS numberOfFiles
Expand Down
2 changes: 1 addition & 1 deletion cypher/GitLog/List_ambiguous_git_files.cypher
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// List ambiguously resolved git files where a single git file is attached to more than one code file for troubleshooting/testing.

MATCH (file:File&!Git)<-[:RESOLVES_TO]-(git_file:File&Git)
// `file` is a non-git code file here (File&!Git). CONTAINS_CHANGED is only created by the
// git log import between git commits/change spans and git files, so the archive lookup
// has to keep using CONTAINS; otherwise `artifact` would always be null.
OPTIONAL MATCH (artifact:Artifact:Archive)-[:CONTAINS]->(file)
WITH file.fileName AS fileName
,reverse(split(reverse(file.fileName),'.')[0]) AS fileExtension
,count(DISTINCT git_file.fileName) AS gitFilesCount
Expand Down
2 changes: 1 addition & 1 deletion cypher/GitLog/Set_number_of_aggregated_git_commits.cypher
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Set numberOfGitCommits property on code File nodes when aggregated change spans with grouped commits are present.

// code_file: File node without the Git label; git_file: File node with the Git label that RESOLVES_TO it.
MATCH (code_file:File&!Git)<-[:RESOLVES_TO]-(git_file:File&Git)
// Change spans that reference the git file.
MATCH (git_file)<-[:CONTAINS]-(git_changespan:Git:ChangeSpan)
MATCH (git_file)<-[:CONTAINS_CHANGED]-(git_changespan:Git:ChangeSpan)
// Per code file: add up the "commits" property of all matched change spans.
WITH code_file, sum(git_changespan.commits) AS numberOfGitCommits
SET code_file.numberOfGitCommits = numberOfGitCommits
// Number of distinct code files that were updated, identified by absoluteFileName (fallback: fileName).
RETURN count(DISTINCT coalesce(code_file.absoluteFileName, code_file.fileName)) AS changedCodeFiles
2 changes: 1 addition & 1 deletion cypher/GitLog/Set_number_of_git_commits.cypher
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Set numberOfGitCommits property on code File nodes when git commits are present

// code_file: File node without the Git label; git_file: File node with the Git label that RESOLVES_TO it.
MATCH (code_file:File&!Git)<-[:RESOLVES_TO]-(git_file:File&Git)
// Commits that reference the git file.
MATCH (git_file)<-[:CONTAINS]-(git_commit:Git:Commit)
MATCH (git_file)<-[:CONTAINS_CHANGED]-(git_commit:Git:Commit)
// Per code file: count the distinct commit hashes.
WITH code_file, count(DISTINCT git_commit.hash) AS numberOfGitCommits
SET code_file.numberOfGitCommits = numberOfGitCommits
// Number of distinct code files that were updated, identified by absoluteFileName (fallback: fileName).
RETURN count(DISTINCT coalesce(code_file.absoluteFileName, code_file.fileName)) AS changedCodeFiles
22 changes: 22 additions & 0 deletions cypher/Overview/Node_label_combination_count.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Node count for each label combination. Sums up to the total number of nodes.
// Labels starting with 'Mark4' are ignored when building the combination.

MATCH (allNodes)
 WITH count(allNodes) AS totalNodeCount
MATCH (nodesAndTheirLabels)
// The 'Mark4' labels are removed with a list comprehension instead of UNWIND + WHERE + collect.
// UNWIND would drop nodes that have no label at all (or only 'Mark4' labels), so the
// counts would no longer add up to the total. Those nodes now appear as combination [].
 WITH totalNodeCount
     ,[nodeLabel IN labels(nodesAndTheirLabels) WHERE NOT nodeLabel STARTS WITH 'Mark4'] AS nodeLabels
     ,nodesAndTheirLabels
// nodeLabels is the grouping key; count once and derive the percentage from it afterwards.
 WITH totalNodeCount
     ,nodeLabels
     ,count(nodesAndTheirLabels) AS nodesWithThatLabels
RETURN nodeLabels
      ,nodesWithThatLabels
      ,toFloat(nodesWithThatLabels) / totalNodeCount * 100.0 AS nodesWithThatLabelsPercent
 ORDER BY nodesWithThatLabels DESC
16 changes: 16 additions & 0 deletions cypher/Overview/Node_label_count.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Node count for each single label. Doesn't sum up to the total number of nodes since one node can have multiple labels.
// Labels starting with 'Mark4' are left out of the result.

MATCH (allNodes)
 WITH count(allNodes) AS totalNodeCount
MATCH (labeledNode)
UNWIND labels(labeledNode) AS nodeLabel
 WITH totalNodeCount
     ,nodeLabel
     ,labeledNode
WHERE NOT nodeLabel STARTS WITH 'Mark4'
// nodeLabel is the grouping key; the percentage is relative to all nodes in the graph.
 WITH totalNodeCount
     ,nodeLabel
     ,count(labeledNode) AS nodesWithThatLabel
RETURN nodeLabel
      ,nodesWithThatLabel
      ,toFloat(nodesWithThatLabel) / totalNodeCount * 100.0 AS nodesWithThatLabelPercent
 ORDER BY nodesWithThatLabel DESC, nodeLabel ASC
29 changes: 29 additions & 0 deletions cypher/Overview/Node_labels_and_their_relationships.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// List node labels and their relationship types, their count and their density.

// Group all nodes by their exact label combination and remember the size of each group.
MATCH (sourceCandidate)
 WITH labels(sourceCandidate)  AS sourceLabels
     ,count(sourceCandidate)   AS numberOfNodesWithSameLabelsAsSource
     ,collect(sourceCandidate) AS sourceNodes
// Expand the groups again to follow the outgoing relationships of every member.
UNWIND sourceNodes AS source
MATCH (source)-[outgoing]->(target)
 WITH sourceLabels
     ,numberOfNodesWithSameLabelsAsSource
     ,type(outgoing)           AS relationType
     ,labels(target)           AS targetLabels
     ,count(DISTINCT outgoing) AS numberOfRelationships
// Count all nodes whose label list equals the label list of the targets.
 WITH *
     ,count{ MATCH (targetWithLabel) WHERE labels(targetWithLabel) = targetLabels } AS numberOfNodesWithSameLabelsAsTarget
// Density = relationships found / (source group size * target group size), in percent.
RETURN sourceLabels
      ,relationType
      ,targetLabels
      ,numberOfRelationships
      ,numberOfNodesWithSameLabelsAsSource
      ,numberOfNodesWithSameLabelsAsTarget
      ,toFloat(numberOfRelationships)
        / ( numberOfNodesWithSameLabelsAsSource * numberOfNodesWithSameLabelsAsTarget)
        * 100 AS densityInPercent
 ORDER BY numberOfRelationships DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Number of elements per module for Typescript
// One row per module and element label: the total number of exported elements of the module
// and the number of exported elements carrying that label.

MATCH (module:TS:Module)-[:EXPORTS]->(exported:TS)
OPTIONAL MATCH (module)<-[:CONTAINS]-(project:TS:Project)-[:HAS_ROOT]->(projectRoot:Directory)
// modulePath: globalFqn with the project root directory prefix removed (unchanged when no root was found).
 WITH module.name      AS moduleName
     ,replace(module.globalFqn, coalesce(projectRoot.absoluteFileName + '/', ''), '') AS modulePath
     ,module.globalFqn AS fullQualifiedModuleName
     ,count(DISTINCT exported)   AS numberOfModuleElements
     ,collect(DISTINCT exported) AS moduleElements
// Expand to one row per element and label to count the elements by label.
UNWIND moduleElements AS element
UNWIND labels(element) AS typeLabel
 WITH moduleName
     ,modulePath
     ,fullQualifiedModuleName
     ,numberOfModuleElements
     ,element
     ,typeLabel
WHERE NOT typeLabel IN ['TS', 'ExternalDeclaration']
RETURN moduleName
      ,modulePath
      ,fullQualifiedModuleName
      ,numberOfModuleElements
      ,typeLabel      AS languageElement
      ,count(element) AS numberOfElements
 ORDER BY numberOfModuleElements DESC, moduleName ASC
77 changes: 77 additions & 0 deletions cypher/Overview/Overview_size_for_Typescript.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Overview size for Typescript
// Returns a single row with the overall node and relationship count and the number of
// Typescript projects, modules, functions, objects, type aliases, interfaces, methods and classes.
//
// Every counted element is matched with OPTIONAL MATCH. A plain MATCH that finds nothing
// produces no rows, and an aggregation with grouping keys over no rows returns nothing,
// so a single missing element kind (e.g. no TS:Object nodes) would empty the whole result.
// With OPTIONAL MATCH the missing kind is reported as 0 instead.

MATCH (n)
 WITH count(n) AS nodeCount
OPTIONAL MATCH ()-[relationship]->()
 WITH nodeCount
     // count(relationship) ignores the null row of an unmatched OPTIONAL MATCH (count(*) would not).
     ,count(relationship) AS relationshipCount
OPTIONAL MATCH (a:TS&Project)
 WITH nodeCount
     ,relationshipCount
     ,count(DISTINCT a) AS projectCount
OPTIONAL MATCH (p:TS&Module)
 WITH nodeCount
     ,relationshipCount
     ,projectCount
     // Modules are counted by their distinct globalFqn.
     ,count(DISTINCT p.globalFqn) AS moduleCount
OPTIONAL MATCH (function:TS&Function)
 WITH nodeCount
     ,relationshipCount
     ,projectCount
     ,moduleCount
     ,count(DISTINCT function) AS functionCount
OPTIONAL MATCH (object:TS&Object)
 WITH nodeCount
     ,relationshipCount
     ,projectCount
     ,moduleCount
     ,functionCount
     ,count(DISTINCT object) AS objectCount
OPTIONAL MATCH (typeAlias:TS&TypeAlias)
 WITH nodeCount
     ,relationshipCount
     ,projectCount
     ,moduleCount
     ,functionCount
     ,objectCount
     ,count(DISTINCT typeAlias) AS typeAliasCount
OPTIONAL MATCH (interface:TS&Interface)
 WITH nodeCount
     ,relationshipCount
     ,projectCount
     ,moduleCount
     ,functionCount
     ,objectCount
     ,typeAliasCount
     ,count(DISTINCT interface) AS interfaceCount
OPTIONAL MATCH (method:TS&Method)
 WITH nodeCount
     ,relationshipCount
     ,projectCount
     ,moduleCount
     ,functionCount
     ,objectCount
     ,typeAliasCount
     ,interfaceCount
     ,count(DISTINCT method) AS methodCount
OPTIONAL MATCH (class:TS&Class)
 WITH nodeCount
     ,relationshipCount
     ,projectCount
     ,moduleCount
     ,functionCount
     ,objectCount
     ,typeAliasCount
     ,interfaceCount
     ,methodCount
     ,count(DISTINCT class) AS classCount
RETURN nodeCount
      ,relationshipCount
      ,projectCount
      ,moduleCount
      ,functionCount
      ,objectCount
      ,typeAliasCount
      ,interfaceCount
      ,classCount
      ,methodCount
15 changes: 15 additions & 0 deletions cypher/Overview/Relationship_type_count.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Relationship count for each type separately. Sums up to the total number of relationships (100%).
//
// Directed patterns are used on purpose: an undirected pattern ()-[r]-() matches every
// relationship once per direction, which doubles the work and needs DISTINCT to correct the
// counts. With ()-[r]->() every relationship is matched exactly once.
// NOTE: the result columns are named "nodesWith..." but contain relationship counts.
// The names are kept unchanged for consumers of this query.

MATCH ()-[allRelationships]->()
 WITH count(allRelationships) AS totalRelationshipCount
MATCH ()-[relationshipsAndTheirTypes]->()
// relationshipType is the grouping key; count once and derive the percentage afterwards.
 WITH totalRelationshipCount
     ,type(relationshipsAndTheirTypes)  AS relationshipType
     ,count(relationshipsAndTheirTypes) AS nodesWithThatRelationshipType
RETURN relationshipType
      ,nodesWithThatRelationshipType
      ,toFloat(nodesWithThatRelationshipType)
        / totalRelationshipCount * 100.0 AS nodesWithThatRelationshipTypePercent
 ORDER BY nodesWithThatRelationshipType DESC, relationshipType ASC
Loading