Skip to content

Commit 39fb0a2

Browse files
authored
Merge pull request #203 from JohT/feature/useability-improvements
Improve useability
2 parents c3b0aa8 + 29addeb commit 39fb0a2

27 files changed

+385
-126
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ The [Code Structure Analysis Pipeline](./.github/workflows/java-code-analysis.ym
182182

183183
- How can I trigger a full re-scan of all artifacts?
184184
👉 Delete the file `artifactsChangeDetectionHash.txt` in the `artifacts` directory.
185+
👉 Delete the file `typescriptFileChangeDetectionHashFile.txt` in the `source` directory to additionally re-scan Typescript projects.
185186

186187
- How can I enable PDF generation for Jupyter Notebooks (depends on chromium, takes more time)?
187188
👉 Set environment variable `ENABLE_JUPYTER_NOTEBOOK_PDF_GENERATION` to anything except an empty string. Example:
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Verify that nodes and relationships are complete and ready for projection
2+
3+
MATCH (source:TS:Module)-[dependency:DEPENDS_ON]->(target:Module)
4+
WHERE $dependencies_projection_node IN labels(source)
5+
AND $dependencies_projection_node IN labels(target)
6+
WITH (NOT $dependencies_projection_weight_property IN keys(dependency)) AS missingWeightProperty
7+
,(dependency[$dependencies_projection_weight_property]) AS weightPropertyValue
8+
,(dependency[$dependencies_projection_weight_property] < 1) AS nonPositiveWeightPropertyValue
9+
,coalesce(dependency.resolved, false) AS resolvedDependency
10+
,EXISTS { (target)<-[:RESOLVES_TO]-(resolvedTarget:ExternalModule) } AS resolvedTarget
11+
,(source.incomingDependencies IS NULL OR
12+
target.incomingDependencies IS NULL) AS missingIncomingDependencies
13+
,(source.outgoingDependencies IS NULL OR
14+
target.outgoingDependencies IS NULL) AS missingOutgoingDependencies
15+
,source
16+
,target
17+
WHERE missingWeightProperty
18+
// OR nonPositiveWeightPropertyValue // if strict positive weights are needed
19+
OR missingIncomingDependencies
20+
OR missingOutgoingDependencies
21+
RETURN missingWeightProperty
22+
,nonPositiveWeightPropertyValue
23+
,resolvedDependency
24+
,resolvedTarget
25+
,missingIncomingDependencies
26+
,missingOutgoingDependencies
27+
,count(*) AS numberOfRelationships
28+
,min(weightPropertyValue) AS minWeightPropertyValue
29+
,max(weightPropertyValue) AS maxWeightPropertyValue
30+
,collect(DISTINCT source.globalFqn + ' -> ' + target.globalFqn)[0..4] AS examples
31+
// Output source and target nodes for troubleshooting
32+
//,collect(source)[0..4]
33+
//,collect(target)[0..4]

cypher/DependsOn_Relationship_Weights/Add_fine_grained_weights_for_Typescript_internal_module_dependencies.cypher

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22

33
// Get the top level dependency between a Typescript module and the external modules it uses
44
MATCH (source:TS:Module)-[moduleDependency:DEPENDS_ON]->(target:Module)
5-
// Exclude all targets where an ExternalModule was found that resolves to them
6-
// because those are covered in the fine grained weights for "ExternalModule"s.
7-
WHERE NOT EXISTS { (target)<-[:RESOLVES_TO]-(resolvedTarget:ExternalModule) }
5+
// Exclude all that already have extended weight properties
6+
// for example because those were covered in the fine grained weights for "ExternalModule"s.
7+
WHERE moduleDependency.declarationCount IS NULL
8+
// Ruling out resolved targets also filters out entries that aren't covered by the fine grained weights for "ExternalModule"s.
9+
// Therefore, the exists filter is commented out for now and replaced by focussing on missing detailed weight properties to catch them all.
10+
//WHERE NOT EXISTS { (target)<-[:RESOLVES_TO]-(resolvedTarget:ExternalModule) }
811
WITH source
912
,target
1013
,moduleDependency
1114
,moduleDependency.cardinality AS targetModuleCardinality
12-
1315
// Get optional external (e.g. type) declarations that the external module (target) provides and the source module uses
1416
OPTIONAL MATCH (source)-[elementDependency:DEPENDS_ON|EXPORTS]->(elementType:TS)<-[:EXPORTS]-(target)
1517
WITH source
@@ -40,13 +42,18 @@ OPTIONAL MATCH (source)-[abstractDependency:DEPENDS_ON|EXPORTS]->(abstractType:T
4042
// - "lowCouplingElement25PercentWeight" subtracts 75% of the weights for abstract types like Interfaces and Type aliases
4143
// to compensate for their low coupling influence. Not included "high coupling" elements like Functions and Classes
4244
// remain in the weight as they were. The same applies for "lowCouplingElement10PercentWeight" but in a stronger manner.
45+
// If there are no declarations and therefore the elementTypeCardinality is zero then the original targetModuleCardinality is used.
4346
SET moduleDependency.declarationCount = elementTypeCount
4447
,moduleDependency.abstractTypeCount = abstractTypeCount
4548
,moduleDependency.abstractTypeCardinality = abstractTypeCardinality
46-
,moduleDependency.lowCouplingElement25PercentWeight =
47-
toInteger(elementTypeCardinality - round(abstractTypeCardinality * 0.75))
48-
,moduleDependency.lowCouplingElement10PercentWeight =
49-
toInteger(elementTypeCardinality - round(abstractTypeCardinality * 0.90))
49+
,moduleDependency.lowCouplingElement25PercentWeight = toInteger(
50+
coalesce(nullif(elementTypeCardinality, 0), targetModuleCardinality) -
51+
round(abstractTypeCardinality * 0.75)
52+
)
53+
,moduleDependency.lowCouplingElement10PercentWeight = toInteger(
54+
coalesce(nullif(elementTypeCardinality, 0), targetModuleCardinality) -
55+
round(abstractTypeCardinality * 0.90)
56+
)
5057
RETURN source.globalFqn AS sourceName
5158
,target.globalFqn AS targetName
5259
,elementTypeCount
Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,28 @@
1-
// Connect git files to code files with a RESOLVES_TO relationship if their names match
2-
// Note: Even if is tempting to combine this file with the Typescript variant, they are intentionally spearated.
1+
// Connect git files to Java code files with a RESOLVES_TO relationship if their names match
2+
// Note: It's quite tricky to reliably match Java class file paths from inside e.g. *.jar files to their source repository file paths.
3+
// This could be improved by utilizing package manager data (like maven). Even that turns out to be not easy,
4+
// since the folder structure can be customized. Therefore, this is only a simplified attempt and by no means complete.
5+
// Note: Even if it is tempting to combine this file with the Typescript variant, they are intentionally separated.
36
// The differences are subtle but need to be thought through and tested carefully.
47
// Having separate files makes it obvious that there needs to be one for every new source code language.
58

6-
MATCH (code_file:File&!Git)
7-
WHERE NOT EXISTS { (code_file)-[:RESOLVES_TO]->(other_file:File&!Git) } // only original nodes, no duplicates
8-
WITH code_file, replace(code_file.fileName, '.class', '.java') AS codeFileName
9-
MATCH (git_file:File&Git)
9+
MATCH (code_file:!Git&File)
10+
WHERE code_file.fileName IS NOT NULL
11+
// Use only original code files, no resolved duplicates
12+
AND NOT EXISTS { (code_file)-[:RESOLVES_TO]->(other_file:File&!Git) }
13+
WITH code_file
14+
,replace(code_file.fileName, '.class', '.java') AS codeFileName
15+
MATCH (git_file:Git&File)
1016
WITH *
1117
,git_file
1218
,coalesce(git_file.fileName, git_file.relativePath) AS gitFileName
1319
WHERE gitFileName ENDS WITH codeFileName
1420
MERGE (git_file)-[:RESOLVES_TO]->(code_file)
1521
SET git_file.resolved = true
1622
RETURN count(DISTINCT codeFileName) AS numberOfCodeFiles
17-
,collect(DISTINCT codeFileName + ' <-> ' + gitFileName + '\n')[0..4] AS examples
23+
,collect(DISTINCT codeFileName + ' <-> ' + gitFileName + '\n')[0..4] AS examples
24+
// RETURN codeFileName, gitFileName
25+
// ,git_file.fileName, git_file.relativePath
26+
// ,git_repository.fileName , code_file.absoluteFileName
27+
// ,git_repository.name, code_file.fileName
28+
// LIMIT 20
Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,30 @@
11
// Connect git files to Typescript files with a RESOLVES_TO relationship if their names match
2-
// Note: Even if is tempting to combine this file with the Java variant, they are intentionally spearated.
2+
// Note: Even if it is tempting to combine this file with the Java variant, they are intentionally separated.
33
// The differences are subtle but need to be thought through and tested carefully.
44
// Having separate files makes it obvious that there needs to be one for every new source code language.
55

6-
MATCH (code_file:File&!Git)
7-
WHERE NOT EXISTS { (code_file)-[:RESOLVES_TO]->(other_file:File&!Git) } // only original nodes, no duplicates
8-
WITH code_file, code_file.absoluteFileName AS codeFileName
9-
MATCH (git_file:File&Git)
6+
MATCH (code_file:!Git&File)
7+
WHERE (code_file.absoluteFileName IS NOT NULL OR code_file.fileName IS NOT NULL)
8+
// Use only original code files, no resolved duplicates
9+
AND NOT EXISTS { (code_file)-[:RESOLVES_TO]->(other_file:File&!Git) }
10+
WITH code_file
11+
,coalesce(code_file.absoluteFileName, code_file.fileName) AS codeFileName
12+
MATCH (git_file:Git&File)
13+
// Use repository if available to overcome ambiguity in multi source analysis
14+
OPTIONAL MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file)
1015
WITH *
16+
,git_repository
1117
,git_file
12-
,coalesce(git_file.fileName, git_file.relativePath) AS gitFileName
18+
,coalesce(coalesce(git_repository.fileName + '/', '') + git_file.fileName
19+
,coalesce(git_repository.name + '/', '') + git_file.relativePath
20+
) AS gitFileName
1321
WHERE codeFileName ENDS WITH gitFileName
1422
MERGE (git_file)-[:RESOLVES_TO]->(code_file)
1523
SET git_file.resolved = true
1624
RETURN count(DISTINCT codeFileName) AS numberOfCodeFiles
17-
,collect(DISTINCT codeFileName + ' <-> ' + gitFileName + '\n')[0..4] AS examples
25+
,collect(DISTINCT codeFileName + ' <-> ' + gitFileName + '\n')[0..4] AS examples
26+
// RETURN codeFileName, gitFileName
27+
// ,git_file.fileName, git_file.relativePath
28+
// ,git_repository.fileName , code_file.absoluteFileName
29+
// ,git_repository.name, code_file.fileName
30+
// LIMIT 20
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Delete plain file nodes in "/.git" directory
2+
3+
MATCH (git_metadata_file:File)<-[:CONTAINS*]-(git_directory:Directory)
4+
WHERE git_directory.fileName ENDS WITH '/.git'
5+
WITH git_directory.fileName AS gitDirectory
6+
,count(DISTINCT git_metadata_file.fileName) AS numberOfFiles
7+
,collect(DISTINCT git_metadata_file.fileName)[0..4] AS fileExamples
8+
,collect(DISTINCT git_metadata_file) AS git_metadata_files
9+
UNWIND git_metadata_files AS git_metadata_file
10+
CALL { WITH git_metadata_file
11+
DETACH DELETE git_metadata_file
12+
} IN TRANSACTIONS OF 1000 ROWS
13+
RETURN DISTINCT gitDirectory, numberOfFiles, fileExamples
14+
ORDER BY numberOfFiles DESC

cypher/GitLog/Import_aggregated_git_log_csv_data.cypher

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ CALL { WITH row
99
month: toInteger(row.month),
1010
commits: toInteger(row.commits)
1111
})
12-
MERGE (git_file:Git:Log:File {fileName: row.filename})
12+
MERGE (git_file:Git:Log:File {fileName: row.filename, repositoryPath: $git_repository_absolute_directory_name})
1313
MERGE (git_author)-[:AUTHORED]->(git_change_span)
1414
MERGE (git_change_span)-[:CONTAINS_CHANGED]->(git_file)
1515
MERGE (git_repository)-[:HAS_CHANGE_SPAN]->(git_change_span)

cypher/GitLog/Import_git_log_csv_data.cypher

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ CALL { WITH row
1212
timestamp: datetime(row.timestamp),
1313
timestamp_unix: toInteger(row.timestamp_unix)
1414
})
15-
MERGE (git_file:Git:Log:File {fileName: row.filename})
15+
MERGE (git_file:Git:Log:File {fileName: row.filename, repositoryPath: $git_repository_absolute_directory_name})
1616
MERGE (git_author)-[:AUTHORED]->(git_commit)
1717
MERGE (git_commit)-[:CONTAINS_CHANGED]->(git_file)
1818
MERGE (git_repository)-[:HAS_COMMIT]->(git_commit)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// List resolved and unresolved git files by their extension
2+
3+
MATCH (git_file:Git&File)
4+
OPTIONAL MATCH (git_file)-[:RESOLVES_TO]->(code_file:File&!Git)
5+
WITH git_file
6+
,code_file
7+
,(code_file IS NOT NULL) AS resolved
8+
,coalesce(git_file.fileName, git_file.relativePath) AS gitFileName
9+
RETURN resolved
10+
,reverse(split(split(reverse(gitFileName), '/')[0], '.')[0]) AS extension
11+
,count(DISTINCT git_file) AS gitFileCount
12+
,coalesce(labels(code_file), labels(git_file)) AS fileLabels
13+
,collect(DISTINCT gitFileName)[0..9] AS gitFileExamples
14+
ORDER BY resolved ASC, gitFileCount DESC, extension ASC
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// Verify that code to git file relationships aren't ambiguous
2+
3+
MATCH (git_file:Git&File)-[:RESOLVES_TO]->(code_file:File&!Git)
4+
WITH code_file
5+
,count(*) AS numberOfResolvedGitFiles
6+
,collect(DISTINCT git_file) AS git_files
7+
WHERE numberOfResolvedGitFiles > 1
8+
UNWIND git_files AS git_file
9+
OPTIONAL MATCH (git_file)<-[:HAS_FILE]-(git_repository:Git&Repository)
10+
RETURN numberOfResolvedGitFiles
11+
,collect(DISTINCT coalesce(code_file.absoluteFileName, code_file.fileName))[0..4] AS codeFileExamples
12+
,collect(DISTINCT coalesce(git_file.fileName, git_file.relativePath))[0..4] AS gitFileExamples
13+
,collect(DISTINCT git_repository.name)[0..4] AS gitRepositoryExamples
14+
//,collect(git_repository)[0..9]
15+
//,collect(git_file)[0..9]
16+
//,collect(code_file)[0..9]

0 commit comments

Comments
 (0)