Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,13 @@ The [Code Structure Analysis Pipeline](./.github/workflows/java-code-analysis.ym
👉 The custom Jupyter Notebook metadata property `code_graph_analysis_pipeline_data_validation` can be set to choose a query from [cypher/Validation](./cypher/Validation) that will be executed before the notebook is run. If the query returns at least one result, the validation succeeds and the notebook will be run. If the query returns no result, the notebook will be skipped.
For more details see [Data Availability Validation](./COMMANDS.md#data-availability-validation).

- How can I increase the heap memory when scanning large TypeScript projects?
👉 Use the environment variable `TYPESCRIPT_SCAN_HEAP_MEMORY` to set the heap size in megabytes (default = 4096):

```shell
TYPESCRIPT_SCAN_HEAP_MEMORY=16384 ./../../scripts/analysis/analyze.sh
```

## 🕸 Web References

- [Graph Data Science 101: Understanding Graphs and Graph Data Science](https://techfirst.medium.com/graph-data-science-101-understanding-graphs-and-graph-data-science-c25055a9db01)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// List package.json dependencies by package
//
// Returns one row per declared dependency of every NPM package, together with
// the directory of the package.json file and (if present) the value stored
// under the JSON key 'name'.

MATCH (npmPackage:NPM:Package)-[:DECLARES_DEPENDENCY]->(declared:NPM:Dependency)
// The name lookup is optional, so packages without a 'name' key are still listed (packageName is then null).
OPTIONAL MATCH (npmPackage)-[:CONTAINS]->(:Json:Object)-[:HAS_KEY]->(nameKey:Json:Key)-[:HAS_VALUE]->(nameValue:Json:Scalar:Value)
         WHERE nameKey.name = 'name'
// Strip the scan prefix first, the trailing file name afterwards.
  WITH declared
      ,nameValue
      ,replace(npmPackage.fileName, '/npm-package-json/', '') AS pathWithoutPrefix
RETURN replace(pathWithoutPrefix, '/package.json', '') AS packageDirectory
      ,nameValue.value      AS packageName
      ,declared.name        AS dependencyName
      ,declared.dependency  AS dependencyVersion
 ORDER BY packageName, dependencyName
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// List most used combination of 2 and 3 dependencies
//
// For every package.json file, all combinations of 2 and 3 of its declared
// dependency names are built. Combinations that occur more than once are
// returned with their number of occurrences and up to ten of the package
// files that contain them, most frequent first.

 MATCH (package:NPM:Package)-[:DECLARES_DEPENDENCY]->(dependency:NPM:Dependency)
  WITH package.fileName AS packageFileName
      ,dependency.name  AS dependencyName
// Sort before collecting. Presumably this keeps the names inside each
// combination in a stable order so equal sets are grouped together - TODO confirm.
 ORDER BY packageFileName, dependencyName
  WITH packageFileName
      ,apoc.coll.combinations(collect(dependencyName), 2, 3) AS dependencyCombinations
UNWIND dependencyCombinations AS dependencyCombination
  WITH dependencyCombination
      ,count(*)                 AS occurrences
      ,collect(packageFileName) AS packages
 WHERE occurrences > 1
// List slices exclude the end index, so [0..10] is needed to get ten entries.
RETURN dependencyCombination
      ,occurrences
      ,packages[0..10] AS firstTenPackages
 ORDER BY occurrences DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// List most used combination of 2 and 3 dependencies including version specifier
//
// For every package.json file, all combinations of 2 and 3 of its declared
// dependencies (name and version specifier joined by a space) are built.
// Combinations that occur more than once are returned with their number of
// occurrences and up to ten of the package files that contain them, most
// frequent first.

 MATCH (package:NPM:Package)-[:DECLARES_DEPENDENCY]->(dependency:NPM:Dependency)
  WITH package.fileName      AS packageFileName
      ,dependency.name       AS dependencyName
      ,dependency.dependency AS dependencyVersion
// Sort before collecting. Presumably this keeps the entries inside each
// combination in a stable order so equal sets are grouped together - TODO confirm.
 ORDER BY packageFileName, dependencyName
  WITH packageFileName
      ,apoc.coll.combinations(collect(dependencyName + ' ' + dependencyVersion), 2, 3) AS dependencyCombinations
UNWIND dependencyCombinations AS dependencyCombination
  WITH dependencyCombination
      ,count(*)                 AS occurrences
      ,collect(packageFileName) AS packages
 WHERE occurrences > 1
// List slices exclude the end index, so [0..10] is needed to get ten entries.
RETURN dependencyCombination
      ,occurrences
      ,packages[0..10] AS firstTenPackages
 ORDER BY occurrences DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// List package.json dependencies by the number they are used by all packages
//
// Groups all declared dependencies by their name and reports how many rows
// (package/dependency declarations) use each one, how many distinct version
// specifiers exist, and a few example package names, versions and directories.

MATCH (npmPackage:NPM:Package)-[:DECLARES_DEPENDENCY]->(declared:NPM:Dependency)
// The name lookup is optional, so packages without a 'name' key are still counted.
OPTIONAL MATCH (npmPackage)-[:CONTAINS]->(:Json:Object)-[:HAS_KEY]->(nameKey:Json:Key)-[:HAS_VALUE]->(nameValue:Json:Scalar:Value)
         WHERE nameKey.name = 'name'
// The dependency name is the only non-aggregated column and therefore the grouping key.
// List slices exclude the end index: [0..9] yields up to nine, [0..4] up to four examples.
RETURN declared.name                               AS dependencyName
      ,count(*)                                    AS usingPackageCount
      ,count(DISTINCT declared.dependency)         AS dependencyVersionCount
      ,collect(nameValue.value)[0..9]              AS packageNameExamples
      ,collect(declared.dependency)[0..4]          AS dependencyVersionExamples
      ,collect(replace(replace(npmPackage.fileName, '/npm-package-json/', ''), '/package.json', ''))[0..4]
                                                   AS packageDirectory
 ORDER BY usingPackageCount DESC
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Wordcloud of git authors and their commit count
//
// Every author name becomes a word; its frequency is the number of commits
// connected to that author. Names containing '[bot]' and names that are not
// longer than one character are left out.

 MATCH (gitAuthor:Git:Author)-[:COMMITTED]-(gitCommit:Git:Commit)
  WITH gitAuthor.name AS word
      ,gitCommit
 WHERE size(word) > 1
   AND NOT word CONTAINS '[bot]'
RETURN word
      ,count(gitCommit) AS frequency
2 changes: 1 addition & 1 deletion cypher/Overview/Words_for_universal_Wordcloud.cypher
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Words for universal Wordcloud

MATCH (named:!Key&!Primitive&!PrimitiveType&!Void&!JavaType&!ResolvedDuplicateType&!ExternalType)
MATCH (named:!Key&!Primitive&!PrimitiveType&!Void&!JavaType&!ResolvedDuplicateType&!ExternalType&!Git)
WHERE named.name > ''
AND named.name <> 'package-info'
AND named.name <> '<init>'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,14 @@ UNWIND sourcesAndTargets AS sourceAndTarget
// Optionally get the project (e.g. Java Artifact, Typescript Project) the source and target belong to
OPTIONAL MATCH (sourceProject:Artifact|Project)-[:CONTAINS]->(source)
OPTIONAL MATCH (targetProject:Artifact|Project)-[:CONTAINS]->(target)
// Optionally get the name of the scan that contained that project
OPTIONAL MATCH (sourceScan:TS:Scan)-[:CONTAINS_PROJECT]->(sourceProject)
OPTIONAL MATCH (targetScan:TS:Scan)-[:CONTAINS_PROJECT]->(targetProject)
// Group by project name, if the target project is the same and the distance. Return those as result.
RETURN sourceProject.name AS sourceProject
,sourceScan.name AS sourceScan
,(targetProject <> sourceProject) AS isDifferentTargetProject
,(targetScan <> sourceScan) AS isDifferentTargetScan
,distance
,distanceTotalPairCount
,distanceTotalSourceCount
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Path Finding - All pairs shortest path algorithm - Stream - Longest paths as examples
//
// Streams the all-pairs-shortest-path results of the projection named
// $dependencies_projection + '-cleaned', keeps the ten pairs with the highest
// distance and returns one concrete shortest DEPENDS_ON path for each of them.

CALL gds.allShortestPaths.stream($dependencies_projection + '-cleaned')
YIELD sourceNodeId, targetNodeId, distance
// Filter out all pairs that have no connection (infinite distance)
WHERE gds.util.isFinite(distance) = true
  AND sourceNodeId <> targetNodeId // Filter out pairs where source and target are the same node
 WITH toInteger(distance)            AS distance
     ,gds.util.asNode(sourceNodeId)  AS source
     ,gds.util.asNode(targetNodeId)  AS target
// Optionally get the project (e.g. Java Artifact, Typescript Project) the source belongs to
OPTIONAL MATCH (sourceProject:Artifact|Project)-[:CONTAINS]->(source)
// Optionally get the scan that contained that project
OPTIONAL MATCH (sourceScan:TS:Scan)-[:CONTAINS_PROJECT]->(sourceProject)
// Prefer the scan name and fall back to the project name if there is no scan
 WITH *, coalesce(sourceScan, sourceProject).name AS sourceContainerName
ORDER BY distance DESC, sourceContainerName ASC
// Only output the top 10 entries
LIMIT 10
// Get the shortest path for the source and target node
MATCH path = SHORTEST 1 (source)-[:DEPENDS_ON]->+(target)
RETURN distance, sourceContainerName, sourceProject, sourceScan, path
// Row order is only guaranteed by an ORDER BY on the final projection,
// so the sort is repeated here after the additional MATCH.
ORDER BY distance DESC, sourceContainerName ASC
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,14 @@ UNWIND sourcesAndTargets AS sourceAndTarget
// Optionally get the project (e.g. Java Artifact, Typescript Project) the source and target belong to
OPTIONAL MATCH (sourceProject:Artifact|Project)-[:CONTAINS]->(source)
OPTIONAL MATCH (targetProject:Artifact|Project)-[:CONTAINS]->(target)
// Optionally get the name of the scan that contained that project
OPTIONAL MATCH (sourceScan:TS:Scan)-[:CONTAINS_PROJECT]->(sourceProject)
OPTIONAL MATCH (targetScan:TS:Scan)-[:CONTAINS_PROJECT]->(targetProject)
// Group by project name, if the target project is the same and the distance. Return those as result.
RETURN sourceProject.name AS sourceProject
,sourceScan.name AS sourceScan
,(targetProject <> sourceProject) AS isDifferentTargetProject
,(targetScan <> sourceScan) AS isDifferentTargetScan
,distance
,distanceTotalPairCount
,distanceTotalSourceCount
Expand Down
25 changes: 25 additions & 0 deletions cypher/Path_Finding/Path_Finding_6_Longest_paths_examples.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Path Finding - Longest path - Stream - Max. paths as examples
//
// Streams the longest-path results of the projection named
// $dependencies_projection + '-cleaned' and returns the ten paths with the
// highest total cost, together with the project and scan of their source node.

CALL gds.dag.longestPath.stream($dependencies_projection + '-cleaned')
YIELD index, sourceNode, targetNode, totalCost, path
 WITH index
     ,path
     ,toInteger(totalCost) AS distance
     ,sourceNode           AS sourceNodeId
     ,targetNode           AS targetNodeId
WHERE sourceNodeId <> targetNodeId // Filter out paths that start and end at the same node
 WITH index
     ,path
     ,distance
     ,gds.util.asNode(sourceNodeId) AS source
// Optionally get the project (e.g. Java Artifact, Typescript Project) the source belongs to
OPTIONAL MATCH (sourceProject:Artifact|Project)-[:CONTAINS]->(source)
// Optionally get the scan that contained that project
OPTIONAL MATCH (sourceScan:TS:Scan)-[:CONTAINS_PROJECT]->(sourceProject)
// Return the longest paths first. The container name prefers the scan name
// and falls back to the project name if there is no scan.
RETURN distance
      ,index
      ,coalesce(sourceScan, sourceProject).name AS sourceContainerName
      ,sourceProject
      ,sourceScan
      ,path
ORDER BY distance DESC, sourceContainerName ASC
// Only output the top 10 entries
LIMIT 10
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

MATCH (typescriptScan:TS:Scan)
WITH typescriptScan
,replace(reverse(split(reverse(typescriptScan.fileName), '/')[0]), '.json', '') AS scanName
,reverse(split(reverse(split(typescriptScan.fileName, '/.reports/')[0]), '/')[0]) AS scanName
SET typescriptScan.name = scanName
RETURN count(*) AS numberOfNamesScans
RETURN count(*) AS numberOfNamesScans
// Debugging
//RETURN scanName, scanNameOld, typescriptScan.fileName
Loading