diff --git a/COMMANDS.md b/COMMANDS.md index f610c6f8a..883427f08 100644 --- a/COMMANDS.md +++ b/COMMANDS.md @@ -208,16 +208,49 @@ Be sure to replace `path/to/local/neo4j` and `password` with your settings. cat ./cypher/Get_Graph_Data_Science_Library_Version.cypher | path/to/local/neo4j/bin/cypher-shell -u neo4j -p password --format plain ``` +Query parameters can be added with the option `--param`. Here is an example: + +```shell +cat ./cypher/Get_Graph_Data_Science_Library_Version.cypher | path/to/local/neo4j/bin/cypher-shell -u neo4j -p password --format plain --param '{a: 1}' +``` + +For a full list of options use the help function: + +```shell +path/to/local/neo4j/bin/cypher-shell --help +``` + ### HTTP API Use [executeQuery.sh](./scripts/executeQuery.sh) to execute a Cypher query from the file given as an argument. -It uses `curl` and `jq` to access the HTTP API of Neo4j. +It uses `curl` and `jq` to access the [HTTP API of Neo4j](https://neo4j.com/docs/http-api/current/query). Here is an example: ```shell ./scripts/executeQuery.sh ./cypher/Get_Graph_Data_Science_Library_Version.cypher ``` +Query parameters can be added as arguments after the file name. Here is an example: + +```shell +./scripts/executeQuery.sh ./cypher/Get_Graph_Data_Science_Library_Version.cypher a=1 +``` + +### executeQueryFunctions + +The script [executeQueryFunctions.sh](./scripts/executeQueryFunctions.sh) contains functions to simplify the +call of [executeQuery.sh](./scripts/executeQuery.sh) for different purposes. For example, `execute_cypher_summarized` +prints out the results on the console in a summarized manner and `execute_cypher_expect_results` fails when there are no results. + +The script also provides an API abstraction that defaults to [HTTP](#http-api), but can easily be switched to [cypher-shell](#cypher-shell). + +Query parameters can be added as arguments after the file name. 
Here is an example: + +```shell +source "${SCRIPTS_DIR}/executeQueryFunctions.sh" +execute_cypher ./cypher/Get_Graph_Data_Science_Library_Version.cypher a=1 +``` + ## Stop Neo4j Use [stopNeo4j.sh](./scripts/stopNeo4j.sh) to stop the locally running Neo4j Graph Database. It does nothing if the database is already stopped. It runs the script with a temporary `NEO4J_HOME` environment variable to not interfere with a possibly globally installed Neo4j installation. diff --git a/cypher/Centrality/Centrality_0b_Delete_Subraph_Projection.cypher b/cypher/Centrality/Centrality_0b_Delete_Subraph_Projection.cypher deleted file mode 100644 index 5d5e32c1d..000000000 --- a/cypher/Centrality/Centrality_0b_Delete_Subraph_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Centrality 0b Delete Subgraph Projection - - CALL gds.graph.drop('package-centrality-without-empty', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Centrality/Centrality_1_Create_Projection.cypher b/cypher/Centrality/Centrality_1_Create_Projection.cypher deleted file mode 100644 index be083dc8f..000000000 --- a/cypher/Centrality/Centrality_1_Create_Projection.cypher +++ /dev/null @@ -1,8 +0,0 @@ -//Centrality 1 Create Projection - - CALL gds.graph.project('package-centrality', 'Package', 'DEPENDS_ON', { - relationshipProperties: ['weight', 'weightInterfaces', 'weight25PercentInterfaces'], - nodeProperties: ['incomingDependencies', 'outgoingDependencies'] - }) - YIELD graphName, nodeCount, relationshipCount -RETURN graphName, nodeCount, relationshipCount \ No newline at end of file diff --git a/cypher/Centrality/Centrality_1a_List_TopPercentile.cypher b/cypher/Centrality/Centrality_1a_List_TopPercentile.cypher new file mode 100644 index 000000000..9fef2deb8 --- /dev/null +++ b/cypher/Centrality/Centrality_1a_List_TopPercentile.cypher @@ -0,0 +1,22 @@ 
+// List the top centrality nodes with a 99.5 percentile or higher + + MATCH (member) + WHERE $dependencies_projection_node IN LABELS(member) + AND member[$dependencies_projection_write_property] IS NOT NULL + WITH count(DISTINCT member) AS memberCount + ,percentileDisc(member[$dependencies_projection_write_property], 0.995) AS centralityPercentile995 + ,collect(DISTINCT member) AS members +UNWIND members AS member + WITH memberCount + ,centralityPercentile995 + ,member + ORDER BY member[$dependencies_projection_write_property] DESCENDING + WHERE member[$dependencies_projection_write_property] >= centralityPercentile995 + WITH memberCount + ,centralityPercentile995 + ,max(member[$dependencies_projection_write_property]) AS maxCentrality + ,collect(DISTINCT member) AS topMembers + RETURN memberCount + ,maxCentrality + ,centralityPercentile995 + ,topMembers \ No newline at end of file diff --git a/cypher/Centrality/Centrality_1b_Create_Subgraph_Without_Empty_Packages.cypher b/cypher/Centrality/Centrality_1b_Create_Subgraph_Without_Empty_Packages.cypher deleted file mode 100644 index 7016f17d9..000000000 --- a/cypher/Centrality/Centrality_1b_Create_Subgraph_Without_Empty_Packages.cypher +++ /dev/null @@ -1,10 +0,0 @@ -//Centrality 1b Create subgraph without empty packages - -CALL gds.beta.graph.project.subgraph( - 'package-centrality-without-empty', - 'package-centrality', - 'n.outgoingDependencies > 0 OR n.incomingDependencies > 0', - '*' -) - YIELD graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter -RETURN graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter \ No newline at end of file diff --git a/cypher/Centrality/Centrality_1b_List_TopPercent.cypher b/cypher/Centrality/Centrality_1b_List_TopPercent.cypher new file mode 100644 index 000000000..50b021707 --- /dev/null +++ b/cypher/Centrality/Centrality_1b_List_TopPercent.cypher @@ -0,0 +1,12 @@ +// List the top 2% nodes with the highest centrality score. 
+ + MATCH (member) + WHERE $dependencies_projection_node IN labels(member) + AND member[$dependencies_projection_write_property] IS NOT NULL + WITH toInteger(toFloat(count(DISTINCT member)) * 0.02) AS memberCount2Percent + ,collect(DISTINCT member) AS members +UNWIND members AS member + WITH memberCount2Percent, member + ORDER BY member[$dependencies_projection_write_property] DESCENDING + WITH memberCount2Percent, collect(DISTINCT member)[0..memberCount2Percent] AS topMembers + RETURN memberCount2Percent, topMembers \ No newline at end of file diff --git a/cypher/Centrality/Centrality_1c_Label_Delete.cypher b/cypher/Centrality/Centrality_1c_Label_Delete.cypher new file mode 100644 index 000000000..6ddd2d301 --- /dev/null +++ b/cypher/Centrality/Centrality_1c_Label_Delete.cypher @@ -0,0 +1,11 @@ +// Centrality Label Delete + + CALL db.labels() YIELD label + WHERE label = 'Top' + apoc.text.capitalize($dependencies_projection_write_property) + WITH collect(label) AS selectedLabels + MATCH (member) + WHERE $dependencies_projection_node IN LABELS(member) + AND member[$dependencies_projection_write_property] IS NOT NULL + WITH collect(member) AS members, selectedLabels + CALL apoc.create.removeLabels(members, selectedLabels) YIELD node +RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Centrality/Centrality_1d_Label_Add.cypher b/cypher/Centrality/Centrality_1d_Label_Add.cypher new file mode 100644 index 000000000..d963bf1e0 --- /dev/null +++ b/cypher/Centrality/Centrality_1d_Label_Add.cypher @@ -0,0 +1,16 @@ +// Centrality Add label to the top 2% nodes with the highest centrality score + + MATCH (member) + WHERE $dependencies_projection_node IN labels(member) + AND member[$dependencies_projection_write_property] IS NOT NULL + WITH toInteger(toFloat(count(DISTINCT member)) * 0.02) AS memberCount2Percent + ,collect(DISTINCT member) AS members +UNWIND members AS member + WITH memberCount2Percent, member + ORDER BY 
member[$dependencies_projection_write_property] DESCENDING + WITH memberCount2Percent + ,collect(DISTINCT member)[0..memberCount2Percent] AS topMembers + ,'Top' + apoc.text.capitalize($dependencies_projection_write_property) AS labelName +UNWIND topMembers AS topMember + CALL apoc.create.addLabels(topMember, [labelName]) YIELD node +RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Centrality/Centrality_2a_Page_Rank_Estimate_Memory.cypher b/cypher/Centrality/Centrality_2a_Page_Rank_Estimate.cypher similarity index 61% rename from cypher/Centrality/Centrality_2a_Page_Rank_Estimate_Memory.cypher rename to cypher/Centrality/Centrality_2a_Page_Rank_Estimate.cypher index 92697f425..9cfd12a46 100644 --- a/cypher/Centrality/Centrality_2a_Page_Rank_Estimate_Memory.cypher +++ b/cypher/Centrality/Centrality_2a_Page_Rank_Estimate.cypher @@ -1,11 +1,12 @@ //Centrality 2a Page Rank Estimate Memory -CALL gds.pageRank.write.estimate('package-centrality-without-empty', { - writeProperty: 'pageRank' +CALL gds.pageRank.write.estimate( + $dependencies_projection + '-without-empty', { + writeProperty: $dependencies_projection_write_property ,maxIterations: 50 ,dampingFactor: 0.85 ,tolerance: 0.00000001 - ,relationshipWeightProperty: 'weight25PercentInterfaces' + ,relationshipWeightProperty: $dependencies_projection_weight_property ,scaler: "L1Norm" }) YIELD nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView diff --git a/cypher/Centrality/Centrality_2b_Page_Rank_Statistics.cypher b/cypher/Centrality/Centrality_2b_Page_Rank_Statistics.cypher index a5b7a84a7..c41d8d581 100644 --- a/cypher/Centrality/Centrality_2b_Page_Rank_Statistics.cypher +++ b/cypher/Centrality/Centrality_2b_Page_Rank_Statistics.cypher @@ -1,10 +1,11 @@ //Centrality 2b Page Rank Statistics -CALL gds.pageRank.stats('package-centrality-without-empty', { +CALL gds.pageRank.stats( + $dependencies_projection + '-without-empty', { maxIterations: 50 
,dampingFactor: 0.85 ,tolerance: 0.00000001 - ,relationshipWeightProperty: 'weight25PercentInterfaces' + ,relationshipWeightProperty: $dependencies_projection_weight_property ,scaler: "L1Norm" }) YIELD ranIterations diff --git a/cypher/Centrality/Centrality_3c_Page_Rank_Stream.cypher b/cypher/Centrality/Centrality_3c_Page_Rank_Stream.cypher index 85184f61e..28abbd527 100644 --- a/cypher/Centrality/Centrality_3c_Page_Rank_Stream.cypher +++ b/cypher/Centrality/Centrality_3c_Page_Rank_Stream.cypher @@ -1,17 +1,17 @@ //Centrality 3c Page Rank Stream -CALL gds.pageRank.stream('package-centrality-without-empty', { +CALL gds.pageRank.stream( + $dependencies_projection + '-without-empty', { maxIterations: 50 ,dampingFactor: 0.85 ,tolerance: 0.00000001 - ,relationshipWeightProperty: 'weight25PercentInterfaces' + ,relationshipWeightProperty: $dependencies_projection_weight_property ,scaler: "L2Norm" }) YIELD nodeId, score - WITH gds.util.asNode(nodeId) AS package, score -RETURN package.fqn AS fullQualifiedPackageName - ,package.name AS packageName + WITH gds.util.asNode(nodeId) AS member, score +RETURN coalesce(member.fqn, member.fileName, member.name) AS memberName ,score - ,package.incomingDependencies AS incomingDependencies - ,package.outgoingDependencies AS outgoingDependencies - ORDER BY score DESC, packageName ASC \ No newline at end of file + ,member.incomingDependencies AS incomingDependencies + ,member.outgoingDependencies AS outgoingDependencies + ORDER BY score DESC, memberName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_3d_Page_Rank_Write.cypher b/cypher/Centrality/Centrality_3d_Page_Rank_Write.cypher index 8ef3fbd78..6d192bb98 100644 --- a/cypher/Centrality/Centrality_3d_Page_Rank_Write.cypher +++ b/cypher/Centrality/Centrality_3d_Page_Rank_Write.cypher @@ -1,12 +1,13 @@ //Centrality 3d Page Rank Write -CALL gds.pageRank.write('package-centrality-without-empty', { +CALL gds.pageRank.write( + $dependencies_projection + 
'-without-empty', { maxIterations: 50 ,dampingFactor: 0.85 ,tolerance: 0.00000001 - ,relationshipWeightProperty: 'weight25PercentInterfaces' + ,relationshipWeightProperty: $dependencies_projection_weight_property ,scaler: "L2Norm" - ,writeProperty: "pageRank25PercentInterfaces" + ,writeProperty: $dependencies_projection_write_property }) YIELD nodePropertiesWritten ,ranIterations diff --git a/cypher/Centrality/Centrality_4a_Article_Rank_Estimate_Memory.cypher b/cypher/Centrality/Centrality_4a_Article_Rank_Estimate.cypher similarity index 61% rename from cypher/Centrality/Centrality_4a_Article_Rank_Estimate_Memory.cypher rename to cypher/Centrality/Centrality_4a_Article_Rank_Estimate.cypher index f53822c95..aca0c8ad1 100644 --- a/cypher/Centrality/Centrality_4a_Article_Rank_Estimate_Memory.cypher +++ b/cypher/Centrality/Centrality_4a_Article_Rank_Estimate.cypher @@ -1,10 +1,11 @@ //Centrality 4a Article Rank Estimate Memory -CALL gds.articleRank.write.estimate('package-centrality-without-empty', { - writeProperty: 'articleRank' +CALL gds.articleRank.write.estimate( + $dependencies_projection + '-without-empty', { + writeProperty: $dependencies_projection_write_property ,maxIterations: 30 ,dampingFactor: 0.85 ,tolerance: 0.00000001 - ,relationshipWeightProperty: 'weight25PercentInterfaces' + ,relationshipWeightProperty: $dependencies_projection_weight_property ,scaler: "L1Norm" }) YIELD nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView diff --git a/cypher/Centrality/Centrality_4b_Article_Rank_Statistics.cypher b/cypher/Centrality/Centrality_4b_Article_Rank_Statistics.cypher index ba3d91621..b2626c33a 100644 --- a/cypher/Centrality/Centrality_4b_Article_Rank_Statistics.cypher +++ b/cypher/Centrality/Centrality_4b_Article_Rank_Statistics.cypher @@ -1,9 +1,10 @@ //Centrality 4b Article Rank Statistics -CALL gds.articleRank.stats('package-centrality-without-empty', { +CALL gds.articleRank.stats( + $dependencies_projection + 
'-without-empty', { maxIterations: 30 ,dampingFactor: 0.85 ,tolerance: 0.00000001 - ,relationshipWeightProperty: 'weight25PercentInterfaces' + ,relationshipWeightProperty: $dependencies_projection_weight_property ,scaler: "L1Norm" }) YIELD ranIterations diff --git a/cypher/Centrality/Centrality_4c_Article_Rank_Stream.cypher b/cypher/Centrality/Centrality_4c_Article_Rank_Stream.cypher index 3a2e35940..804515ef4 100644 --- a/cypher/Centrality/Centrality_4c_Article_Rank_Stream.cypher +++ b/cypher/Centrality/Centrality_4c_Article_Rank_Stream.cypher @@ -1,17 +1,17 @@ //Centrality 4c Article Rank Stream -CALL gds.articleRank.stream('package-centrality-without-empty', { +CALL gds.articleRank.stream( + $dependencies_projection + '-without-empty', { maxIterations: 30 ,dampingFactor: 0.85 ,tolerance: 0.00000001 - ,relationshipWeightProperty: 'weight25PercentInterfaces' + ,relationshipWeightProperty: $dependencies_projection_weight_property ,scaler: "L2Norm" }) YIELD nodeId, score - WITH gds.util.asNode(nodeId) AS package, score -RETURN package.fqn AS fullQualifiedPackageName - ,package.name AS packageName + WITH gds.util.asNode(nodeId) AS member, score +RETURN coalesce(member.fqn, member.fileName, member.name) AS memberName ,score - ,package.incomingDependencies AS incomingDependencies - ,package.outgoingDependencies AS outgoingDependencies - ORDER BY score DESC, packageName ASC \ No newline at end of file + ,member.incomingDependencies AS incomingDependencies + ,member.outgoingDependencies AS outgoingDependencies + ORDER BY score DESC, memberName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_4d_Article_Rank_Write.cypher b/cypher/Centrality/Centrality_4d_Article_Rank_Write.cypher index ad548f352..90e7d4cd8 100644 --- a/cypher/Centrality/Centrality_4d_Article_Rank_Write.cypher +++ b/cypher/Centrality/Centrality_4d_Article_Rank_Write.cypher @@ -1,12 +1,13 @@ //Centrality 4d Article Rank Write -CALL 
gds.articleRank.write('package-centrality-without-empty', { +CALL gds.articleRank.write( + $dependencies_projection + '-without-empty', { maxIterations: 50 ,dampingFactor: 0.85 ,tolerance: 0.00000001 - ,relationshipWeightProperty: 'weight25PercentInterfaces' + ,relationshipWeightProperty: $dependencies_projection_weight_property ,scaler: "L2Norm" - ,writeProperty: "articleRank25PercentInterfaces" + ,writeProperty: $dependencies_projection_write_property }) YIELD nodePropertiesWritten ,ranIterations diff --git a/cypher/Centrality/Centrality_5a_Betweeness_Estimate.cypher b/cypher/Centrality/Centrality_5a_Betweeness_Estimate.cypher index 0671a774e..7763f3407 100644 --- a/cypher/Centrality/Centrality_5a_Betweeness_Estimate.cypher +++ b/cypher/Centrality/Centrality_5a_Betweeness_Estimate.cypher @@ -1,7 +1,9 @@ //Centrality 5a Betweeness Estimate -CALL gds.betweenness.write.estimate('package-centrality-without-empty', { - writeProperty: 'betweenness' +CALL gds.betweenness.write.estimate( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,writeProperty: $dependencies_projection_write_property }) YIELD nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView RETURN nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView \ No newline at end of file diff --git a/cypher/Centrality/Centrality_5b_Betweeness_Statistics.cypher b/cypher/Centrality/Centrality_5b_Betweeness_Statistics.cypher index fc7f45e72..0b8b6c66a 100644 --- a/cypher/Centrality/Centrality_5b_Betweeness_Statistics.cypher +++ b/cypher/Centrality/Centrality_5b_Betweeness_Statistics.cypher @@ -1,7 +1,8 @@ //Centrality 5b Betweeness Statistics - CALL gds.betweenness.stats('package-centrality-without-empty', { - relationshipWeightProperty: 'weight25PercentInterfaces' + CALL gds.betweenness.stats( + $dependencies_projection + '-without-empty', { + 
relationshipWeightProperty: $dependencies_projection_weight_property }) YIELD preProcessingMillis ,computeMillis diff --git a/cypher/Centrality/Centrality_5c_Betweeness_Stream.cypher b/cypher/Centrality/Centrality_5c_Betweeness_Stream.cypher index e04abab33..09120f602 100644 --- a/cypher/Centrality/Centrality_5c_Betweeness_Stream.cypher +++ b/cypher/Centrality/Centrality_5c_Betweeness_Stream.cypher @@ -1,13 +1,13 @@ // Centrality 5c Betweeness Stream -CALL gds.betweenness.stream('package-centrality-without-empty', { - relationshipWeightProperty: 'weight25PercentInterfaces' +CALL gds.betweenness.stream( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property }) YIELD nodeId, score - WITH gds.util.asNode(nodeId) AS package, score -RETURN package.fqn AS fullQualifiedPackageName - ,package.name AS packageName + WITH gds.util.asNode(nodeId) AS member, score +RETURN coalesce(member.fqn, member.fileName, member.name) AS memberName ,score - ,package.incomingDependencies AS incomingDependencies - ,package.outgoingDependencies AS outgoingDependencies - ORDER BY score DESC, packageName ASC \ No newline at end of file + ,member.incomingDependencies AS incomingDependencies + ,member.outgoingDependencies AS outgoingDependencies + ORDER BY score DESC, memberName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_5d_Betweeness_Write.cypher b/cypher/Centrality/Centrality_5d_Betweeness_Write.cypher index 69bfb1b46..34f2dae85 100644 --- a/cypher/Centrality/Centrality_5d_Betweeness_Write.cypher +++ b/cypher/Centrality/Centrality_5d_Betweeness_Write.cypher @@ -1,8 +1,9 @@ // Centrality 5d Betweeness Write -CALL gds.betweenness.write('package-centrality-without-empty', { - relationshipWeightProperty: 'weight25PercentInterfaces', - writeProperty: 'betweenness25PercentInterfaces' +CALL gds.betweenness.write( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: 
$dependencies_projection_weight_property + ,writeProperty: $dependencies_projection_write_property }) YIELD nodePropertiesWritten ,preProcessingMillis diff --git a/cypher/Centrality/Centrality_6a_Cost_effective_Lazy_Forward_CELF_Estimate.cypher b/cypher/Centrality/Centrality_6a_Cost_effective_Lazy_Forward_CELF_Estimate.cypher new file mode 100644 index 000000000..d68de7096 --- /dev/null +++ b/cypher/Centrality/Centrality_6a_Cost_effective_Lazy_Forward_CELF_Estimate.cypher @@ -0,0 +1,9 @@ +// Centrality 6a Cost-effective Lazy Forward (CELF) Estimate + + CALL gds.beta.influenceMaximization.celf.write.estimate( + $dependencies_projection + '-without-empty', { + seedSetSize: 5 + ,writeProperty: $dependencies_projection_write_property +}) + YIELD nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView +RETURN nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView \ No newline at end of file diff --git a/cypher/Centrality/Centrality_6b_Cost_effective_Lazy_Forward_CELF_Statistics.cypher b/cypher/Centrality/Centrality_6b_Cost_effective_Lazy_Forward_CELF_Statistics.cypher new file mode 100644 index 000000000..ac162508c --- /dev/null +++ b/cypher/Centrality/Centrality_6b_Cost_effective_Lazy_Forward_CELF_Statistics.cypher @@ -0,0 +1,12 @@ +// Centrality 6b Cost-effective Lazy Forward (CELF) Statistics + +CALL gds.beta.influenceMaximization.celf.stats( + $dependencies_projection + '-without-empty', { + seedSetSize: 5 + }) + YIELD computeMillis + ,totalSpread + ,nodeCount +RETURN computeMillis + ,totalSpread + ,nodeCount \ No newline at end of file diff --git a/cypher/Centrality/Centrality_6c_Cost_effective_Lazy_Forward_CELF_Stream.cypher b/cypher/Centrality/Centrality_6c_Cost_effective_Lazy_Forward_CELF_Stream.cypher index b30a6c839..c3380e5aa 100644 --- a/cypher/Centrality/Centrality_6c_Cost_effective_Lazy_Forward_CELF_Stream.cypher +++ 
b/cypher/Centrality/Centrality_6c_Cost_effective_Lazy_Forward_CELF_Stream.cypher @@ -1,11 +1,13 @@ -//Centrality 6c Cost-effective Lazy Forward (CELF) Stream +// Centrality 6c Cost-effective Lazy Forward (CELF) Stream - CALL gds.beta.influenceMaximization.celf.stream('package-centrality-without-empty', {seedSetSize: 5}) + CALL gds.beta.influenceMaximization.celf.stream( + $dependencies_projection + '-without-empty', { + seedSetSize: 5 + }) YIELD nodeId, spread - WITH gds.util.asNode(nodeId) AS package, spread -RETURN package.fqn AS fullQualifiedPackageName - ,package.name AS packageName + WITH gds.util.asNode(nodeId) AS member, spread +RETURN coalesce(member.fqn, member.fileName, member.name) AS memberName ,spread - ,package.incomingDependencies AS incomingDependencies - ,package.outgoingDependencies AS outgoingDependencies - ORDER BY spread DESC, packageName ASC \ No newline at end of file + ,member.incomingDependencies AS incomingDependencies + ,member.outgoingDependencies AS outgoingDependencies + ORDER BY spread DESC, memberName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_6d_Cost_effective_Lazy_Forward_CELF_Write.cypher b/cypher/Centrality/Centrality_6d_Cost_effective_Lazy_Forward_CELF_Write.cypher new file mode 100644 index 000000000..0fe0723c3 --- /dev/null +++ b/cypher/Centrality/Centrality_6d_Cost_effective_Lazy_Forward_CELF_Write.cypher @@ -0,0 +1,17 @@ +// Centrality 6d Cost-effective Lazy Forward (CELF) Write + + CALL gds.beta.influenceMaximization.celf.write( + $dependencies_projection + '-without-empty', { + seedSetSize: 5 + ,writeProperty: $dependencies_projection_write_property +}) + YIELD writeMillis + ,nodePropertiesWritten + ,computeMillis + ,totalSpread + ,nodeCount +RETURN writeMillis + ,nodePropertiesWritten + ,computeMillis + ,totalSpread + ,nodeCount \ No newline at end of file diff --git a/cypher/Centrality/Centrality_7a_Harmonic_Closeness_Stream.cypher 
b/cypher/Centrality/Centrality_7a_Harmonic_Closeness_Stream.cypher deleted file mode 100644 index a93ddbe92..000000000 --- a/cypher/Centrality/Centrality_7a_Harmonic_Closeness_Stream.cypher +++ /dev/null @@ -1,11 +0,0 @@ -// Centrality 7a Harmonic Closeness Stream - -CALL gds.alpha.closeness.harmonic.stream('package-centrality-without-empty', {}) - YIELD nodeId, centrality - WITH gds.util.asNode(nodeId) AS package, centrality -RETURN package.fqn AS fullQualifiedPackageName - ,package.name AS packageName - ,centrality - ,package.incomingDependencies AS incomingDependencies - ,package.outgoingDependencies AS outgoingDependencies - ORDER BY centrality DESC, packageName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_7b_Harmonic_Closeness_Statistics.cypher b/cypher/Centrality/Centrality_7b_Harmonic_Closeness_Statistics.cypher new file mode 100644 index 000000000..bbded7550 --- /dev/null +++ b/cypher/Centrality/Centrality_7b_Harmonic_Closeness_Statistics.cypher @@ -0,0 +1,19 @@ +// Centrality 7b Harmonic Closeness Statistics + +CALL gds.closeness.harmonic.stats( + $dependencies_projection + '-without-empty', {}) + YIELD preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,centralityDistribution +RETURN preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,centralityDistribution.min + ,centralityDistribution.mean + ,centralityDistribution.max + ,centralityDistribution.p50 + ,centralityDistribution.p75 + ,centralityDistribution.p90 + ,centralityDistribution.p95 + ,centralityDistribution.p99 \ No newline at end of file diff --git a/cypher/Centrality/Centrality_7c_Harmonic_Closeness_Stream.cypher b/cypher/Centrality/Centrality_7c_Harmonic_Closeness_Stream.cypher new file mode 100644 index 000000000..1801c3241 --- /dev/null +++ b/cypher/Centrality/Centrality_7c_Harmonic_Closeness_Stream.cypher @@ -0,0 +1,10 @@ +// Centrality 7c Harmonic Closeness Stream + +CALL gds.alpha.closeness.harmonic.stream($dependencies_projection + 
'-without-empty', {}) + YIELD nodeId, centrality + WITH gds.util.asNode(nodeId) AS member, centrality +RETURN coalesce(member.fqn, member.fileName, member.name) AS memberName + ,centrality + ,member.incomingDependencies AS incomingDependencies + ,member.outgoingDependencies AS outgoingDependencies + ORDER BY centrality DESC, memberName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_7b_Harmonic_Closeness_Write.cypher b/cypher/Centrality/Centrality_7d_Harmonic_Closeness_Write.cypher similarity index 71% rename from cypher/Centrality/Centrality_7b_Harmonic_Closeness_Write.cypher rename to cypher/Centrality/Centrality_7d_Harmonic_Closeness_Write.cypher index c7d817bbc..7f9f0efb0 100644 --- a/cypher/Centrality/Centrality_7b_Harmonic_Closeness_Write.cypher +++ b/cypher/Centrality/Centrality_7d_Harmonic_Closeness_Write.cypher @@ -1,7 +1,8 @@ -// Centrality 7b Harmonic Closeness Write +// Centrality 7d Harmonic Closeness Write -CALL gds.alpha.closeness.harmonic.write('package-centrality-without-empty', { - writeProperty: 'harmonicCentrality' +CALL gds.alpha.closeness.harmonic.write( + $dependencies_projection + '-without-empty', { + writeProperty: $dependencies_projection_write_property }) YIELD nodes ,preProcessingMillis diff --git a/cypher/Centrality/Centrality_8a_Closeness_Statistics.cypher b/cypher/Centrality/Centrality_8b_Closeness_Statistics.cypher similarity index 80% rename from cypher/Centrality/Centrality_8a_Closeness_Statistics.cypher rename to cypher/Centrality/Centrality_8b_Closeness_Statistics.cypher index 567a307ae..9e23c6d6e 100644 --- a/cypher/Centrality/Centrality_8a_Closeness_Statistics.cypher +++ b/cypher/Centrality/Centrality_8b_Closeness_Statistics.cypher @@ -1,6 +1,7 @@ -//Centrality 8a Closeness Statistics +//Centrality 8b Closeness Statistics -CALL gds.beta.closeness.stats('package-centrality-without-empty', { +CALL gds.beta.closeness.stats( + $dependencies_projection + '-without-empty', { useWassermanFaust: true }) 
YIELD preProcessingMillis diff --git a/cypher/Centrality/Centrality_8b_Closeness_Stream.cypher b/cypher/Centrality/Centrality_8b_Closeness_Stream.cypher deleted file mode 100644 index dd9cbbc81..000000000 --- a/cypher/Centrality/Centrality_8b_Closeness_Stream.cypher +++ /dev/null @@ -1,13 +0,0 @@ -// Centrality 8b Closeness Stream - -CALL gds.beta.closeness.stream('package-centrality-without-empty', { - useWassermanFaust: true -}) - YIELD nodeId, score - WITH gds.util.asNode(nodeId) AS package, score -RETURN package.fqn AS fullQualifiedPackageName - ,package.name AS packageName - ,score - ,package.incomingDependencies AS incomingDependencies - ,package.outgoingDependencies AS outgoingDependencies - ORDER BY score DESC, packageName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_8c_Closeness_Stream.cypher b/cypher/Centrality/Centrality_8c_Closeness_Stream.cypher new file mode 100644 index 000000000..ecb68123e --- /dev/null +++ b/cypher/Centrality/Centrality_8c_Closeness_Stream.cypher @@ -0,0 +1,13 @@ +// Centrality 8c Closeness Stream + +CALL gds.beta.closeness.stream( + $dependencies_projection + '-without-empty', { + useWassermanFaust: true +}) + YIELD nodeId, score + WITH gds.util.asNode(nodeId) AS member, score +RETURN coalesce(member.fqn, member.fileName, member.name) AS memberName + ,score + ,member.incomingDependencies AS incomingDependencies + ,member.outgoingDependencies AS outgoingDependencies + ORDER BY score DESC, memberName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_8c_Closeness_Write.cypher b/cypher/Centrality/Centrality_8d_Closeness_Write.cypher similarity index 69% rename from cypher/Centrality/Centrality_8c_Closeness_Write.cypher rename to cypher/Centrality/Centrality_8d_Closeness_Write.cypher index 3f3ba36c4..8004ed657 100644 --- a/cypher/Centrality/Centrality_8c_Closeness_Write.cypher +++ b/cypher/Centrality/Centrality_8d_Closeness_Write.cypher @@ -1,20 +1,23 @@ -// Centrality 8c Closeness 
Write +// Centrality 8d Closeness Write -CALL gds.beta.closeness.write('package-centrality-without-empty', { - useWassermanFaust: true, - writeProperty: 'closeness' +CALL gds.beta.closeness.write( + $dependencies_projection + '-without-empty', { + useWassermanFaust: true + ,writeProperty: $dependencies_projection_write_property }) YIELD nodePropertiesWritten ,preProcessingMillis ,computeMillis ,postProcessingMillis ,writeMillis + ,writeProperty ,centralityDistribution RETURN nodePropertiesWritten ,preProcessingMillis ,computeMillis ,postProcessingMillis ,writeMillis + ,writeProperty ,centralityDistribution.min ,centralityDistribution.mean ,centralityDistribution.max diff --git a/cypher/Centrality/Centrality_9a_Hyperlink_Induced_Topic_Search_HITS_Estimate.cypher b/cypher/Centrality/Centrality_9a_Hyperlink_Induced_Topic_Search_HITS_Estimate.cypher new file mode 100644 index 000000000..9413a2dba --- /dev/null +++ b/cypher/Centrality/Centrality_9a_Hyperlink_Induced_Topic_Search_HITS_Estimate.cypher @@ -0,0 +1,10 @@ +// Centrality 9a Hyperlink-Induced Topic Search (HITS) Memory Estimation + + CALL gds.alpha.hits.write.estimate( + $dependencies_projection + '-without-empty', { + hitsIterations: 20 + ,authProperty: $dependencies_projection_write_property + ,hubProperty: 'centralityHyperlinkInducedTopicSearchHub' +}) + YIELD nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView +RETURN nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView \ No newline at end of file diff --git a/cypher/Centrality/Centrality_9b_Hyperlink_Induced_Topic_Search_HITS_Statistics.cypher b/cypher/Centrality/Centrality_9b_Hyperlink_Induced_Topic_Search_HITS_Statistics.cypher new file mode 100644 index 000000000..c4dfcfa80 --- /dev/null +++ b/cypher/Centrality/Centrality_9b_Hyperlink_Induced_Topic_Search_HITS_Statistics.cypher @@ -0,0 +1,8 @@ +// Centrality 9b Hyperlink-Induced Topic Search (HITS) Statistics 
+ + CALL gds.alpha.hits.stats( + $dependencies_projection + '-without-empty', { + hitsIterations: 20 +}) + YIELD ranIterations, didConverge, preProcessingMillis, computeMillis +RETURN ranIterations, didConverge, preProcessingMillis, computeMillis \ No newline at end of file diff --git a/cypher/Centrality/Centrality_9c_Hyperlink_Induced_Topic_Search_HITS_Stream.cypher b/cypher/Centrality/Centrality_9c_Hyperlink_Induced_Topic_Search_HITS_Stream.cypher new file mode 100644 index 000000000..62895e558 --- /dev/null +++ b/cypher/Centrality/Centrality_9c_Hyperlink_Induced_Topic_Search_HITS_Stream.cypher @@ -0,0 +1,14 @@ +// Centrality 9c Hyperlink-Induced Topic Search (HITS) Stream + + CALL gds.alpha.hits.stream( + $dependencies_projection + '-without-empty', { + hitsIterations: 20 +}) + YIELD nodeId, values + WITH gds.util.asNode(nodeId) AS member, values +RETURN coalesce(member.fqn, member.fileName, member.name) AS memberName + ,values.auth AS authority + ,values.hub AS hub + ,member.incomingDependencies AS incomingDependencies + ,member.outgoingDependencies AS outgoingDependencies + ORDER BY values.auth DESC, memberName ASC \ No newline at end of file diff --git a/cypher/Centrality/Centrality_9d_Hyperlink_Induced_Topic_Search_HITS_Write.cypher b/cypher/Centrality/Centrality_9d_Hyperlink_Induced_Topic_Search_HITS_Write.cypher new file mode 100644 index 000000000..3521821ca --- /dev/null +++ b/cypher/Centrality/Centrality_9d_Hyperlink_Induced_Topic_Search_HITS_Write.cypher @@ -0,0 +1,10 @@ +// Centrality 9d Hyperlink-Induced Topic Search (HITS) Write + + CALL gds.alpha.hits.write( + $dependencies_projection + '-without-empty', { + hitsIterations: 20 + ,authProperty: $dependencies_projection_write_property + ,hubProperty: 'centralityHyperlinkInducedTopicSearchHub' +}) +YIELD nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, computeMillis, writeMillis +RETURN nodePropertiesWritten, ranIterations, didConverge, preProcessingMillis, 
computeMillis, writeMillis \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_2_Louvain_Estimate_Memory.cypher b/cypher/Community_Detection/Community_Detection_1a_Louvain_Estimate.cypher similarity index 59% rename from cypher/Community_Detection_Louvain/Community_Detection_2_Louvain_Estimate_Memory.cypher rename to cypher/Community_Detection/Community_Detection_1a_Louvain_Estimate.cypher index 26a01c67e..af11bfe52 100644 --- a/cypher/Community_Detection_Louvain/Community_Detection_2_Louvain_Estimate_Memory.cypher +++ b/cypher/Community_Detection/Community_Detection_1a_Louvain_Estimate.cypher @@ -1,10 +1,9 @@ -//Community Detection 2 Louvain Estimate Memory +//Community Detection Louvain Estimate Memory -CALL gds.louvain.write.estimate('package-dependencies-without-empty', { - maxLevels: 10, - maxIterations: 10, - tolerance: 0.0001, - relationshipWeightProperty: 'weight25PercentInterfaces', +CALL gds.louvain.write.estimate( + $dependencies_projection + '-without-empty', { + tolerance: 0.00001, + relationshipWeightProperty: $dependencies_projection_weight_property, writeProperty: 'louvainCommunityId', includeIntermediateCommunities: true }) diff --git a/cypher/Community_Detection_Louvain/Community_Detection_3_Louvain_Statistics.cypher b/cypher/Community_Detection/Community_Detection_1b_Louvain_Statistics.cypher similarity index 62% rename from cypher/Community_Detection_Louvain/Community_Detection_3_Louvain_Statistics.cypher rename to cypher/Community_Detection/Community_Detection_1b_Louvain_Statistics.cypher index 12970b38c..ce545b1b8 100644 --- a/cypher/Community_Detection_Louvain/Community_Detection_3_Louvain_Statistics.cypher +++ b/cypher/Community_Detection/Community_Detection_1b_Louvain_Statistics.cypher @@ -1,10 +1,9 @@ -//Community Detection 3 Louvain Statistics +//Community Detection Louvain Statistics -CALL gds.louvain.stats('package-dependencies-without-empty', { - maxLevels: 10, - maxIterations: 10, - 
tolerance: 0.0001, - relationshipWeightProperty: 'weight25PercentInterfaces', +CALL gds.louvain.stats( + $dependencies_projection + '-without-empty', { + tolerance: 0.00001, + relationshipWeightProperty: $dependencies_projection_weight_property, includeIntermediateCommunities: true }) YIELD communityCount @@ -23,4 +22,5 @@ RETURN communityCount ,communityDistribution.p75 ,communityDistribution.p90 ,communityDistribution.p95 - ,communityDistribution.p99 \ No newline at end of file + ,communityDistribution.p99 + ,communityDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_1c_Louvain_Stream.cypher b/cypher/Community_Detection/Community_Detection_1c_Louvain_Stream.cypher new file mode 100644 index 000000000..189ac2246 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_1c_Louvain_Stream.cypher @@ -0,0 +1,26 @@ +//Community Detection Louvain Stream + +CALL gds.louvain.stream( + $dependencies_projection + '-without-empty', { + tolerance: 0.00001, + includeIntermediateCommunities: true, + relationshipWeightProperty: $dependencies_projection_weight_property +}) + YIELD nodeId, communityId, intermediateCommunityIds + WITH communityId + ,intermediateCommunityIds + ,gds.util.asNode(nodeId) AS member + WITH communityId + ,intermediateCommunityIds + ,member + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + WITH communityId + ,intermediateCommunityIds + ,count(DISTINCT member) AS memberCount + ,collect(DISTINCT memberName) AS memberNames +RETURN intermediateCommunityIds[0] AS firstCommunityId + ,communityId AS finalCommunityId + ,memberCount + ,memberNames + ,intermediateCommunityIds + ORDER BY memberCount DESC, communityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_5a_Louvain_Write_louvainCommunityId.cypher b/cypher/Community_Detection/Community_Detection_1d_Louvain_Write_intermediateLouvainCommunityId.cypher similarity index 67% 
rename from cypher/Community_Detection_Louvain/Community_Detection_5a_Louvain_Write_louvainCommunityId.cypher rename to cypher/Community_Detection/Community_Detection_1d_Louvain_Write_intermediateLouvainCommunityId.cypher index 3b50fcea1..29124fd05 100644 --- a/cypher/Community_Detection_Louvain/Community_Detection_5a_Louvain_Write_louvainCommunityId.cypher +++ b/cypher/Community_Detection/Community_Detection_1d_Louvain_Write_intermediateLouvainCommunityId.cypher @@ -1,10 +1,11 @@ -//Community Detection 5a Louvain Write louvainCommunityId +//Community Detection Louvain Write intermediateLouvainCommunityId -CALL gds.louvain.write('package-dependencies-without-empty', { - maxIterations: 10, +CALL gds.louvain.write( + $dependencies_projection + '-without-empty', { tolerance: 0.00001, - writeProperty: 'louvainCommunityId', - relationshipWeightProperty: 'weight' + includeIntermediateCommunities: true, + relationshipWeightProperty: $dependencies_projection_weight_property, + writeProperty: 'intermediateLouvainCommunityId' }) YIELD preProcessingMillis ,computeMillis @@ -32,4 +33,5 @@ RETURN preProcessingMillis ,communityDistribution.p90 ,communityDistribution.p95 ,communityDistribution.p99 + ,communityDistribution.p999 ,modularities \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_5c_Louvain_Write_louvainCommunity25PercentInterfaces.cypher b/cypher/Community_Detection/Community_Detection_1d_Louvain_Write_louvainCommunityId.cypher similarity index 69% rename from cypher/Community_Detection_Louvain/Community_Detection_5c_Louvain_Write_louvainCommunity25PercentInterfaces.cypher rename to cypher/Community_Detection/Community_Detection_1d_Louvain_Write_louvainCommunityId.cypher index a08cae1f0..5a3ca4dab 100644 --- a/cypher/Community_Detection_Louvain/Community_Detection_5c_Louvain_Write_louvainCommunity25PercentInterfaces.cypher +++ b/cypher/Community_Detection/Community_Detection_1d_Louvain_Write_louvainCommunityId.cypher @@ 
-1,10 +1,11 @@ -//Community Detection 5c Louvain Write louvainCommunity25PercentInterfaces +//Community Detection Louvain write node property louvainCommunityId -CALL gds.louvain.write('package-dependencies-without-empty', { - maxIterations: 10, +CALL gds.louvain.write( + $dependencies_projection + '-without-empty', { tolerance: 0.00001, - writeProperty: 'louvainCommunity25PercentInterfaces', - relationshipWeightProperty: 'weight25PercentInterfaces' + consecutiveIds: true, + relationshipWeightProperty: $dependencies_projection_weight_property, + writeProperty: 'louvainCommunityId' }) YIELD preProcessingMillis ,computeMillis @@ -32,4 +33,5 @@ RETURN preProcessingMillis ,communityDistribution.p90 ,communityDistribution.p95 ,communityDistribution.p99 + ,communityDistribution.p999 ,modularities \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_1e_Louvain_Label_Delete.cypher b/cypher/Community_Detection/Community_Detection_1e_Louvain_Label_Delete.cypher new file mode 100644 index 000000000..3cc8e0f65 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_1e_Louvain_Label_Delete.cypher @@ -0,0 +1,11 @@ +// Community Detection Louvain Label Delete + + CALL db.labels() YIELD label + WHERE label STARTS WITH $dependencies_projection_node + "Louvain" + WITH collect(label) AS selectedLabels + MATCH (member) + WHERE $dependencies_projection_node IN LABELS(member) + AND member.louvainCommunityId IS NOT NULL + WITH collect(member) AS members, selectedLabels + CALL apoc.create.removeLabels(members, selectedLabels) YIELD node +RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_1f_Louvain_Label.cypher b/cypher/Community_Detection/Community_Detection_1f_Louvain_Label.cypher new file mode 100644 index 000000000..7e00d1115 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_1f_Louvain_Label.cypher @@ -0,0 +1,13 @@ +// Community Detection Add 
LouvainCommunity+Id label + + MATCH (member) + WHERE member.louvainCommunityId IS NOT NULL + AND $dependencies_projection_node IN LABELS(member) + WITH member.louvainCommunityId AS communityId + ,collect(member) AS members + ,count(DISTINCT member) AS memberCount + ,$dependencies_projection_node + 'LouvainCommunity' + toString(member.louvainCommunityId) AS labelName + WHERE memberCount > 1 +UNWIND members AS member + CALL apoc.create.addLabels(member, [labelName]) YIELD node +RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_2_Leiden_Estimate_Memory.cypher b/cypher/Community_Detection/Community_Detection_2a_Leiden_Estimate.cypher similarity index 57% rename from cypher/Community_Detection_Leiden/Community_Detection_2_Leiden_Estimate_Memory.cypher rename to cypher/Community_Detection/Community_Detection_2a_Leiden_Estimate.cypher index 89d4da4e6..7554ce393 100644 --- a/cypher/Community_Detection_Leiden/Community_Detection_2_Leiden_Estimate_Memory.cypher +++ b/cypher/Community_Detection/Community_Detection_2a_Leiden_Estimate.cypher @@ -1,12 +1,12 @@ -//Community Detection 2 Leiden Estimate Memory +//Community Detection Leiden Estimate Memory -CALL gds.beta.leiden.write.estimate('package-dependencies-without-empty', { - maxLevels: 10, - gamma: 1.06, - theta: 0.00001, +CALL gds.beta.leiden.write.estimate( + $dependencies_projection + '-without-empty', { + gamma: toFloat($dependencies_leiden_gamma), + theta: 0.001, tolerance: 0.0000001, consecutiveIds: true, - relationshipWeightProperty: 'weight25PercentInterfaces', + relationshipWeightProperty: $dependencies_projection_weight_property, writeProperty: 'leidenCommunityId' }) YIELD nodeCount diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_3_Leiden_Statistics.cypher b/cypher/Community_Detection/Community_Detection_2b_Leiden_Statistics.cypher similarity index 59% rename from 
cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_3_Leiden_Statistics.cypher rename to cypher/Community_Detection/Community_Detection_2b_Leiden_Statistics.cypher index 7200b4b59..fbb1ee677 100644 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_3_Leiden_Statistics.cypher +++ b/cypher/Community_Detection/Community_Detection_2b_Leiden_Statistics.cypher @@ -1,10 +1,12 @@ -//Community Detection 3 Leiden Statistics +//Community Detection Leiden Statistics -CALL gds.beta.leiden.stats('artifact-dependencies-without-empty', { - gamma: 1.11, +CALL gds.beta.leiden.stats( + $dependencies_projection + '-without-empty', { + gamma: toFloat($dependencies_leiden_gamma), theta: 0.001, + tolerance: 0.0000001, includeIntermediateCommunities: true, - relationshipWeightProperty: 'weight' + relationshipWeightProperty: $dependencies_projection_weight_property }) YIELD communityCount ,ranLevels @@ -22,4 +24,5 @@ RETURN communityCount ,communityDistribution.p75 ,communityDistribution.p90 ,communityDistribution.p95 - ,communityDistribution.p99 \ No newline at end of file + ,communityDistribution.p99 + ,communityDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_2c_Leiden_Stream.cypher b/cypher/Community_Detection/Community_Detection_2c_Leiden_Stream.cypher new file mode 100644 index 000000000..d36664edf --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_2c_Leiden_Stream.cypher @@ -0,0 +1,28 @@ +//Community Detection Leiden Stream + +CALL gds.beta.leiden.stream( + $dependencies_projection + '-without-empty', { + gamma: toFloat($dependencies_leiden_gamma), + theta: 0.001, + tolerance: 0.0000001, + includeIntermediateCommunities: true, + relationshipWeightProperty: $dependencies_projection_weight_property +}) + YIELD nodeId, communityId, intermediateCommunityIds + WITH communityId + ,intermediateCommunityIds + ,gds.util.asNode(nodeId) AS member + WITH communityId + 
,intermediateCommunityIds + ,member + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + WITH communityId + ,intermediateCommunityIds + ,count(DISTINCT member) AS memberCount + ,collect(DISTINCT memberName) AS memberNames +RETURN intermediateCommunityIds[0] AS firstCommunityId + ,communityId AS finalCommunityId + ,memberCount + ,memberNames + ,intermediateCommunityIds + ORDER BY memberCount DESC, communityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_5_Leiden_Write_property_leidenCommunityId.cypher b/cypher/Community_Detection/Community_Detection_2d_Leiden_Write_Node_Property.cypher similarity index 70% rename from cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_5_Leiden_Write_property_leidenCommunityId.cypher rename to cypher/Community_Detection/Community_Detection_2d_Leiden_Write_Node_Property.cypher index 095c1b2ae..107ac9003 100644 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_5_Leiden_Write_property_leidenCommunityId.cypher +++ b/cypher/Community_Detection/Community_Detection_2d_Leiden_Write_Node_Property.cypher @@ -1,10 +1,12 @@ -//Community Detection 5 Leiden Write property leidenCommunityId +//Community Detection Leiden Write property leidenCommunityId -CALL gds.beta.leiden.write('artifact-dependencies-without-empty', { - gamma: 1.11, +CALL gds.beta.leiden.write( + $dependencies_projection + '-without-empty', { + gamma: toFloat($dependencies_leiden_gamma), theta: 0.001, + tolerance: 0.0000001, consecutiveIds: true, - relationshipWeightProperty: 'weight', + relationshipWeightProperty: $dependencies_projection_weight_property, writeProperty: 'leidenCommunityId' }) YIELD preProcessingMillis @@ -33,4 +35,5 @@ RETURN preProcessingMillis ,communityDistribution.p90 ,communityDistribution.p95 ,communityDistribution.p99 + ,communityDistribution.p999 ,modularities \ No newline at end of file diff --git 
a/cypher/Community_Detection/Community_Detection_2e_Leiden_Label_Delete.cypher b/cypher/Community_Detection/Community_Detection_2e_Leiden_Label_Delete.cypher new file mode 100644 index 000000000..a10b5442a --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_2e_Leiden_Label_Delete.cypher @@ -0,0 +1,11 @@ +// Community Detection Leiden Label Delete + + CALL db.labels() YIELD label + WHERE label STARTS WITH $dependencies_projection_node + "Leiden" + WITH collect(label) AS selectedLabels + MATCH (member) + WHERE $dependencies_projection_node IN LABELS(member) + AND member.leidenCommunityId IS NOT NULL + WITH collect(member) AS members, selectedLabels + CALL apoc.create.removeLabels(members, selectedLabels) YIELD node +RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_2f_Leiden_Label.cypher b/cypher/Community_Detection/Community_Detection_2f_Leiden_Label.cypher new file mode 100644 index 000000000..7403ecd31 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_2f_Leiden_Label.cypher @@ -0,0 +1,13 @@ +// Community Detection Add LeidenCommunity+Id label + + MATCH (member) + WHERE member.leidenCommunityId IS NOT NULL + AND $dependencies_projection_node IN LABELS(member) + WITH member.leidenCommunityId AS communityId + ,collect(member) AS members + ,count(DISTINCT member) AS memberCount + ,$dependencies_projection_node + 'LeidenCommunity' + toString(member.leidenCommunityId) AS labelName + WHERE memberCount > 1 +UNWIND members AS member + CALL apoc.create.addLabels(member, [labelName]) YIELD node +RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher b/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher new file mode 100644 index 000000000..189081360 --- /dev/null +++ 
b/cypher/Community_Detection/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher @@ -0,0 +1,26 @@ +// Community Detection Weakly Connected Components Estimate + +CALL gds.wcc.write.estimate( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property, + consecutiveIds: true, + writeProperty: 'weaklyConnectedComponentId' +}) + YIELD requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + ,mapView +RETURN requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + ,mapView \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher b/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher new file mode 100644 index 000000000..40e16201b --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher @@ -0,0 +1,25 @@ +// Community Detection Weakly Connected Components Statistics + +CALL gds.wcc.stats( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property, + consecutiveIds: true +}) + YIELD componentCount + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,componentDistribution +RETURN componentCount + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,componentDistribution.min + ,componentDistribution.mean + ,componentDistribution.max + ,componentDistribution.p50 + ,componentDistribution.p75 + ,componentDistribution.p90 + ,componentDistribution.p95 + ,componentDistribution.p99 + ,componentDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Stream.cypher 
b/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Stream.cypher new file mode 100644 index 000000000..14bf0ddfb --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3c_WeaklyConnectedComponents_Stream.cypher @@ -0,0 +1,20 @@ +// Community Detection Weakly Connected Components Stream + +CALL gds.wcc.stream( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property, + consecutiveIds: true +}) + YIELD nodeId, componentId + WITH componentId + ,gds.util.asNode(nodeId) AS member + WITH componentId + ,member + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + WITH componentId + ,count(DISTINCT member) AS memberCount + ,collect(DISTINCT memberName) AS memberNames +RETURN componentId + ,memberCount + ,memberNames + ORDER BY memberCount DESC, componentId ASC \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Write.cypher b/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Write.cypher new file mode 100644 index 000000000..72b3c67d2 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3d_WeaklyConnectedComponents_Write.cypher @@ -0,0 +1,30 @@ +// Community Detection Weakly Connected Components write node property weaklyConnectedComponentId + +CALL gds.wcc.write( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,consecutiveIds: true + ,writeProperty: 'weaklyConnectedComponentId' +}) +YIELD componentCount + ,preProcessingMillis + ,computeMillis + ,writeMillis + ,postProcessingMillis + ,nodePropertiesWritten + ,componentDistribution +RETURN componentCount + ,preProcessingMillis + ,computeMillis + ,writeMillis + ,postProcessingMillis + ,nodePropertiesWritten + ,componentDistribution.min + ,componentDistribution.mean + ,componentDistribution.max + ,componentDistribution.p50 + 
,componentDistribution.p75 + ,componentDistribution.p90 + ,componentDistribution.p95 + ,componentDistribution.p99 + ,componentDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher b/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher new file mode 100644 index 000000000..41db60d67 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher @@ -0,0 +1,11 @@ +// Community Detection Weakly Connected Components Label Delete + + CALL db.labels() YIELD label + WHERE label STARTS WITH $dependencies_projection_node + "WeaklyConnectedComponent" + WITH collect(label) AS selectedLabels + MATCH (member) + WHERE $dependencies_projection_node IN LABELS(member) + AND member.weaklyConnectedComponentId IS NOT NULL + WITH collect(member) AS members, selectedLabels + CALL apoc.create.removeLabels(members, selectedLabels) YIELD node +RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher b/cypher/Community_Detection/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher new file mode 100644 index 000000000..5f41cc147 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher @@ -0,0 +1,13 @@ +// Community Detection Add WeaklyConnectedComponent+Id label + + MATCH (member) + WHERE member.weaklyConnectedComponentId IS NOT NULL + AND $dependencies_projection_node IN LABELS(member) + WITH member.weaklyConnectedComponentId AS communityId + ,collect(member) AS members + ,count(DISTINCT member) AS memberCount + ,$dependencies_projection_node + 'WeaklyConnectedComponent' + toString(member.weaklyConnectedComponentId) AS labelName + WHERE memberCount > 1 +UNWIND members AS member + CALL apoc.create.addLabels(member, [labelName]) YIELD node +RETURN 
count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher b/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher new file mode 100644 index 000000000..cb526f14f --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_4a_Label_Propagation_Estimate.cypher @@ -0,0 +1,26 @@ +// Community Detection Label Propagation Estimate + +CALL gds.labelPropagation.write.estimate( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,consecutiveIds: true + ,writeProperty: 'labelPropagationCommunityId' +}) + YIELD requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + ,mapView +RETURN requiredMemory + ,nodeCount + ,relationshipCount + ,bytesMin + ,bytesMax + ,heapPercentageMin + ,heapPercentageMax + ,treeView + ,mapView \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4b_Label_Propagation_Statistics.cypher b/cypher/Community_Detection/Community_Detection_4b_Label_Propagation_Statistics.cypher new file mode 100644 index 000000000..6eacef868 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_4b_Label_Propagation_Statistics.cypher @@ -0,0 +1,29 @@ +// Community Detection Label Propagation Statistics + +CALL gds.labelPropagation.stats( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,consecutiveIds: true +}) +YIELD communityCount + ,ranIterations + ,didConverge + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,communityDistribution +RETURN communityCount + ,ranIterations + ,didConverge + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,communityDistribution.min + ,communityDistribution.mean + ,communityDistribution.max + ,communityDistribution.p50 + 
,communityDistribution.p75 + ,communityDistribution.p90 + ,communityDistribution.p95 + ,communityDistribution.p99 + ,communityDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Stream.cypher b/cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Stream.cypher new file mode 100644 index 000000000..71c2115fd --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_4c_Label_Propagation_Stream.cypher @@ -0,0 +1,20 @@ +// Community Detection Label Propagation Stream + +CALL gds.labelPropagation.stream( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,consecutiveIds: true +}) + YIELD nodeId, communityId + WITH communityId + ,gds.util.asNode(nodeId) AS member + WITH communityId + ,member + ,coalesce(member.fqn, member.fileName, member.name) AS memberName + WITH communityId + ,count(DISTINCT member) AS memberCount + ,collect(DISTINCT memberName) AS memberNames +RETURN communityId + ,memberCount + ,memberNames + ORDER BY memberCount DESC, communityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_5b_Louvain_Write_intermediateLouvainCommunityId.cypher b/cypher/Community_Detection/Community_Detection_4d_Label_Propagation_Write.cypher similarity index 52% rename from cypher/Community_Detection_Louvain/Community_Detection_5b_Louvain_Write_intermediateLouvainCommunityId.cypher rename to cypher/Community_Detection/Community_Detection_4d_Label_Propagation_Write.cypher index fb1ed92a2..6326cbc2c 100644 --- a/cypher/Community_Detection_Louvain/Community_Detection_5b_Louvain_Write_intermediateLouvainCommunityId.cypher +++ b/cypher/Community_Detection/Community_Detection_4d_Label_Propagation_Write.cypher @@ -1,30 +1,28 @@ -//Community Detection 5b Louvain Write intermediateLouvainCommunityId +// Community Detection Label Propagation write node property 
labelPropagationCommunityId -CALL gds.louvain.write('package-dependencies-without-empty', { - maxIterations: 10, - tolerance: 0.00001, - writeProperty: 'intermediateLouvainCommunityId', - relationshipWeightProperty: 'weight', - includeIntermediateCommunities: true +CALL gds.labelPropagation.write( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,consecutiveIds: true + ,writeProperty: 'labelPropagationCommunityId' }) -YIELD preProcessingMillis +YIELD ranIterations + ,didConverge + ,communityCount + ,preProcessingMillis ,computeMillis ,writeMillis ,postProcessingMillis ,nodePropertiesWritten - ,communityCount - ,ranLevels - ,modularity - ,modularities ,communityDistribution -RETURN preProcessingMillis +RETURN ranIterations + ,didConverge + ,communityCount + ,preProcessingMillis ,computeMillis ,writeMillis ,postProcessingMillis ,nodePropertiesWritten - ,communityCount - ,ranLevels - ,modularity ,communityDistribution.min ,communityDistribution.mean ,communityDistribution.max @@ -33,4 +31,4 @@ RETURN preProcessingMillis ,communityDistribution.p90 ,communityDistribution.p95 ,communityDistribution.p99 - ,modularities \ No newline at end of file + ,communityDistribution.p999 \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Label_Delete.cypher b/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Label_Delete.cypher new file mode 100644 index 000000000..3526d4070 --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_4e_Label_Propagation_Label_Delete.cypher @@ -0,0 +1,11 @@ +// Community Detection Label Propagation Label Delete + + CALL db.labels() YIELD label + WHERE label STARTS WITH $dependencies_projection_node + "LabelPropagation" + WITH collect(label) AS selectedLabels + MATCH (member) + WHERE $dependencies_projection_node IN LABELS(member) + AND member.labelPropagationCommunityId IS NOT NULL + WITH 
collect(member) AS members, selectedLabels + CALL apoc.create.removeLabels(members, selectedLabels) YIELD node +RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection/Community_Detection_4f_Label_Propagation_Label.cypher b/cypher/Community_Detection/Community_Detection_4f_Label_Propagation_Label.cypher new file mode 100644 index 000000000..ecfc1dffd --- /dev/null +++ b/cypher/Community_Detection/Community_Detection_4f_Label_Propagation_Label.cypher @@ -0,0 +1,13 @@ +// Community Detection Add LabelPropagationCommunity+Id label + + MATCH (member) + WHERE member.labelPropagationCommunityId IS NOT NULL + AND $dependencies_projection_node IN LABELS(member) + WITH member.labelPropagationCommunityId AS communityId + ,collect(member) AS members + ,count(DISTINCT member) AS memberCount + ,$dependencies_projection_node + 'LabelPropagationCommunity' + toString(member.labelPropagationCommunityId) AS labelName + WHERE memberCount > 1 +UNWIND members AS member + CALL apoc.create.addLabels(member, [labelName]) YIELD node +RETURN count(node) AS nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection/Compare_Community_Detection_Results.cypher b/cypher/Community_Detection/Compare_Community_Detection_Results.cypher new file mode 100644 index 000000000..c5dbcd338 --- /dev/null +++ b/cypher/Community_Detection/Compare_Community_Detection_Results.cypher @@ -0,0 +1,14 @@ +// Compare Community Detection Results + +MATCH (package:Package) + WITH package.louvainCommunityId AS louvainCommunityId + ,package.leidenCommunityId AS leidenCommunityId + ,collect(DISTINCT package.fqn) AS packages + ,count(DISTINCT package.fqn) AS packageCount + WHERE louvainCommunityId IS NOT NULL + AND leidenCommunityId IS NOT NULL +RETURN louvainCommunityId + ,leidenCommunityId + ,packageCount + ,packages +ORDER BY packageCount DESC, louvainCommunityId ASC \ No newline at end of file diff --git 
a/cypher/Community_Detection_Leiden/Get_all_Packages_with_a_Community_Detection_Label.cypher b/cypher/Community_Detection/Get_all_Packages_with_a_Community_Detection_Label.cypher similarity index 100% rename from cypher/Community_Detection_Leiden/Get_all_Packages_with_a_Community_Detection_Label.cypher rename to cypher/Community_Detection/Get_all_Packages_with_a_Community_Detection_Label.cypher diff --git a/cypher/Community_Detection_Leiden/Which_package_community_spans_multiple_artifacts.cypher b/cypher/Community_Detection/Which_package_community_spans_multiple_artifacts.cypher similarity index 83% rename from cypher/Community_Detection_Leiden/Which_package_community_spans_multiple_artifacts.cypher rename to cypher/Community_Detection/Which_package_community_spans_multiple_artifacts.cypher index 560c9f8bf..78c8feac6 100644 --- a/cypher/Community_Detection_Leiden/Which_package_community_spans_multiple_artifacts.cypher +++ b/cypher/Community_Detection/Which_package_community_spans_multiple_artifacts.cypher @@ -1,7 +1,7 @@ // Which package community spans multiple artifacts? 
MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) - WITH package.leidenCommunityIdGamma114With25PercentInterfaces AS communityId + WITH package.leidenCommunityId AS communityId ,collect(DISTINCT replace(last(split(artifact.fileName, '/')), '.jar', '')) AS artifactNames ,size(collect(DISTINCT artifact)) AS artifactCount WHERE communityId IS NOT NULL diff --git a/cypher/Community_Detection_Leiden/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher b/cypher/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher similarity index 73% rename from cypher/Community_Detection_Leiden/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher rename to cypher/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher index 11b1fcb12..66bf8c8cd 100644 --- a/cypher/Community_Detection_Leiden/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher +++ b/cypher/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher @@ -3,9 +3,9 @@ MATCH (artifact:Artifact)-[:CONTAINS]->(package:Package) MATCH (externalArtifact:Artifact)-[:CONTAINS]->(externalPackage:Package) WHERE artifact.fileName <> externalArtifact.fileName - AND package.leidenCommunityIdGamma114With25PercentInterfaces - = externalPackage.leidenCommunityIdGamma114With25PercentInterfaces - WITH package.leidenCommunityIdGamma114With25PercentInterfaces AS communityId + AND package.leidenCommunityId + = externalPackage.leidenCommunityId + WITH package.leidenCommunityId AS communityId ,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName ,collect(DISTINCT package.name) AS packageNames ,size(collect(DISTINCT package)) AS packageCount diff --git 
a/cypher/Community_Detection_for_Types/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher b/cypher/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher similarity index 85% rename from cypher/Community_Detection_for_Types/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher rename to cypher/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher index 7e9017fe6..f03dd01e9 100644 --- a/cypher/Community_Detection_for_Types/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher +++ b/cypher/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher @@ -4,9 +4,8 @@ MATCH (externalArtifact:Artifact)-[:CONTAINS]->(externalPackage:Package)-[:CONTAINS]->(externalType:Type) WHERE artifact.fileName <> externalArtifact.fileName AND package.fqn <> externalPackage.fqn - AND type.leidenTypeCommunityIdGamma7 - = externalType.leidenTypeCommunityIdGamma7 - WITH type.leidenTypeCommunityIdGamma7 AS communityId + AND type.leidenCommunityId = externalType.leidenCommunityId + WITH type.leidenCommunityId AS communityId ,size(collect(DISTINCT artifact)) AS artifactCount ,collect(DISTINCT replace(last(split(artifact.fileName, '/')), '.jar', '')) AS artifactNames ,size(collect(DISTINCT package)) AS packageCount diff --git a/cypher/Community_Detection_Leiden/Community_Detection_0_Delete_Projection.cypher b/cypher/Community_Detection_Leiden/Community_Detection_0_Delete_Projection.cypher deleted file mode 100644 index 35554ac9d..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_0_Delete_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Community Detection 0 Delete Projection - - CALL gds.graph.drop('package-dependencies', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, 
nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_0b_Delete_Projection.cypher b/cypher/Community_Detection_Leiden/Community_Detection_0b_Delete_Projection.cypher deleted file mode 100644 index b0aa51f0e..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_0b_Delete_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Community Detection 0b Delete Projection - - CALL gds.graph.drop('package-dependencies-without-empty', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_1_Create_undirected_Projection.cypher b/cypher/Community_Detection_Leiden/Community_Detection_1_Create_undirected_Projection.cypher deleted file mode 100644 index 6a05690a1..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_1_Create_undirected_Projection.cypher +++ /dev/null @@ -1,14 +0,0 @@ -//Community Detection 1 Create undirected Projection -CALL gds.graph.project('package-dependencies', 'Package', - { - DEPENDS_ON: { - orientation: 'UNDIRECTED' - } - }, - { - relationshipProperties: ['weight', 'weightInterfaces', 'weight25PercentInterfaces'], - nodeProperties: ['incomingDependencies', 'outgoingDependencies'] - } -) - YIELD graphName, nodeCount, relationshipCount -RETURN graphName, nodeCount, relationshipCount \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_1b_Create_subgraph_without_empty_packages.cypher b/cypher/Community_Detection_Leiden/Community_Detection_1b_Create_subgraph_without_empty_packages.cypher deleted file mode 100644 index 2418a62d8..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_1b_Create_subgraph_without_empty_packages.cypher +++ /dev/null @@ -1,10 +0,0 @@ 
-//Community Detection 1b Create subgraph without empty packages - -CALL gds.beta.graph.project.subgraph( - 'package-dependencies-without-empty', - 'package-dependencies', - 'n.outgoingDependencies > 0 OR n.incomingDependencies > 0', - '*' -) - YIELD graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter -RETURN graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_3_Leiden_Statistics.cypher b/cypher/Community_Detection_Leiden/Community_Detection_3_Leiden_Statistics.cypher deleted file mode 100644 index c9d37a0c7..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_3_Leiden_Statistics.cypher +++ /dev/null @@ -1,27 +0,0 @@ -//Community Detection 3 Leiden Statistics - -CALL gds.beta.leiden.stats('package-dependencies-without-empty', { - maxLevels: 10, - gamma: 1.14, - theta: 0.001, - tolerance: 0.0001, - includeIntermediateCommunities: true, - relationshipWeightProperty: 'weight25PercentInterfaces' -}) -YIELD communityCount - ,ranLevels - ,modularity - ,modularities - ,communityDistribution -RETURN communityCount - ,ranLevels - ,modularity - ,modularities - ,communityDistribution.min - ,communityDistribution.mean - ,communityDistribution.max - ,communityDistribution.p50 - ,communityDistribution.p75 - ,communityDistribution.p90 - ,communityDistribution.p95 - ,communityDistribution.p99 \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_4_Leiden_Stream.cypher b/cypher/Community_Detection_Leiden/Community_Detection_4_Leiden_Stream.cypher deleted file mode 100644 index cae16d99a..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_4_Leiden_Stream.cypher +++ /dev/null @@ -1,23 +0,0 @@ -//Community Detection 4 Leiden Stream - -CALL gds.beta.leiden.stream('package-dependencies-without-empty', { - maxLevels: 10, - gamma: 1.06, - theta: 0.00001, - tolerance: 0.0000001, - 
includeIntermediateCommunities: true, - relationshipWeightProperty: 'weight25PercentInterfaces' -}) - YIELD nodeId, communityId, intermediateCommunityIds - WITH communityId - ,intermediateCommunityIds - ,gds.util.asNode(nodeId) AS package -// MATCH (package)<-[:CONTAINS]-(artifact:Artifact) -RETURN intermediateCommunityIds[0] AS firstCommunityId - ,communityId AS finalCommunityId - ,COUNT(DISTINCT package) AS countOfMembers - ,collect(DISTINCT package.fqn) AS packages -// Remove multiple collections before CSV convertion -// ,collect(DISTINCT artifact.fileName) AS artifacts -// ,intermediateCommunityIds - ORDER BY countOfMembers DESC, communityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_5_Leiden_Write_property_leidenCommunityIdGamma114With25PercentInterfaces.cypher b/cypher/Community_Detection_Leiden/Community_Detection_5_Leiden_Write_property_leidenCommunityIdGamma114With25PercentInterfaces.cypher deleted file mode 100644 index 5b778b017..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_5_Leiden_Write_property_leidenCommunityIdGamma114With25PercentInterfaces.cypher +++ /dev/null @@ -1,38 +0,0 @@ -//Community Detection 5 Leiden Write property leidenCommunityIdGamma114With25PercentInterfaces - -CALL gds.beta.leiden.write('package-dependencies-without-empty', { - maxLevels: 10, - gamma: 1.14, - theta: 0.001, - tolerance: 0.0001, - consecutiveIds: true, - relationshipWeightProperty: 'weight25PercentInterfaces', - writeProperty: 'leidenCommunityIdGamma114With25PercentInterfaces' -}) -YIELD preProcessingMillis - ,computeMillis - ,writeMillis - ,postProcessingMillis - ,nodePropertiesWritten - ,communityCount - ,ranLevels - ,modularity - ,modularities - ,communityDistribution -RETURN preProcessingMillis - ,computeMillis - ,writeMillis - ,postProcessingMillis - ,nodePropertiesWritten - ,communityCount - ,ranLevels - ,modularity - ,communityDistribution.min - ,communityDistribution.mean - 
,communityDistribution.max - ,communityDistribution.p50 - ,communityDistribution.p75 - ,communityDistribution.p90 - ,communityDistribution.p95 - ,communityDistribution.p99 - ,modularities \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_6_Delete_Existing_Labels.cypher b/cypher/Community_Detection_Leiden/Community_Detection_6_Delete_Existing_Labels.cypher deleted file mode 100644 index d016f1a98..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_6_Delete_Existing_Labels.cypher +++ /dev/null @@ -1,9 +0,0 @@ -//Community Detection 6 Delete Existing Labels - - CALL db.labels() YIELD label - WHERE label STARTS WITH "Leiden" - WITH collect(label) AS labels - MATCH (package:Package) - WITH collect(package) AS packages, labels - CALL apoc.create.removeLabels(packages, labels) YIELD node -RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden/Community_Detection_7_Add_LeidenCommunity_Id_label_to_packages.cypher b/cypher/Community_Detection_Leiden/Community_Detection_7_Add_LeidenCommunity_Id_label_to_packages.cypher deleted file mode 100644 index 3eab1ee03..000000000 --- a/cypher/Community_Detection_Leiden/Community_Detection_7_Add_LeidenCommunity_Id_label_to_packages.cypher +++ /dev/null @@ -1,13 +0,0 @@ -//Community Detection 7 Add LeidenCommunity+Id label to packages -//with more than one member - - MATCH (package:Package) - WITH package.leidenCommunityIdGamma114With25PercentInterfaces AS communityId - ,collect(package) AS packages - ,COUNT(DISTINCT package.fqn) AS members - ,'LeidenCommunity' + toString(package.leidenCommunityIdGamma114With25PercentInterfaces) AS labelName - WHERE members > 1 -UNWIND packages AS package -//RETURN communityId, members, packageNames - CALL apoc.create.addLabels(package, [labelName]) YIELD node -RETURN COUNT(node) as nodesCount \ No newline at end of file diff --git 
a/cypher/Community_Detection_Leiden/Compare_Community_Detection_Results.cypher b/cypher/Community_Detection_Leiden/Compare_Community_Detection_Results.cypher deleted file mode 100644 index ef108aa16..000000000 --- a/cypher/Community_Detection_Leiden/Compare_Community_Detection_Results.cypher +++ /dev/null @@ -1,4 +0,0 @@ -// Compare Community Detection Results -MATCH (package:Package) -RETURN DISTINCT package.louvainCommunityId, package.leidenCommunityId, collect(package.fqn) AS packages -ORDER BY package.leidenCommunityId \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0_Delete_Projection.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0_Delete_Projection.cypher deleted file mode 100644 index c223aaacd..000000000 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0_Delete_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Community Detection 0 Delete Projection - - CALL gds.graph.drop('artifact-dependencies', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0b_Delete_Projection.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0b_Delete_Projection.cypher deleted file mode 100644 index 2757e95ca..000000000 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_0b_Delete_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Community Detection 0b Delete Projection - - CALL gds.graph.drop('artifact-dependencies-without-empty', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git 
a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1_Create_undirected_Projection.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1_Create_undirected_Projection.cypher deleted file mode 100644 index 5db35470c..000000000 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1_Create_undirected_Projection.cypher +++ /dev/null @@ -1,15 +0,0 @@ -//Community Detection 1 Create undirected Projection - -CALL gds.graph.project('artifact-dependencies', 'Artifact', - { - DEPENDS_ON: { - orientation: 'UNDIRECTED' - } - }, - { - relationshipProperties: ['weight'], - nodeProperties: ['incomingDependencies', 'outgoingDependencies'] - } -) - YIELD graphName, nodeCount, relationshipCount -RETURN graphName, nodeCount, relationshipCount \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_2_Leiden_Estimate_Memory.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_2_Leiden_Estimate_Memory.cypher deleted file mode 100644 index e7271a086..000000000 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_2_Leiden_Estimate_Memory.cypher +++ /dev/null @@ -1,23 +0,0 @@ -//Community Detection 2 Leiden Estimate Memory - -CALL gds.beta.leiden.write.estimate('artifact-dependencies-without-empty', { - gamma: 1.11, - theta: 0.001, - consecutiveIds: true, - relationshipWeightProperty: 'weight', - writeProperty: 'leidenCommunityId' -}) -YIELD nodeCount - ,relationshipCount - ,bytesMin - ,bytesMax - ,heapPercentageMin - ,heapPercentageMax - ,treeView -RETURN nodeCount - ,relationshipCount - ,bytesMin - ,bytesMax - ,heapPercentageMin - ,heapPercentageMax - ,treeView \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_4_Leiden_Stream.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_4_Leiden_Stream.cypher deleted file mode 100644 index 
09fb72892..000000000 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_4_Leiden_Stream.cypher +++ /dev/null @@ -1,17 +0,0 @@ -//Community Detection 4 Leiden Stream - -CALL gds.beta.leiden.stream('artifact-dependencies-without-empty', { - gamma: 1.11, - theta: 0.001, - includeIntermediateCommunities: true, - relationshipWeightProperty: 'weight' -}) - YIELD nodeId, communityId, intermediateCommunityIds - WITH communityId - ,intermediateCommunityIds - ,gds.util.asNode(nodeId) AS artifact -RETURN intermediateCommunityIds[0] AS firstCommunityId - ,communityId AS finalCommunityId - ,COUNT(DISTINCT artifact) AS countOfMembers - ,collect(DISTINCT replace(last(split(artifact.fileName, '/')), '.jar', '')) AS artifactNames - ORDER BY countOfMembers DESC, communityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_6_Delete_Existing_Labels.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_6_Delete_Existing_Labels.cypher deleted file mode 100644 index 2e95af7aa..000000000 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_6_Delete_Existing_Labels.cypher +++ /dev/null @@ -1,9 +0,0 @@ -//Community Detection 6 Delete Existing Labels - - CALL db.labels() YIELD label - WHERE label STARTS WITH "ArtifactLeiden" - WITH collect(label) AS labels - MATCH (artifact:Artifact) - WITH collect(artifact) AS artifacts, labels - CALL apoc.create.removeLabels(artifacts, labels) YIELD node -RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_7_Add_ArtifactLeidenCommunity_Id_label_to_artifacts.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_7_Add_ArtifactLeidenCommunity_Id_label_to_artifacts.cypher deleted file mode 100644 index 1d0a4419b..000000000 --- 
a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_7_Add_ArtifactLeidenCommunity_Id_label_to_artifacts.cypher +++ /dev/null @@ -1,12 +0,0 @@ -//Community Detection 7 Add ArtifactLeidenCommunity+Id label to artifacts -//with more than one member - - MATCH (artifact:Artifact:Archive) - WITH artifact.leidenCommunityId AS communityId - ,collect(artifact) AS artifacts - ,COUNT(DISTINCT artifact.fileName) AS members - ,'ArtifactLeidenCommunity' + toString(artifact.leidenCommunityId) AS labelName - WHERE members > 1 -UNWIND artifacts AS artifact - CALL apoc.create.addLabels(artifact, [labelName]) YIELD node -RETURN COUNT(node) as nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_8_Check_Leiden_Community_Id.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_8_Check_Leiden_Community_Id.cypher deleted file mode 100644 index a269dbe27..000000000 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_8_Check_Leiden_Community_Id.cypher +++ /dev/null @@ -1,3 +0,0 @@ -// Community Detection 8 Check Leiden Community Id - -MATCH (a:Artifact) WHERE a.leidenCommunityId IS NOT NULL RETURN a.leidenCommunityId LIMIT 1 \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Get_all_Artifacts_with_a_Community_Detection_Label.cypher b/cypher/Community_Detection_Leiden_for_Artifacts/Get_all_Artifacts_with_a_Community_Detection_Label.cypher deleted file mode 100644 index a3743aa8b..000000000 --- a/cypher/Community_Detection_Leiden_for_Artifacts/Get_all_Artifacts_with_a_Community_Detection_Label.cypher +++ /dev/null @@ -1,5 +0,0 @@ -// Get all Artifacts with a Community Detection Label - -MATCH (artifact:Artifact) -WHERE any(label IN labels(artifact) WHERE label CONTAINS 'Community') -RETURN DISTINCT artifact; \ No newline at end of file diff --git 
a/cypher/Community_Detection_Louvain/Community_Detection_0_Delete_Projection.cypher b/cypher/Community_Detection_Louvain/Community_Detection_0_Delete_Projection.cypher deleted file mode 100644 index 35554ac9d..000000000 --- a/cypher/Community_Detection_Louvain/Community_Detection_0_Delete_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Community Detection 0 Delete Projection - - CALL gds.graph.drop('package-dependencies', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_0b_Delete_Projection.cypher b/cypher/Community_Detection_Louvain/Community_Detection_0b_Delete_Projection.cypher deleted file mode 100644 index b0aa51f0e..000000000 --- a/cypher/Community_Detection_Louvain/Community_Detection_0b_Delete_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Community Detection 0b Delete Projection - - CALL gds.graph.drop('package-dependencies-without-empty', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_1_Create_undirected_Projection.cypher b/cypher/Community_Detection_Louvain/Community_Detection_1_Create_undirected_Projection.cypher deleted file mode 100644 index 68dffb90c..000000000 --- a/cypher/Community_Detection_Louvain/Community_Detection_1_Create_undirected_Projection.cypher +++ /dev/null @@ -1,19 +0,0 @@ -//Community Detection 1 Create undirected Projection - -CALL gds.graph.project('package-dependencies', 'Package', - { - DEPENDS_ON: { - orientation: 'UNDIRECTED' - } - }, - { - relationshipProperties: [ - 'weight', - 'weight10PercentInterfaces', - 'weight25PercentInterfaces' - ], - nodeProperties: ['incomingDependencies', 
'outgoingDependencies'] - } -) - YIELD graphName, nodeCount, relationshipCount -RETURN graphName, nodeCount, relationshipCount \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_1b_Create_subgraph_without_empty_packages.cypher b/cypher/Community_Detection_Louvain/Community_Detection_1b_Create_subgraph_without_empty_packages.cypher deleted file mode 100644 index 2418a62d8..000000000 --- a/cypher/Community_Detection_Louvain/Community_Detection_1b_Create_subgraph_without_empty_packages.cypher +++ /dev/null @@ -1,10 +0,0 @@ -//Community Detection 1b Create subgraph without empty packages - -CALL gds.beta.graph.project.subgraph( - 'package-dependencies-without-empty', - 'package-dependencies', - 'n.outgoingDependencies > 0 OR n.incomingDependencies > 0', - '*' -) - YIELD graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter -RETURN graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_4_Louvain_Stream.cypher b/cypher/Community_Detection_Louvain/Community_Detection_4_Louvain_Stream.cypher deleted file mode 100644 index 37912d0be..000000000 --- a/cypher/Community_Detection_Louvain/Community_Detection_4_Louvain_Stream.cypher +++ /dev/null @@ -1,22 +0,0 @@ -//Community Detection 4 Louvain Stream - -CALL gds.louvain.stream('package-dependencies-without-empty', { - maxLevels: 10, - maxIterations: 10, - tolerance: 0.0001, - relationshipWeightProperty: 'weight25PercentInterfaces', - includeIntermediateCommunities: true -}) - YIELD nodeId, communityId, intermediateCommunityIds - WITH communityId - ,intermediateCommunityIds - ,gds.util.asNode(nodeId) AS package -// MATCH (package)<-[:CONTAINS]-(artifact:Artifact) -RETURN intermediateCommunityIds[0] AS firstCommunityId - ,communityId AS finalCommunityId - ,COUNT(DISTINCT package) AS countOfMembers - ,collect(DISTINCT package.fqn) AS packages -// Remove multiple 
collections before CSV convertion -// ,collect(DISTINCT artifact.fileName) AS artifacts -// ,intermediateCommunityIds - ORDER BY countOfMembers DESC, communityId ASC \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_5d_Louvain_Write_intermediateLouvainCommunities25PercentInterfaces.cypher b/cypher/Community_Detection_Louvain/Community_Detection_5d_Louvain_Write_intermediateLouvainCommunities25PercentInterfaces.cypher deleted file mode 100644 index a9a4412c1..000000000 --- a/cypher/Community_Detection_Louvain/Community_Detection_5d_Louvain_Write_intermediateLouvainCommunities25PercentInterfaces.cypher +++ /dev/null @@ -1,36 +0,0 @@ -//Community Detection 5d Louvain Write intermediateLouvainCommunities25PercentInterfaces - -CALL gds.louvain.write('package-dependencies-without-empty', { - maxIterations: 10, - tolerance: 0.00001, - writeProperty: 'intermediateLouvainCommunities25PercentInterfaces', - relationshipWeightProperty: 'weight25PercentInterfaces', - includeIntermediateCommunities: true -}) -YIELD preProcessingMillis - ,computeMillis - ,writeMillis - ,postProcessingMillis - ,nodePropertiesWritten - ,communityCount - ,ranLevels - ,modularity - ,modularities - ,communityDistribution -RETURN preProcessingMillis - ,computeMillis - ,writeMillis - ,postProcessingMillis - ,nodePropertiesWritten - ,communityCount - ,ranLevels - ,modularity - ,communityDistribution.min - ,communityDistribution.mean - ,communityDistribution.max - ,communityDistribution.p50 - ,communityDistribution.p75 - ,communityDistribution.p90 - ,communityDistribution.p95 - ,communityDistribution.p99 - ,modularities \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_6_Louvain_Delete_Existing_Labels.cypher b/cypher/Community_Detection_Louvain/Community_Detection_6_Louvain_Delete_Existing_Labels.cypher deleted file mode 100644 index ce75e1a7f..000000000 --- 
a/cypher/Community_Detection_Louvain/Community_Detection_6_Louvain_Delete_Existing_Labels.cypher +++ /dev/null @@ -1,9 +0,0 @@ -//Community Detection 6 Louvain Delete Existing Labels - - CALL db.labels() YIELD label - WHERE label STARTS WITH "Louvain" - WITH collect(label) AS labels - MATCH (package:Package) - WITH collect(package) AS packages, labels - CALL apoc.create.removeLabels(packages, labels) YIELD node -RETURN COUNT(node) AS nodesCount; \ No newline at end of file diff --git a/cypher/Community_Detection_Louvain/Community_Detection_7_Add_LouvainCommunity_Id_label_to_packages.cypher b/cypher/Community_Detection_Louvain/Community_Detection_7_Add_LouvainCommunity_Id_label_to_packages.cypher deleted file mode 100644 index 8d3438a6b..000000000 --- a/cypher/Community_Detection_Louvain/Community_Detection_7_Add_LouvainCommunity_Id_label_to_packages.cypher +++ /dev/null @@ -1,13 +0,0 @@ -//Community Detection 7 Add LouvainCommunity+Id label to packages -//with more than one member - - MATCH (package:Package) - WITH package.louvainCommunity25PercentInterfaces AS communityId - ,collect(package) AS packages - ,COUNT(DISTINCT package.fqn) AS members - ,'LouvainCommunity' + toString(package.louvainCommunity25PercentInterfaces) AS labelName - WHERE members > 1 -UNWIND packages AS package -//RETURN communityId, members, packageNames - CALL apoc.create.addLabels(package, [labelName]) YIELD node -RETURN COUNT(node) as nodesCount \ No newline at end of file diff --git a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_0_Delete_Projection.cypher b/cypher/Community_Detection_for_Types/Community_Detection_for_Types_0_Delete_Projection.cypher deleted file mode 100644 index 7b8df3425..000000000 --- a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_0_Delete_Projection.cypher +++ /dev/null @@ -1,2 +0,0 @@ -//Community Detection for Types 0 Delete Projection -CALL gds.graph.drop('type-dependencies') \ No newline at end of file diff --git 
a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_1_Create_undirected_Projection.cypher b/cypher/Community_Detection_for_Types/Community_Detection_for_Types_1_Create_undirected_Projection.cypher deleted file mode 100644 index 542785541..000000000 --- a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_1_Create_undirected_Projection.cypher +++ /dev/null @@ -1,13 +0,0 @@ -//Community Detection for Types 1 Create undirected Projection - -CALL gds.graph.project('type-dependencies', - ['Class', 'Interface', 'Enum'], - { - DEPENDS_ON: { - orientation: 'UNDIRECTED' - } - }, - { - relationshipProperties: ['weight'] - } -) \ No newline at end of file diff --git a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_2_Leiden_Estimate_Memory.cypher b/cypher/Community_Detection_for_Types/Community_Detection_for_Types_2_Leiden_Estimate_Memory.cypher deleted file mode 100644 index 2710d4dcc..000000000 --- a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_2_Leiden_Estimate_Memory.cypher +++ /dev/null @@ -1,8 +0,0 @@ -//Community Detection for Types 2 Leiden Estimate Memory - -CALL gds.beta.leiden.write.estimate('type-dependencies', { - maxLevels: 10, - tolerance: 0.000001, - relationshipWeightProperty: 'weight', - writeProperty: 'leidenTypeCommunityId' -}) \ No newline at end of file diff --git a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_3_Leiden_Stream.cypher b/cypher/Community_Detection_for_Types/Community_Detection_for_Types_3_Leiden_Stream.cypher deleted file mode 100644 index 6367eab86..000000000 --- a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_3_Leiden_Stream.cypher +++ /dev/null @@ -1,23 +0,0 @@ -//Community Detection for Types 3 Leiden Stream - -//Tweaked for Modularity > 0.3 (aiming 0.4) and distribution percentile 75 > 1 -CALL gds.beta.leiden.stream('type-dependencies', { - maxLevels: 10, - gamma: 7.0, - theta: 0.001, - tolerance: 0.0000001, - 
relationshipWeightProperty: 'weight', - consecutiveIds: true -}) - YIELD nodeId, communityId - WITH communityId, gds.util.asNode(nodeId) AS type - MATCH (type)<-[:CONTAINS]-(package:Package)<-[:CONTAINS]-(artifact:Artifact) -RETURN communityId - ,COUNT(DISTINCT type) AS typeCount - ,COUNT(DISTINCT package) AS packageCount - ,COUNT(DISTINCT artifact) AS artifactCount - ,collect(DISTINCT artifact.fileName) AS artifacts - ,collect(DISTINCT package.name) AS packages - ,collect(DISTINCT type.fqn) AS types - ORDER BY typeCount DESC, packageCount DESC, communityId ASC - LIMIT 25 \ No newline at end of file diff --git a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_3_Leiden_Write.cypher b/cypher/Community_Detection_for_Types/Community_Detection_for_Types_3_Leiden_Write.cypher deleted file mode 100644 index 9163a7d19..000000000 --- a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_3_Leiden_Write.cypher +++ /dev/null @@ -1,11 +0,0 @@ -//Community Detection for Types 3 Leiden Write - -CALL gds.beta.leiden.write('type-dependencies', { - maxLevels: 10, - gamma: 7.0, - theta: 0.001, - tolerance: 0.0000001, - relationshipWeightProperty: 'weight', - consecutiveIds: true, - writeProperty: 'leidenTypeCommunityIdGamma7' -}) \ No newline at end of file diff --git a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_6_Leiden_Delete_Labels.cypher b/cypher/Community_Detection_for_Types/Community_Detection_for_Types_6_Leiden_Delete_Labels.cypher deleted file mode 100644 index bd8c8513e..000000000 --- a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_6_Leiden_Delete_Labels.cypher +++ /dev/null @@ -1,10 +0,0 @@ -//Community Detection for Types 6 Leiden Delete Labels - - CALL db.labels() YIELD label - WHERE label STARTS WITH "LeidenType" - WITH collect(label) AS labels - MATCH (type:Type) - WITH collect(type) AS types, labels - CALL apoc.create.removeLabels(types, labels) - YIELD node -RETURN node, labels(node) AS 
labels; \ No newline at end of file diff --git a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_7_Add_LeidenTypeCommunity_Id_label_to_types.cypher b/cypher/Community_Detection_for_Types/Community_Detection_for_Types_7_Add_LeidenTypeCommunity_Id_label_to_types.cypher deleted file mode 100644 index 66c2edccb..000000000 --- a/cypher/Community_Detection_for_Types/Community_Detection_for_Types_7_Add_LeidenTypeCommunity_Id_label_to_types.cypher +++ /dev/null @@ -1,16 +0,0 @@ -//Community Detection for Types 7 Add LeidenTypeCommunity+Id label to types -//with more than one member - - MATCH (type:Type) - WITH type.leidenTypeCommunityIdGamma7 AS communityId - ,collect(type) AS types - ,COUNT(DISTINCT type.fqn) AS members - ,'LeidenTypeCommunity' + toString(type.leidenTypeCommunityIdGamma7) AS labelName - WHERE members > 1 - AND communityId IS NOT NULL -UNWIND types AS type -//RETURN communityId, members, type -// LIMIT 10 - CALL apoc.create.addLabels(type, [labelName]) - YIELD node -RETURN node \ No newline at end of file diff --git a/cypher/Centrality/Centrality_0_Delete_Projection.cypher b/cypher/Dependencies_Projection/Dependencies_1_Delete_Projection.cypher similarity index 56% rename from cypher/Centrality/Centrality_0_Delete_Projection.cypher rename to cypher/Dependencies_Projection/Dependencies_1_Delete_Projection.cypher index 1b4fa7f0f..ba8137111 100644 --- a/cypher/Centrality/Centrality_0_Delete_Projection.cypher +++ b/cypher/Dependencies_Projection/Dependencies_1_Delete_Projection.cypher @@ -1,5 +1,5 @@ -//Centrality 0 Delete Projection +// Delete projection if existing. 
Variables: dependencies_projection - CALL gds.graph.drop('package-centrality', false) + CALL gds.graph.drop($dependencies_projection, false) YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Dependencies_Projection/Dependencies_2_Delete_Subgraph.cypher b/cypher/Dependencies_Projection/Dependencies_2_Delete_Subgraph.cypher new file mode 100644 index 000000000..45b9c2720 --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_2_Delete_Subgraph.cypher @@ -0,0 +1,5 @@ +// Delete filtered subgraph projection if exists. Variables: dependencies_projection + + CALL gds.graph.drop($dependencies_projection + '-without-empty', false) + YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime +RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Dependencies_Projection/Dependencies_3_Create_Projection.cypher b/cypher/Dependencies_Projection/Dependencies_3_Create_Projection.cypher new file mode 100644 index 000000000..ae5cf1287 --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_3_Create_Projection.cypher @@ -0,0 +1,11 @@ +// Create directed projection. 
Variables: dependencies_projection, dependencies_projection_node, dependencies_projection_weight_property + + CALL gds.graph.project( + $dependencies_projection, + $dependencies_projection_node, + 'DEPENDS_ON', { + relationshipProperties: [$dependencies_projection_weight_property], + nodeProperties: ['incomingDependencies', 'outgoingDependencies'] + }) + YIELD graphName, nodeCount, relationshipCount +RETURN graphName, nodeCount, relationshipCount \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1_Create_Projection.cypher b/cypher/Dependencies_Projection/Dependencies_3b_Create_Multi_Relationship_Projection.cypher similarity index 58% rename from cypher/Similarity/Similarity_1_Create_Projection.cypher rename to cypher/Dependencies_Projection/Dependencies_3b_Create_Multi_Relationship_Projection.cypher index b29815bae..8c525a792 100644 --- a/cypher/Similarity/Similarity_1_Create_Projection.cypher +++ b/cypher/Dependencies_Projection/Dependencies_3b_Create_Multi_Relationship_Projection.cypher @@ -1,11 +1,8 @@ -//Similarity 1 Create Projection +// Create multi relationship projection. 
Variables: dependencies_projection, dependencies_projection_node - CALL gds.graph.project('package-similarity', - { - Package: { - properties: ['incomingDependencies', 'outgoingDependencies'] - } - }, + CALL gds.graph.project( + $dependencies_projection, + $dependencies_projection_node, ['DEPENDS_ON', 'CONTAINS'], { relationshipProperties: { @@ -18,7 +15,8 @@ weight25PercentInterfaces: { defaultValue: 1.0 } - } + }, + nodeProperties: ['incomingDependencies', 'outgoingDependencies'] } ) YIELD graphName, nodeCount, relationshipCount diff --git a/cypher/Dependencies_Projection/Dependencies_4_Create_Undirected_Projection.cypher b/cypher/Dependencies_Projection/Dependencies_4_Create_Undirected_Projection.cypher new file mode 100644 index 000000000..efa87173e --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_4_Create_Undirected_Projection.cypher @@ -0,0 +1,17 @@ +// Create undirected projection. Variables: dependencies_projection, dependencies_projection_node, dependencies_projection_weight_property + +CALL gds.graph.project( + $dependencies_projection, + $dependencies_projection_node, + { + DEPENDS_ON: { + orientation: 'UNDIRECTED' + } + }, + { + relationshipProperties: [$dependencies_projection_weight_property], + nodeProperties: ['incomingDependencies', 'outgoingDependencies'] + } +) + YIELD graphName, nodeCount, relationshipCount +RETURN graphName, nodeCount, relationshipCount \ No newline at end of file diff --git a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1b_Create_subgraph_without_empty_artifacts.cypher b/cypher/Dependencies_Projection/Dependencies_5_Create_Subgraph.cypher similarity index 55% rename from cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1b_Create_subgraph_without_empty_artifacts.cypher rename to cypher/Dependencies_Projection/Dependencies_5_Create_Subgraph.cypher index 47b0adcdf..044a3571c 100644 --- 
a/cypher/Community_Detection_Leiden_for_Artifacts/Community_Detection_1b_Create_subgraph_without_empty_artifacts.cypher +++ b/cypher/Dependencies_Projection/Dependencies_5_Create_Subgraph.cypher @@ -1,8 +1,8 @@ -//Community Detection 1b Create subgraph without empty artifacts +//Create filtered subgraph projection without zero-degree nodes. Variables: dependencies_projection, dependencies_projection_node CALL gds.beta.graph.project.subgraph( - 'artifact-dependencies-without-empty', - 'artifact-dependencies', + $dependencies_projection + '-without-empty', + $dependencies_projection, 'n.outgoingDependencies > 0 OR n.incomingDependencies > 0', '*' ) diff --git a/cypher/Dependencies_Projection/Dependencies_6_Check_Projection_Nodes.cypher b/cypher/Dependencies_Projection/Dependencies_6_Check_Projection_Nodes.cypher new file mode 100644 index 000000000..159eea751 --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_6_Check_Projection_Nodes.cypher @@ -0,0 +1,19 @@ +// Check Projection Node Properties of the filtered subgraph. Variables: dependencies_projection + +CALL gds.graph.nodeProperties.stream( + $dependencies_projection + '-without-empty' + ,['incomingDependencies', 'outgoingDependencies'] +) +YIELD nodeId, nodeProperty, propertyValue, nodeLabels + WITH nodeId AS sourceNodeId + ,gds.util.asNode(nodeId) AS sourceNode + ,nodeProperty + ,propertyValue + ,nodeLabels +RETURN sourceNodeId + ,coalesce(sourceNode.fqn, sourceNode.fileName, sourceNode.name) AS sourceName + ,nodeProperty + ,propertyValue + ,nodeLabels + ORDER BY sourceName ASC + LIMIT 50 \ No newline at end of file diff --git a/cypher/Dependencies_Projection/Dependencies_7_Check_Projection_Relationships.cypher b/cypher/Dependencies_Projection/Dependencies_7_Check_Projection_Relationships.cypher new file mode 100644 index 000000000..46686aff2 --- /dev/null +++ b/cypher/Dependencies_Projection/Dependencies_7_Check_Projection_Relationships.cypher @@ -0,0 +1,22 @@ +// Check Projection Relationships + +CALL gds.graph.relationshipProperty.stream( +
$dependencies_projection + '-without-empty' + ,$dependencies_projection_weight_property + ,['DEPENDS_ON'] +) +YIELD sourceNodeId, targetNodeId, propertyValue, relationshipType + WITH sourceNodeId + ,targetNodeId + ,gds.util.asNode(sourceNodeId) AS source + ,gds.util.asNode(targetNodeId) AS target + ,propertyValue AS weight + ,relationshipType +RETURN sourceNodeId + ,coalesce(source.fqn, source.fileName, source.name) AS sourceName + ,targetNodeId + ,coalesce(target.fqn, target.fileName, target.name) AS targetName + ,weight + ,relationshipType + ORDER BY weight DESC, sourceName ASC + LIMIT 50 \ No newline at end of file diff --git a/cypher/Metrics/Set_Incoming_Package_Dependencies.cypher b/cypher/Metrics/Set_Incoming_Package_Dependencies.cypher index 0e29ca977..2e4b4fa59 100644 --- a/cypher/Metrics/Set_Incoming_Package_Dependencies.cypher +++ b/cypher/Metrics/Set_Incoming_Package_Dependencies.cypher @@ -3,6 +3,8 @@ OPTIONAL MATCH (p)-[:CONTAINS]->(it:Java:Type)<-[r:DEPENDS_ON]-(et:Java:Type)<-[:CONTAINS]-(ep:Package)<-[:CONTAINS]-(ea:Artifact) OPTIONAL MATCH (it)<-[:DEPENDS_ON]-(eti:Java:Type:Interface) WHERE p <> ep + AND p.fqn <> ep.fqn + AND p.incomingDependencies IS NULL // comment out to recalculate WITH p ,COUNT(et) AS incomingDependencies ,SUM(r.weight) AS incomingDependenciesWeight @@ -10,6 +12,7 @@ OPTIONAL MATCH (it)<-[:DEPENDS_ON]-(eti:Java:Type:Interface) ,COUNT(DISTINCT eti) AS incomingDependentInterfaces // also included in usedTypes ,COUNT(DISTINCT ep) AS incomingDependentPackages ,COUNT(DISTINCT ea) AS incomingDependentArtifacts +//ORDER BY incomingDependencies DESC, packageName ASC // uncomment to get most incoming first SET p.incomingDependencies = incomingDependencies ,p.incomingDependenciesWeight = incomingDependenciesWeight ,p.incomingDependentTypes = incomingDependentTypes @@ -23,4 +26,3 @@ OPTIONAL MATCH (it)<-[:DEPENDS_ON]-(eti:Java:Type:Interface) ,incomingDependentInterfaces ,incomingDependentPackages ,incomingDependentArtifacts -ORDER BY
incomingDependencies DESC, packageName ASC \ No newline at end of file diff --git a/cypher/Metrics/Set_Incoming_Type_Dependencies.cypher b/cypher/Metrics/Set_Incoming_Type_Dependencies.cypher new file mode 100644 index 000000000..5eb8ee92c --- /dev/null +++ b/cypher/Metrics/Set_Incoming_Type_Dependencies.cypher @@ -0,0 +1,29 @@ +// Set Incoming Type Dependencies + + MATCH (p:Package) +OPTIONAL MATCH (p)-[:CONTAINS]->(it:Java:Type)<-[r:DEPENDS_ON]-(et:Java:Type)<-[:CONTAINS]-(ep:Package)<-[:CONTAINS]-(ea:Artifact) +OPTIONAL MATCH (it)<-[:DEPENDS_ON]-(eti:Type:Interface) + WHERE it <> et + AND it.fqn <> et.fqn + AND it.incomingDependencies IS NULL // comment out to recalculate + WITH p.fqn AS packageName + ,it + ,it.fqn AS typeName + ,count(DISTINCT et.fqn) AS incomingDependencies + ,sum(r.weight) AS incomingDependenciesWeight + ,count(DISTINCT eti.fqn) AS incomingDependentInterfaces // also included in usedTypes + ,count(DISTINCT ep.fqn) AS incomingDependentPackages + ,count(DISTINCT ea.fileName) AS incomingDependentArtifacts +// ORDER BY incomingDependencies DESC // uncomment to get most incoming first + SET it.incomingDependencies = incomingDependencies + ,it.incomingDependenciesWeight = incomingDependenciesWeight + ,it.incomingDependentInterfaces = incomingDependentInterfaces + ,it.incomingDependentPackages = incomingDependentPackages + ,it.incomingDependentArtifacts = incomingDependentArtifacts + RETURN packageName + ,typeName + ,incomingDependencies + ,incomingDependenciesWeight + ,incomingDependentInterfaces + ,incomingDependentPackages + ,incomingDependentArtifacts \ No newline at end of file diff --git a/cypher/Metrics/Set_Outgoing_Package_Dependencies.cypher b/cypher/Metrics/Set_Outgoing_Package_Dependencies.cypher index 961385e53..725c316be 100644 --- a/cypher/Metrics/Set_Outgoing_Package_Dependencies.cypher +++ b/cypher/Metrics/Set_Outgoing_Package_Dependencies.cypher @@ -1,15 +1,19 @@ //Set Outgoing Package Dependencies + MATCH (p:Package) OPTIONAL MATCH 
(p)-[:CONTAINS]->(it:Java:Type)-[r:DEPENDS_ON]->(et:Java:Type)<-[:CONTAINS]-(ep:Package)<-[:CONTAINS]-(ea:Artifact) OPTIONAL MATCH (it)-[:DEPENDS_ON]->(eti:Interface) WHERE p <> ep + AND p.fqn <> ep.fqn + AND p.outgoingDependencies IS NULL // comment out to recalculate WITH p ,COUNT(et) AS outgoingDependencies + ,SUM(r.weight) AS outgoingDependenciesWeight ,COUNT(DISTINCT et) AS outgoingDependentTypes ,COUNT(DISTINCT eti) AS outgoingDependentInterfaces // included in usedTypes ,COUNT(DISTINCT ep) AS outgoingDependentPackages ,COUNT(DISTINCT ea) AS outgoingDependentArtifacts - ,SUM(r.weight) AS outgoingDependenciesWeight +// ORDER BY outgoingDependencies DESC // uncomment to get most outgoing first SET p.outgoingDependencies = outgoingDependencies ,p.outgoingDependenciesWeight = outgoingDependenciesWeight ,p.outgoingDependentTypes = outgoingDependentTypes @@ -18,9 +22,8 @@ OPTIONAL MATCH (it)-[:DEPENDS_ON]->(eti:Interface) ,p.outgoingDependentArtifacts = outgoingDependentArtifacts RETURN p.fqn AS packageName ,outgoingDependencies + ,outgoingDependenciesWeight ,outgoingDependentTypes ,outgoingDependentInterfaces ,outgoingDependentPackages ,outgoingDependentArtifacts - ,outgoingDependenciesWeight -ORDER BY outgoingDependencies DESC, packageName ASC \ No newline at end of file diff --git a/cypher/Metrics/Set_Outgoing_Type_Dependencies.cypher b/cypher/Metrics/Set_Outgoing_Type_Dependencies.cypher new file mode 100644 index 000000000..afbe1210a --- /dev/null +++ b/cypher/Metrics/Set_Outgoing_Type_Dependencies.cypher @@ -0,0 +1,29 @@ +//Set Outgoing Type Dependencies + + MATCH (p:Package) +OPTIONAL MATCH (p)-[:CONTAINS]->(it:Java:Type)-[r:DEPENDS_ON]->(et:Java:Type)<-[:CONTAINS]-(ep:Package)<-[:CONTAINS]-(ea:Artifact) +OPTIONAL MATCH (it)-[:DEPENDS_ON]->(eti:Type:Interface) + WHERE it <> et + AND it.fqn <> et.fqn + AND it.outgoingDependencies IS NULL // comment out to recalculate + WITH p.fqn AS packageName + ,it + ,it.fqn AS typeName + ,count(DISTINCT et.fqn) AS
outgoingDependencies + ,sum(r.weight) AS outgoingDependenciesWeight + ,count(DISTINCT eti.fqn) AS outgoingDependentInterfaces // included in usedTypes + ,count(DISTINCT ep.fqn) AS outgoingDependentPackages + ,count(DISTINCT ea.fileName) AS outgoingDependentArtifacts +// ORDER BY outgoingDependencies DESC // uncomment to get most outgoing first + SET it.outgoingDependencies = outgoingDependencies + ,it.outgoingDependenciesWeight = outgoingDependenciesWeight + ,it.outgoingDependentInterfaces = outgoingDependentInterfaces + ,it.outgoingDependentPackages = outgoingDependentPackages + ,it.outgoingDependentArtifacts = outgoingDependentArtifacts + RETURN packageName + ,typeName + ,outgoingDependencies + ,outgoingDependenciesWeight + ,outgoingDependentInterfaces + ,outgoingDependentPackages + ,outgoingDependentArtifacts \ No newline at end of file diff --git a/cypher/Graph_Data_Science_Path_Finding/Path_Finding_1_Create_Projection.cypher b/cypher/Path_Finding/Path_Finding_1_Create_Projection.cypher similarity index 100% rename from cypher/Graph_Data_Science_Path_Finding/Path_Finding_1_Create_Projection.cypher rename to cypher/Path_Finding/Path_Finding_1_Create_Projection.cypher diff --git a/cypher/Graph_Data_Science_Path_Finding/Path_Finding_2_Estimate_Memory.cypher b/cypher/Path_Finding/Path_Finding_2_Estimate_Memory.cypher similarity index 100% rename from cypher/Graph_Data_Science_Path_Finding/Path_Finding_2_Estimate_Memory.cypher rename to cypher/Path_Finding/Path_Finding_2_Estimate_Memory.cypher diff --git a/cypher/Graph_Data_Science_Path_Finding/Path_Finding_3_Depth_First_Search_Path.cypher b/cypher/Path_Finding/Path_Finding_3_Depth_First_Search_Path.cypher similarity index 100% rename from cypher/Graph_Data_Science_Path_Finding/Path_Finding_3_Depth_First_Search_Path.cypher rename to cypher/Path_Finding/Path_Finding_3_Depth_First_Search_Path.cypher diff --git a/cypher/Graph_Data_Science_Path_Finding/Path_Finding_4_Breadth_First_Search_Path.cypher 
b/cypher/Path_Finding/Path_Finding_4_Breadth_First_Search_Path.cypher similarity index 100% rename from cypher/Graph_Data_Science_Path_Finding/Path_Finding_4_Breadth_First_Search_Path.cypher rename to cypher/Path_Finding/Path_Finding_4_Breadth_First_Search_Path.cypher diff --git a/cypher/Similarity/Similarity_0_Delete_Projection.cypher b/cypher/Similarity/Similarity_0_Delete_Projection.cypher deleted file mode 100644 index 44e429733..000000000 --- a/cypher/Similarity/Similarity_0_Delete_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Similarity 0 Delete Projection - - CALL gds.graph.drop('package-similarity', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Similarity/Similarity_0b_Delete_Subgraph_Projection.cypher b/cypher/Similarity/Similarity_0b_Delete_Subgraph_Projection.cypher deleted file mode 100644 index 89dfb2e3d..000000000 --- a/cypher/Similarity/Similarity_0b_Delete_Subgraph_Projection.cypher +++ /dev/null @@ -1,5 +0,0 @@ -//Similarity 0b Delete Subgraph Projection - - CALL gds.graph.drop('package-similarity-without-empty', false) - YIELD graphName, nodeCount, relationshipCount, creationTime, modificationTime -RETURN graphName, nodeCount, relationshipCount, creationTime, modificationTime \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1a_Estimate.cypher b/cypher/Similarity/Similarity_1a_Estimate.cypher new file mode 100644 index 000000000..a552f8ab0 --- /dev/null +++ b/cypher/Similarity/Similarity_1a_Estimate.cypher @@ -0,0 +1,11 @@ +// Similarity Estimate Memory + +CALL gds.nodeSimilarity.write.estimate( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,writeRelationshipType: 'SIMILAR' + ,writeProperty: 'score' + ,topK: 3 +}) + YIELD requiredMemory, nodeCount, relationshipCount, bytesMin, 
bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView +RETURN requiredMemory, nodeCount, relationshipCount, bytesMin, bytesMax, heapPercentageMin, heapPercentageMax, treeView, mapView \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1b_Create_subgraph_without_empty_packages.cypher b/cypher/Similarity/Similarity_1b_Create_subgraph_without_empty_packages.cypher deleted file mode 100644 index f73997e2a..000000000 --- a/cypher/Similarity/Similarity_1b_Create_subgraph_without_empty_packages.cypher +++ /dev/null @@ -1,10 +0,0 @@ -//Similarity 1b Create subgraph without empty packages - - CALL gds.beta.graph.project.subgraph( - 'package-similarity-without-empty', - 'package-similarity', - 'n.outgoingDependencies > 0 OR n.incomingDependencies > 0', - '*' - ) - YIELD graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter -RETURN graphName, fromGraphName, nodeCount, relationshipCount, nodeFilter \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1b_Statistics.cypher b/cypher/Similarity/Similarity_1b_Statistics.cypher new file mode 100644 index 000000000..a37785bd9 --- /dev/null +++ b/cypher/Similarity/Similarity_1b_Statistics.cypher @@ -0,0 +1,26 @@ +// Similarity Statistics + +CALL gds.nodeSimilarity.stats( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,topK: 3 + }) + YIELD nodesCompared + ,similarityPairs + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,similarityDistribution +RETURN nodesCompared + ,similarityPairs + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,similarityDistribution.min + ,similarityDistribution.mean + ,similarityDistribution.max + ,similarityDistribution.p50 + ,similarityDistribution.p75 + ,similarityDistribution.p90 + ,similarityDistribution.p95 + ,similarityDistribution.p99 \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1c_Stream.cypher 
b/cypher/Similarity/Similarity_1c_Stream.cypher new file mode 100644 index 000000000..bc96a7548 --- /dev/null +++ b/cypher/Similarity/Similarity_1c_Stream.cypher @@ -0,0 +1,28 @@ +// Similarity Stream + + CALL gds.nodeSimilarity.stream( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,topK: 3 + }) +YIELD node1, node2, similarity + WITH gds.util.asNode(node1) AS node1 + ,gds.util.asNode(node2) AS node2 + ,similarity +OPTIONAL MATCH (artifact1:Artifact)-[:CONTAINS]->(node1) +OPTIONAL MATCH (artifact2:Artifact)-[:CONTAINS]->(node2) + WITH node1 + ,node2 + ,replace(last(split(artifact1.fileName, '/')), '.jar', '') AS artifactName1 + ,replace(last(split(artifact2.fileName, '/')), '.jar', '') AS artifactName2 + ,similarity +RETURN similarity + ,artifactName1 + ,coalesce(node1.fqn, node1.fileName, node1.name) AS node1Name + ,node1.incomingDependencies AS node1IncomingDependencies + ,node1.outgoingDependencies AS node1OutgoingDependencies + ,artifactName2 + ,coalesce(node2.fqn, node2.fileName, node2.name) AS node2Name + ,node2.incomingDependencies AS node2IncomingDependencies + ,node2.outgoingDependencies AS node2OutgoingDependencies +ORDER BY similarity DESCENDING, node1Name, node2Name \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1d_Delete_Relationships.cypher b/cypher/Similarity/Similarity_1d_Delete_Relationships.cypher new file mode 100644 index 000000000..e5aefbf65 --- /dev/null +++ b/cypher/Similarity/Similarity_1d_Delete_Relationships.cypher @@ -0,0 +1,6 @@ +// Delete Relationship "SIMILAR" + +MATCH (source)-[similarity:SIMILAR]->(target) + WHERE $dependencies_projection_node IN labels(source) + AND $dependencies_projection_node IN labels(target) +DELETE similarity \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1e_Write.cypher b/cypher/Similarity/Similarity_1e_Write.cypher new file mode 100644 index 000000000..947ff48ef --- /dev/null +++ 
b/cypher/Similarity/Similarity_1e_Write.cypher @@ -0,0 +1,30 @@ +// Similarity Write + +CALL gds.nodeSimilarity.write( + $dependencies_projection + '-without-empty', { + relationshipWeightProperty: $dependencies_projection_weight_property + ,writeRelationshipType: 'SIMILAR' + ,writeProperty: 'score' + ,topK: 3 +}) +YIELD nodesCompared + ,relationshipsWritten + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,writeMillis + ,similarityDistribution +RETURN nodesCompared + ,relationshipsWritten + ,preProcessingMillis + ,computeMillis + ,postProcessingMillis + ,writeMillis + ,similarityDistribution.min + ,similarityDistribution.mean + ,similarityDistribution.max + ,similarityDistribution.p50 + ,similarityDistribution.p75 + ,similarityDistribution.p90 + ,similarityDistribution.p95 + ,similarityDistribution.p99 \ No newline at end of file diff --git a/cypher/Similarity/Similarity_1f_Write_Node_Properties.cypher b/cypher/Similarity/Similarity_1f_Write_Node_Properties.cypher new file mode 100644 index 000000000..bf9108482 --- /dev/null +++ b/cypher/Similarity/Similarity_1f_Write_Node_Properties.cypher @@ -0,0 +1,19 @@ +// Write similar node names and their score per node + + MATCH (source)-[similar:SIMILAR]->(target) + WHERE $dependencies_projection_node IN labels(source) + AND $dependencies_projection_node IN labels(target) + WITH source + ,coalesce(source.fqn, source.fileName, source.name) AS sourceName + ,similar.score AS similarityScore + ,coalesce(target.fqn, target.fileName, target.name) AS targetName + ORDER BY sourceName ASCENDING, similarityScore DESCENDING + WITH source + ,sourceName + ,collect(DISTINCT targetName) AS similarNames + ,collect(DISTINCT similarityScore) AS similarityScores + SET source.similarNames = similarNames + ,source.similarityScores = similarityScores +//RETURN sourceName, similarNames, similarityScores +//ORDER BY sourceName ASCENDING + RETURN count(source) AS writtenNodes diff --git 
a/cypher/Similarity/Similarity_2a_Check_Projection_Relationship.cypher b/cypher/Similarity/Similarity_2a_Check_Projection_Relationship.cypher deleted file mode 100644 index 8187b8b4f..000000000 --- a/cypher/Similarity/Similarity_2a_Check_Projection_Relationship.cypher +++ /dev/null @@ -1,11 +0,0 @@ -// Similarity 2a Check Projection Relationship - -CALL gds.graph.relationshipProperty.stream('package-similarity', 'weight', ['DEPENDS_ON']) -YIELD - sourceNodeId, targetNodeId, propertyValue AS weight, relationshipType -RETURN - gds.util.asNode(sourceNodeId).name AS source, - gds.util.asNode(targetNodeId).name AS target, - weight, - relationshipType -ORDER BY weight DESC, source \ No newline at end of file diff --git a/cypher/Similarity/Similarity_2b_Check_Projection_Node.cypher b/cypher/Similarity/Similarity_2b_Check_Projection_Node.cypher deleted file mode 100644 index f824c113c..000000000 --- a/cypher/Similarity/Similarity_2b_Check_Projection_Node.cypher +++ /dev/null @@ -1,13 +0,0 @@ -// Similarity 2b Check Projection Node - -MATCH (p:Package) -RETURN p.name AS name - ,p.incomingDependencies - ,gds.util.nodeProperty( - 'package-similarity', id(p), 'incomingDependencies' - ) AS projectedIncomingDependencies - ,p.outgoingDependencies - ,gds.util.nodeProperty( - 'package-similarity', id(p), 'outgoingDependencies' - ) AS projectedOutgoingDependencies -ORDER BY name \ No newline at end of file diff --git a/cypher/Similarity/Similarity_2c_Check_Projection_Relationship_Topology_with_null_nodes_first.cypher b/cypher/Similarity/Similarity_2c_Check_Projection_Relationship_Topology_with_null_nodes_first.cypher deleted file mode 100644 index bc9513f50..000000000 --- a/cypher/Similarity/Similarity_2c_Check_Projection_Relationship_Topology_with_null_nodes_first.cypher +++ /dev/null @@ -1,13 +0,0 @@ -// Similarity 2c Check Projection Relationship Topology with null nodes first - -CALL gds.beta.graph.relationships.stream('package-similarity', ['DEPENDS_ON', 'CONTAINS']) -YIELD - 
sourceNodeId, targetNodeId, relationshipType -RETURN - sourceNodeId, - gds.util.asNode(sourceNodeId).name AS source, - targetNodeId, - gds.util.asNode(targetNodeId).name AS target, - relationshipType -ORDER BY source DESC, target DESC -LIMIT 100 \ No newline at end of file diff --git a/cypher/Similarity/Similarity_3_Estimate_Memory.cypher b/cypher/Similarity/Similarity_3_Estimate_Memory.cypher deleted file mode 100644 index f3e1b0318..000000000 --- a/cypher/Similarity/Similarity_3_Estimate_Memory.cypher +++ /dev/null @@ -1,7 +0,0 @@ -// Similarity 3 Estimate Memory - -CALL gds.nodeSimilarity.write.estimate('package-similarity', { - writeRelationshipType: 'SIMILAR', - writeProperty: 'similarityScore' -}) -YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory \ No newline at end of file diff --git a/cypher/Similarity/Similarity_4_Stream.cypher b/cypher/Similarity/Similarity_4_Stream.cypher deleted file mode 100644 index 558a6ac61..000000000 --- a/cypher/Similarity/Similarity_4_Stream.cypher +++ /dev/null @@ -1,28 +0,0 @@ -// Similarity 4 Stream - - CALL gds.nodeSimilarity.stream('package-similarity', { - relationshipWeightProperty: 'weight25PercentInterfaces' - }) -YIELD node1, node2, similarity - WITH gds.util.asNode(node1) AS package1 - ,gds.util.asNode(node2) AS package2 - ,similarity -MATCH (artifact1:Artifact)-[:CONTAINS]->(package1) -MATCH (artifact2:Artifact)-[:CONTAINS]->(package2) - WITH replace(last(split(artifact1.fileName, '/')), '.jar', '') AS artifactName1 - ,replace(last(split(artifact2.fileName, '/')), '.jar', '') AS artifactName2 - ,package1 - ,package2 - ,similarity -RETURN similarity - ,package1.fqn - ,package2.fqn - ,artifactName1 - ,package1.name - ,package1.incomingDependencies - ,package1.outgoingDependencies - ,artifactName2 - ,package2.name - ,package2.incomingDependencies - ,package2.outgoingDependencies -ORDER BY similarity DESCENDING, package1.name, package2.name \ No newline at end of file diff --git 
a/jupyter/ExternalDependencies.ipynb b/jupyter/ExternalDependencies.ipynb index 564feb7ab..da063a1ae 100644 --- a/jupyter/ExternalDependencies.ipynb +++ b/jupyter/ExternalDependencies.ipynb @@ -1367,11 +1367,11 @@ "plot.figure();\n", "axes = external_package_usage_aggregated.plot(\n", " kind='scatter',\n", - " title='External package usage - max internal packages', \n", + " title='External package usage - max internal packages %', \n", " x='numberOfExternalPackages',\n", " y='maxNumberOfPackagesPercentage',\n", " s='artifactPackages',\n", - " c='leidenCommunityId',\n", + " c='stdNumberOfPackagesPercentage',\n", " xlabel='external package count',\n", " ylabel='max percentage of internal packages',\n", " cmap=main_color_map,\n", @@ -1416,11 +1416,11 @@ "plot.figure();\n", "axes = external_package_usage_aggregated.plot(\n", " kind='scatter',\n", - " title='External package usage - median internal packages', \n", + " title='External package usage - median internal packages %', \n", " x='numberOfExternalPackages',\n", " y='medNumberOfPackagesPercentage',\n", " s='artifactPackages',\n", - " c='leidenCommunityId',\n", + " c='stdNumberOfPackagesPercentage',\n", " xlabel='external package count',\n", " ylabel='median percentage of internal packages',\n", " cmap=main_color_map,\n", diff --git a/scripts/executeQuery.sh b/scripts/executeQuery.sh index 47070f9c5..762d19d7c 100755 --- a/scripts/executeQuery.sh +++ b/scripts/executeQuery.sh @@ -1,17 +1,18 @@ #!/usr/bin/env bash # Utilizes Neo4j's HTTP API to execute a Cypher query from an input file and provides the results in CSV format. -# Use it when "cypher-shell" is not present or not flexible enough. +# Use it when "cypher-shell" is not present, not flexible enough or to avoid an additional dependency. # It requires "cURL" ( https://curl.se ) and "jq" ( https://stedolan.github.io/jq ) to be installed. # The environment variable NEO4J_INITIAL_PASSWORD needs to be set. 
# Using "cypher-shell" that comes with Neo4j server is much simpler to use: -# cat $cypher_query_file_name | $NEO4J_HOME/bin/cypher-shell -u neo4j -p password --format plain +# cat $cypher_query_file_name | $NEO4J_HOME/bin/cypher-shell -u neo4j -p password --format plain --param "number ⇒ 3" # Note: These command line arguments are supported: # -> "filename" of the cypher query file (required, unnamed first argument) # -> "--no_source_reference" to not append the cypher query file name as last CSV column +# -> any following key=value arguments are used as query parameters # Overrideable Defaults NEO4J_HTTP_PORT=${NEO4J_HTTP_PORT:-"7474"} # Neo4j HTTP API port for executing queries @@ -19,55 +20,64 @@ NEO4J_HTTP_TRANSACTION_ENDPOINT=${NEO4J_HTTP_TRANSACTION_ENDPOINT:-"db/neo4j/tx/ # Check if environment variable is set if [ -z "${NEO4J_INITIAL_PASSWORD}" ]; then - echo "Requires environment variable NEO4J_INITIAL_PASSWORD to be set first. Use 'export NEO4J_INITIAL_PASSWORD='." >&2 + echo "executeQuery requires environment variable NEO4J_INITIAL_PASSWORD to be set first. Use 'export NEO4J_INITIAL_PASSWORD='." 
>&2 exit 1 fi # Input Arguments: Initialize arguments and set default values for optional ones cypher_query_file_name="" no_source_reference=false +query_parameters="" # Input Arguments: Function to print usage information print_usage() { - echo "Usage: $0 [--no-source-reference-column]" >&2 + echo "executeQuery Usage: $0 [--no-source-reference-column]" >&2 echo "Options:" >&2 echo " --no-source-reference-column: Exclude the source reference column" >&2 } # Input Arguments: Parse the command-line arguments while [[ $# -gt 0 ]]; do - key="$1" + arg="$1" - case $key in + case $arg in --no-source-reference-column) no_source_reference=true shift ;; *) - if [[ -z "$cypher_query_file_name" ]]; then + if [[ -z "${cypher_query_file_name}" ]]; then # Input Arguments: Read the first unnamed input argument containing the name of the cypher file - cypher_query_file_name="$key" + cypher_query_file_name="${arg}" #echo "Cypher File: $cypher_query_file_name" # Input Arguments: Check the first input argument to be a valid file if [ ! -f "${cypher_query_file_name}" ] ; then - echo "Error: Please provide a valid filename." >&2 + echo "executeQuery Error: Invalid cypher query filename ${cypher_query_file_name}." 
>&2 print_usage exit 1 fi else - echo "Error: Unknown option: $key" >&2 - print_usage - exit 1 + # Convert key=value argument to JSON "key": "value" and strip all incoming quotes first + json_parameter=$(echo "${arg}" | sed "s/[\"\']//g" | awk -F'=' '{ print "\""$1"\": \""$2"\""}'| grep -iv '\"#') + if [[ -z "${query_parameters}" ]]; then + # Add first query parameter directly + query_parameters="${json_parameter}" + else + # Append next query parameter separated by a comma and a space + query_parameters="${query_parameters}, ${json_parameter}" + fi fi shift ;; esac done +#echo "executeQuery: query_parameters: ${query_parameters}" + # Read the file that contains the Cypher query original_cypher_query=$(<"${cypher_query_file_name}") -#echo "Original Query: $original_cypher_query" +#echo "executeQuery: Original Query: $original_cypher_query" # Encode the string containing the Cypher query to be used inside JSON using jq ( https://stedolan.github.io/jq ) # Be sure to put double quotes around the original Cypher query to prevent newlines from beeing removed. @@ -77,11 +87,11 @@ original_cypher_query=$(<"${cypher_query_file_name}") # . means "output the root of the JSON document" # Source: https://stackoverflow.com/questions/10053678/escaping-characters-in-bash-for-json cypher_query=$(echo -n "${original_cypher_query}" | jq -Rsa .) 
-#echo "Cypher Query: $cypher_query" +#echo "executeQuery: Cypher Query JSON Encoded: $cypher_query" # Put the query inside the structure that is expected by the Neo4j HTTP API -cypher_query_for_api="{\"statements\":[{\"statement\":${cypher_query},\"includeStats\": false}]}" -#echo "Cypher Query for API: ${cypher_query_for_api}" +cypher_query_for_api="{\"statements\":[{\"statement\":${cypher_query},\"parameters\":{${query_parameters}},\"includeStats\": false}]}" +#echo "executeQuery: Cypher Query for API: ${cypher_query_for_api}" # Calls the Neo4j HTTP API using cURL ( https://curl.se ) cyper_query_result=$(curl --silent -S --fail-with-body -H Accept:application/json -H Content-Type:application/json \ diff --git a/scripts/executeQueryFunctions.sh b/scripts/executeQueryFunctions.sh index d8ac4964d..bce30a935 100644 --- a/scripts/executeQueryFunctions.sh +++ b/scripts/executeQueryFunctions.sh @@ -10,36 +10,48 @@ # This way non-standard tools like readlink aren't needed. SCRIPTS_DIR=${SCRIPTS_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} # Repository directory containing the shell scripts +# Extract the value of one key out of a "key=value" array e.g. for query parameters. +# The first argument is the name of the target key. +# All following arguments are the "key=value" parameters.
+# Example: `extractQueryParameter "b" "a=1" "b=2" "c=3"` returns `2` +extractQueryParameter() { + target_key=${1} + shift # skip the first argument (the target key); the remaining ones are the key=value pairs + + for arg in "${@}"; do + key=$(echo "$arg" | cut -d'=' -f1) + value=$(echo "$arg" | cut -d'=' -f2-) + if [ "${key}" = "${target_key}" ]; then + echo "${value}" + break + fi + done +} + # Function to execute a cypher query from the given file (first argument) with the default method execute_cypher() { - execute_cypher_http "${1}" || exit 1 + execute_cypher_http "${@}" || exit 1 # "${@}": Get all function arguments and forward them } # Function to execute a cypher query from the given file (first argument) with the default method and just return the number of results execute_cypher_summarized() { - execute_cypher_http_summarized "${1}" || exit 1 + execute_cypher_http_summarized "${@}" || exit 1 # "${@}": Get all function arguments and forward them } # Function to execute a cypher query from the given file (first argument) with the default method and fail if there is no result execute_cypher_expect_results() { - execute_cypher_http_expect_results "${1}" || exit 1 + execute_cypher_http_expect_results "${@}" || exit 1 # "${@}": Get all function arguments and forward them } # Function to execute a cypher query from the given file (first and only argument) using Neo4j's HTTP API execute_cypher_http() { - # Get the Cypher file name from the first argument - cypherFileName="${1}" - # (Neo4j HTTP API Script) Execute the Cyper query contained in the file and print the results as CSV - source $SCRIPTS_DIR/executeQuery.sh "${cypherFileName}" || exit 1 + source $SCRIPTS_DIR/executeQuery.sh "${@}" || exit 1 # "${@}": Get all function arguments and forward them } # Function to execute a cypher query from the given file (first and only argument) with a summarized (console) output using Neo4j's HTTP API execute_cypher_http_summarized() { - # Get the Cypher file name from the first argument -
cypherFileName="${1}" - - results=$( execute_cypher_http ${cypherFileName} | wc -l ) + results=$( execute_cypher_http "${@}" | wc -l ) # "${@}": Get all function arguments and forward them results=$((results - 2)) echo "$(basename -- "${cypherFileName}") (via http) result lines: ${results}" } @@ -57,10 +69,30 @@ execute_cypher_http_expect_results() { fi } +cypher_shell_query_parameters() { + query_parameters="" + shift # ignore first argument containing the query file name + + while [[ $# -gt 0 ]]; do + arg="${1}" + # Convert key=value argument to JSON "key": "value" + json_parameter=$(echo "${arg}" | sed "s/[\"\']//g" | awk -F'=' '{print ""$1": \""$2"\""}'| grep -iv '\"#') + if [[ -z "${query_parameters}" ]]; then + # Add first query parameter directly + query_parameters="${json_parameter}" + else + # Append next query parameter separated by a comma and a space + query_parameters="${query_parameters}, ${json_parameter}" + fi + shift # iterate to next argument + done + echo "{${query_parameters}}" +} + # Function to execute a cypher query from the given file (first and only argument) using "cypher-shell" provided by Neo4j execute_cypher_shell() { # Get the Cypher file name from the first argument - cypherFileName=$1 + cypherFileName="${1}" # Check if NEO4J_BIN exists if [ ! 
-d "${NEO4J_BIN}" ] ; then @@ -68,8 +100,12 @@ execute_cypher_shell() { exit 1 fi + # Extract query parameters out of the key=value pair arguments that follow the first argument (query filename) + query_parameters=$(cypher_shell_query_parameters "${@}") + echo "executeQuery: query_parameters=${query_parameters}" + # (Neo4j Cyper Shell CLI) Execute the Cyper query contained in the file and print the results as CSV - cat $cypherFileName | NEO4J_HOME="${NEO4J_DIRECTORY}" ${NEO4J_BIN}/cypher-shell -u neo4j -p "${NEO4J_INITIAL_PASSWORD}" --format plain || exit 1 + cat $cypherFileName | NEO4J_HOME="${NEO4J_DIRECTORY}" ${NEO4J_BIN}/cypher-shell -u neo4j -p "${NEO4J_INITIAL_PASSWORD}" --format plain --param "${query_parameters}" || exit 1 # Display the name of the Cypher file without its path at the bottom of the CSV (or console) separated by an empty line # TODO Find a solution to move the source reference to the last column name @@ -80,7 +116,7 @@ execute_cypher_shell() { # Function to execute a cypher query from the given file (first and only argument) with a summarized (console) output using "cypher-shell" provided by Neo4j execute_cypher_shell_summarized() { # Get the Cypher file name from the first argument - cypherFileName=$1 + cypherFileName="${1}" results=$( execute_cypher_shell ${cypherFileName} | wc -l ) results=$((results - 2)) @@ -90,7 +126,7 @@ execute_cypher_shell_summarized() { # Function to execute a cypher query from the given file (first and only argument) that fails on no result using "cypher-shell" provided by Neo4j execute_cypher_shell_expect_results() { # Get the Cypher file name from the first argument - cypherFileName=$1 + cypherFileName="${1}" results=$( execute_cypher_shell ${cypherFileName} | wc -l ) results=$((results - 2)) diff --git a/scripts/prepareAnalysis.sh b/scripts/prepareAnalysis.sh index d7be07744..330a73cd6 100644 --- a/scripts/prepareAnalysis.sh +++ b/scripts/prepareAnalysis.sh @@ -55,4 +55,8 @@ execute_cypher 
"${EXTERNAL_DEPENDENCIES_CYPHER_DIR}/Label_external_types_and_ann # Preparation - Add Artifact node properties "incomingDependencies" and "outgoingDependencies" execute_cypher_expect_results "${ARTIFACT_DEPENDENCIES_CYPHER_DIR}/Incoming_Artifact_Dependencies.cypher" || exit 1 -execute_cypher_expect_results "${ARTIFACT_DEPENDENCIES_CYPHER_DIR}/Outgoing_Artifact_Dependencies.cypher" || exit 1 \ No newline at end of file +execute_cypher_expect_results "${ARTIFACT_DEPENDENCIES_CYPHER_DIR}/Outgoing_Artifact_Dependencies.cypher" || exit 1 + +# Preparation - Add Type node properties "incomingDependencies" and "outgoingDependencies" +execute_cypher_expect_results "${PACKAGE_METRICS_CYPHER_DIR}/Set_Incoming_Type_Dependencies.cypher" || exit 1 +execute_cypher_expect_results "${PACKAGE_METRICS_CYPHER_DIR}/Set_Outgoing_Type_Dependencies.cypher" || exit 1 \ No newline at end of file diff --git a/scripts/reports/ArtifactCommunityCsv.sh b/scripts/reports/ArtifactCommunityCsv.sh deleted file mode 100755 index d6cd1dda0..000000000 --- a/scripts/reports/ArtifactCommunityCsv.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash - -# Detects communities using the Graph Data Science Library of Neo4j and creates CSV reports. -# It requires an already running Neo4j graph database with already scanned analyzed artifacts. -# The reports (csv files) will be written into the sub directory reports/artifact-community-csv. -# Note that "scripts/prepareAnalysis.sh" is required to run prior to this script. - -# Requires executeQueryFunctions.sh - -# Overrideable Constants (defaults also defined in sub scripts) -REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"} - -## Get this "scripts/reports" directory if not already set -# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. -# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. 
-# This way non-standard tools like readlink aren't needed. -REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR:-$( CDPATH=. cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} -echo "artifactCommunityCsv: REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR}" - -# Get the "scripts" directory by taking the path of this script and going one directory up. -SCRIPTS_DIR=${SCRIPTS_DIR:-"${REPORTS_SCRIPT_DIR}/.."} # Repository directory containing the shell scripts -echo "artifactCommunityCsv: SCRIPTS_DIR=${SCRIPTS_DIR}" - -# Get the "cypher" directory by taking the path of this script and going two directory up and then to "cypher". -CYPHER_DIR=${CYPHER_DIR:-"${REPORTS_SCRIPT_DIR}/../../cypher"} -echo "artifactCommunityCsv: CYPHER_DIR=$CYPHER_DIR" - -# Define functions to execute a cypher query from within the given file (first and only argument) -source "${SCRIPTS_DIR}/executeQueryFunctions.sh" - -# Create report directory -REPORT_NAME="artifact-community-csv" -FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" -mkdir -p "${FULL_REPORT_DIRECTORY}" - -# Local Constants -LEIDEN_CYPHER_DIR="$CYPHER_DIR/Community_Detection_Leiden_for_Artifacts" - -# Preparation for Community Detection - Create package dependencies projections -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_0_Delete_Projection.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_0b_Delete_Projection.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_1_Create_undirected_Projection.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_1b_Create_subgraph_without_empty_artifacts.cypher" - -# Community Detection using the Leiden Algorithm - Query CSV -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_2_Leiden_Estimate_Memory.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_3_Leiden_Statistics.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_4_Leiden_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Leiden_Communities.csv" - -# Community 
Detection using the Leiden Algorithm - Update Graph -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_5_Leiden_Write_property_leidenCommunityId.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_6_Delete_Existing_Labels.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_7_Add_ArtifactLeidenCommunity_Id_label_to_artifacts.cypher" \ No newline at end of file diff --git a/scripts/reports/ArtifactDependenciesJupyter.sh b/scripts/reports/ArtifactDependenciesJupyter.sh index 29d10cf54..034d7a9f5 100755 --- a/scripts/reports/ArtifactDependenciesJupyter.sh +++ b/scripts/reports/ArtifactDependenciesJupyter.sh @@ -3,7 +3,7 @@ # Creates the "artifact-dependencies" report (ipynb, md, pdf) based on the Jupyter Notebook "ArtifactDependencies.ipynb". # It contains the hierarchical artifact dependencies graph -# Requires executeJupyterNotebook.sh, AritfactCommunityCsv.sh +# Requires executeJupyterNotebook.sh # Overrideable Constants (defaults also defined in sub scripts) REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"} @@ -27,19 +27,10 @@ echo "ArtifactDependenciesJupyter: JUPYTER_NOTEBOOK_DIRECTORY=$JUPYTER_NOTEBOOK_ CYPHER_DIR=${CYPHER_DIR:-"${REPORTS_SCRIPT_DIR}/../../cypher"} echo "ArtifactDependenciesJupyter CYPHER_DIR=${CYPHER_DIR}" -# Define functions to execute cypher queries from within a given file -source "${SCRIPTS_DIR}/executeQueryFunctions.sh" - -# Local Constants -LEIDEN_CYPHER_DIR="$CYPHER_DIR/Community_Detection_Leiden_for_Artifacts" - # Create report directory REPORT_NAME="artifact-dependencies" FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" -# Dependency: Assure that artifacts have a Leiden Community Id (written by "AritfactCommunityCsv.sh") -execute_cypher_expect_results "${LEIDEN_CYPHER_DIR}/Community_Detection_8_Check_Leiden_Community_Id.cypher" - # Execute and convert the Jupyter Notebook "ArtifactDependencies.ipynb" within the given reports directory (cd 
"${FULL_REPORT_DIRECTORY}" && exec ${SCRIPTS_DIR}/executeJupyterNotebook.sh ${JUPYTER_NOTEBOOK_DIRECTORY}/ArtifactDependencies.ipynb) || exit 1 \ No newline at end of file diff --git a/scripts/reports/CentralityCsv.sh b/scripts/reports/CentralityCsv.sh index 17aa69e41..74fdd5ce6 100755 --- a/scripts/reports/CentralityCsv.sh +++ b/scripts/reports/CentralityCsv.sh @@ -33,51 +33,299 @@ REPORT_NAME="centrality-csv" FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" -# Local Constants -CENTRALITY_CYPHER_DIR="$CYPHER_DIR/Centrality" +# Centrality Preparation +# Selects the nodes and relationships for the algorithm and creates an in-memory projection. +# Nodes without incoming and outgoing dependencies will be filtered out with a subgraph. +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. 
Example: "weight" +createProjection() { + local PROJECTION_CYPHER_DIR="$CYPHER_DIR/Dependencies_Projection" -# Preparation for Centrality - Create package dependencies projection -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_0_Delete_Projection.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_0b_Delete_Subraph_Projection.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1_Create_Projection.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1b_Create_Subgraph_Without_Empty_Packages.cypher" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_1_Delete_Projection.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_2_Delete_Subgraph.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_3_Create_Projection.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}" +} -# Centrality using the Page Rank Algorithm - Query CSV -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_2a_Page_Rank_Estimate_Memory.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_2b_Page_Rank_Statistics.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_3c_Page_Rank_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Centrality_Page_Rank.csv" +# Apply the centrality algorithm "Page Rank". +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. 
Example: "weight" +centralityWithPageRank() { + local CENTRALITY_CYPHER_DIR="$CYPHER_DIR/Centrality" -# Centrality using the Page Rank Algorithm - Update Graph -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_3d_Page_Rank_Write.cypher" + # Name of the property that will be written to the nodes containing the centrality score. + # This is also used as a name with the first letter capitalized as a label for the top centraliy nodes. + local writePropertyName="dependencies_projection_write_property=centralityPageRank" -# Centrality using the Article Rank Algorithm - Query CSV -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_4a_Article_Rank_Estimate_Memory.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_4b_Article_Rank_Statistics.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_4c_Article_Rank_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Centrality_Article_Rank.csv" + # Statistics + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_2a_Page_Rank_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_2b_Page_Rank_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" ) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_3c_Page_Rank_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Centrality_Page_Rank.csv" + + # Update Graph (node properties and labels) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_3d_Page_Rank_Write.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1c_Label_Delete.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1d_Label_Add.cypher" "${@}" "${writePropertyName}" +} -# Centrality using the Article Rank Algorithm - Update Graph -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_4d_Article_Rank_Write.cypher" +# Apply the centrality algorithm "Article Rank". 
+# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. Example: "weight" +centralityWithArticleRank() { + local CENTRALITY_CYPHER_DIR="$CYPHER_DIR/Centrality" + + # Name of the property that will be written to the nodes containing the centrality score. + # This is also used as a name with the first letter capitalized as a label for the top centraliy nodes. + local writePropertyName="dependencies_projection_write_property=centralityArticleRank" -# Centrality using the Betweeness Algorithm - Query CSV -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_5a_Betweeness_Estimate.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_5b_Betweeness_Statistics.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_5c_Betweeness_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Centrality_Betweeness.csv" + # Statistics + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_4a_Article_Rank_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_4b_Article_Rank_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" ) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_4c_Article_Rank_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Centrality_Article_Rank.csv" + + # Update Graph (node properties and labels) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_4d_Article_Rank_Write.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1c_Label_Delete.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1d_Label_Add.cypher" "${@}" 
"${writePropertyName}" +} -# Centrality using the Betweeness Algorithm - Update Graph -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_5d_Betweeness_Write.cypher" +# Apply the centrality algorithm "Betweenness". +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. Example: "weight" +centralityWithBetweenness() { + local CENTRALITY_CYPHER_DIR="$CYPHER_DIR/Centrality" + + # Name of the property that will be written to the nodes containing the centrality score. + # This is also used as a name with the first letter capitalized as a label for the top centraliy nodes. + local writePropertyName="dependencies_projection_write_property=centralityBetweenness" -# Centrality using the Cost Effective Lazy Formward (CELF) Algorithm - Query CSV -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_6c_Cost_effective_Lazy_Forward_CELF_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Centrality_Cost_Effective_Lazy_Forward.csv" + # Statistics + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_5a_Betweeness_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_5b_Betweeness_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" ) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_5c_Betweeness_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Centrality_Betweeness.csv" + + # Update Graph (node properties and labels) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_5d_Betweeness_Write.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1c_Label_Delete.cypher" "${@}" 
"${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1d_Label_Add.cypher" "${@}" "${writePropertyName}" +} -# Centrality using the Harmonic Closeness Algorithm - Query CSV -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_7a_Harmonic_Closeness_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Centrality_Harmonic.csv" +# Apply the centrality algorithm "Cost Effective Lazy Forward (CELF)". +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. Example: "weight" +centralityWithCostEffectiveLazyForwardCELF() { + local CENTRALITY_CYPHER_DIR="$CYPHER_DIR/Centrality" -# Centrality using the Harmonic Closeness Algorithm - Update Graph -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_7b_Harmonic_Closeness_Write.cypher" + # Name of the property that will be written to the nodes containing the centrality score. + # This is also used as a name with the first letter capitalized as a label for the top centraliy nodes. 
+ local writePropertyName="dependencies_projection_write_property=centralityCostEffectiveLazyForward" -# Centrality using the Closeness Algorithm - Query CSV -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_8a_Closeness_Statistics.cypher" -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_8b_Closeness_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Centrality_Closeness.csv" + # Statistics + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_6a_Cost_effective_Lazy_Forward_CELF_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_6b_Cost_effective_Lazy_Forward_CELF_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" ) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_6c_Cost_effective_Lazy_Forward_CELF_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Centrality_Cost_effective_Lazy_Forward_CELF.csv" + + # Update Graph (node properties and labels) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_6d_Cost_effective_Lazy_Forward_CELF_Write.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1c_Label_Delete.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1d_Label_Add.cypher" "${@}" "${writePropertyName}" +} -# Centrality using the Closeness Algorithm - Update Graph -execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_8c_Closeness_Write.cypher" \ No newline at end of file +# Apply the centrality algorithm "Harmonic" (variant of "Closeness)"). +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. 
Example: "weight" +centralityWithHarmonic() { + local CENTRALITY_CYPHER_DIR="$CYPHER_DIR/Centrality" + + # Name of the property that will be written to the nodes containing the centrality score. + # This is also used as a name with the first letter capitalized as a label for the top centraliy nodes. + local writePropertyName="dependencies_projection_write_property=centralityHarmonic" + + # Statistics + # Note: Estimate procedure doesn't seem to exist for now (gds version 2.5.0-preview3) + # execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_7a_Harmonic_Closeness_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_7b_Harmonic_Closeness_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" ) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_7c_Harmonic_Closeness_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Centrality_Harmonic.csv" + + # Update Graph (node properties and labels) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_7d_Harmonic_Closeness_Write.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1c_Label_Delete.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1d_Label_Add.cypher" "${@}" "${writePropertyName}" +} + +# Apply the centrality algorithm "Closeness". +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. 
Example: "weight" +centralityWithCloseness() { + local CENTRALITY_CYPHER_DIR="$CYPHER_DIR/Centrality" + + # Name of the property that will be written to the nodes containing the centrality score. + # This is also used as a name with the first letter capitalized as a label for the top centrality nodes. + local writePropertyName="dependencies_projection_write_property=centralityCloseness" + + # Statistics + # Note: Estimate procedure doesn't seem to exist for now (gds version 2.5.0-preview3) + # execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_8a_Closeness_Estimate.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_8b_Closeness_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" ) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_8c_Closeness_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Centrality_Closeness.csv" + + # Update Graph (node properties and labels) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_8d_Closeness_Write.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1c_Label_Delete.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1d_Label_Add.cypher" "${@}" "${writePropertyName}" +} + +# Apply the centrality algorithm "Hyperlink-Induced Topic Search (HITS)". +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. 
Example: "weight" +centralityWithHyperlinkInducedTopicSearchHITS() { + local CENTRALITY_CYPHER_DIR="$CYPHER_DIR/Centrality" + + # Name of the property that will be written to the nodes containing the centrality score. + # This is also used as a name with the first letter capitalized as a label for the top centraliy nodes. + local writePropertyName="dependencies_projection_write_property=centralityHyperlinkInducedTopicSearchAuthority" + + # Statistics + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_9a_Hyperlink_Induced_Topic_Search_HITS_Estimate.cypher" "${@}" "${writePropertyName}" + # Note: There is an issue in gds version 2.5.0-preview3: https://github.com/neo4j/graph-data-science/issues/285 + #execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_9b_Hyperlink_Induced_Topic_Search_HITS_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" ) + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_9c_Hyperlink_Induced_Topic_Search_HITS_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Centrality_Hyperlink_Induced_Topic_Search_HITS.csv" + + # Update Graph (node properties and labels) + # Note: There is an issue in gds version 2.5.0-preview3: https://github.com/neo4j/graph-data-science/issues/285 + #execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_9d_Hyperlink_Induced_Topic_Search_HITS_Write.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1c_Label_Delete.cypher" "${@}" "${writePropertyName}" + execute_cypher "${CENTRALITY_CYPHER_DIR}/Centrality_1d_Label_Add.cypher" "${@}" "${writePropertyName}" +} + +# --------------------------------------------------------------- + +# Artifact Query Parameters +ARTIFACT_PROJECTION="dependencies_projection=artifact-centrality" +ARTIFACT_NODE="dependencies_projection_node=Artifact" +ARTIFACT_WEIGHT="dependencies_projection_weight_property=weight" + +# Artifact Centrality +echo 
"centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing artifact dependencies..." +createProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time centralityWithPageRank "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time centralityWithArticleRank "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time centralityWithBetweenness "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time centralityWithCostEffectiveLazyForwardCELF "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time centralityWithHarmonic "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time centralityWithCloseness "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time centralityWithHyperlinkInducedTopicSearchHITS "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" + +# --------------------------------------------------------------- + +# Package Query Parameters +PACKAGE_PROJECTION="dependencies_projection=package-centrality" +PACKAGE_NODE="dependencies_projection_node=Package" +PACKAGE_WEIGHT="dependencies_projection_weight_property=weight25PercentInterfaces" + +# Package Centrality +echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing package dependencies..." 
+createProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time centralityWithPageRank "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time centralityWithArticleRank "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time centralityWithBetweenness "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time centralityWithCostEffectiveLazyForwardCELF "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time centralityWithHarmonic "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time centralityWithCloseness "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time centralityWithHyperlinkInducedTopicSearchHITS "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" + +# --------------------------------------------------------------- + +# Type Query Parameters +TYPE_PROJECTION="dependencies_projection=type-centrality" +TYPE_NODE="dependencies_projection_node=Type" +TYPE_WEIGHT="dependencies_projection_weight_property=weight" + +# Type Centrality (the progress message below names the node kind that is processed next) +echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..." 
+createProjection "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time centralityWithPageRank "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time centralityWithArticleRank "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time centralityWithBetweenness "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time centralityWithCostEffectiveLazyForwardCELF "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time centralityWithHarmonic "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time centralityWithCloseness "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time centralityWithHyperlinkInducedTopicSearchHITS "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" + +echo "centralityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished" \ No newline at end of file diff --git a/scripts/reports/CommunityCsv.sh b/scripts/reports/CommunityCsv.sh index be948bbca..2c6990c58 100755 --- a/scripts/reports/CommunityCsv.sh +++ b/scripts/reports/CommunityCsv.sh @@ -1,8 +1,9 @@ #!/usr/bin/env bash # Detects communities using the Graph Data Science Library of Neo4j and creates CSV reports. -# It requires an already running Neo4j graph database with already scanned analyzed artifacts. +# It requires an already running Neo4j graph database with already scanned and analyzed artifacts. # The reports (csv files) will be written into the sub directory reports/community-csv. + # Note that "scripts/prepareAnalysis.sh" is required to run prior to this script. # Requires executeQueryFunctions.sh @@ -23,7 +24,7 @@ echo "communityCsv: SCRIPTS_DIR=${SCRIPTS_DIR}" # Get the "cypher" directory by taking the path of this script and going two directory up and then to "cypher". 
CYPHER_DIR=${CYPHER_DIR:-"${REPORTS_SCRIPT_DIR}/../../cypher"} -echo "communityCsv: CYPHER_DIR=$CYPHER_DIR" +echo "communityCsv: CYPHER_DIR=${CYPHER_DIR}" # Define functions to execute a cypher query from within the given file (first and only argument) source "${SCRIPTS_DIR}/executeQueryFunctions.sh" @@ -33,35 +34,190 @@ REPORT_NAME="community-csv" FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" -# Local Constants -LOUVAIN_CYPHER_DIR="$CYPHER_DIR/Community_Detection_Louvain" -LEIDEN_CYPHER_DIR="$CYPHER_DIR/Community_Detection_Leiden" - -# Preparation for Community Detection - Create package dependencies projections -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_0_Delete_Projection.cypher" -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_0b_Delete_Projection.cypher" -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_1_Create_undirected_Projection.cypher" -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_1b_Create_subgraph_without_empty_packages.cypher" - -# Community Detection using the Louvain Algorithm - Query CSV -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_2_Louvain_Estimate_Memory.cypher" -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_3_Louvain_Statistics.cypher" -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_4_Louvain_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Louvain_Communities.csv" - -# Community Detection using the Louvain Algorithm - Update Graph -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_5c_Louvain_Write_louvainCommunity25PercentInterfaces.cypher" -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_6_Louvain_Delete_Existing_Labels.cypher" -execute_cypher "${LOUVAIN_CYPHER_DIR}/Community_Detection_7_Add_LouvainCommunity_Id_label_to_packages.cypher" - -# Community Detection using the Leiden Algorithm - Query CSV -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_2_Leiden_Estimate_Memory.cypher" 
-execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_3_Leiden_Statistics.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_4_Leiden_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Leiden_Communities.csv" - -# Community Detection using the Leiden Algorithm - Update Graph -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_5_Leiden_Write_property_leidenCommunityIdGamma114With25PercentInterfaces.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_6_Delete_Existing_Labels.cypher" -execute_cypher "${LEIDEN_CYPHER_DIR}/Community_Detection_7_Add_LeidenCommunity_Id_label_to_packages.cypher" - -# Community Detection using the Leiden Algorithm - Query CSV after update -execute_cypher "${LEIDEN_CYPHER_DIR}/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Leiden_Communities_That_Span_Multiple_Artifacts.csv" +# Community Detection Preparation +# Selects the nodes and relationships for the algorithm and creates an in-memory projection. +# Nodes without incoming and outgoing dependencies will be filtered out with a subgraph. +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. 
Example: "weight" +createProjection() { + local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection" + + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_1_Delete_Projection.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_2_Delete_Subgraph.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_4_Create_Undirected_Projection.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}" +} + +# Community Detection using the Label Propagation Algorithm +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. Example: "weight" +detectCommunitiesWithLabelPropagation() { + local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + + # Statistics + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4a_Label_Propagation_Estimate.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4b_Label_Propagation_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4c_Label_Propagation_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Label_Propagation.csv" + + # Update Graph (node properties and labels) + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4d_Label_Propagation_Write.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4e_Label_Propagation_Label_Delete.cypher" "${@}" + execute_cypher 
"${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_4f_Label_Propagation_Label.cypher" "${@}" +} + +# Community Detection using the Leiden Algorithm +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. Example: "weight" +# - dependencies_leiden_gamma=... +# Leiden algorithm parameter "gamma". Example (Default): 1.00 +detectCommunitiesWithLeiden() { + local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + + # Statistics + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2a_Leiden_Estimate.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2b_Leiden_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2c_Leiden_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Leiden.csv" + + # Update Graph (node properties and labels) + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2d_Leiden_Write_Node_Property.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2e_Leiden_Label_Delete.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_2f_Leiden_Label.cypher" "${@}" +} + +# Community Detection using the Louvain Algorithm +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection.
Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. Example: "weight" +detectCommunitiesWithLouvain() { + local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + + # Statistics + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1a_Louvain_Estimate.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1b_Louvain_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1c_Louvain_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Louvain.csv" + + # Update Graph (node properties and labels) + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1d_Louvain_Write_louvainCommunityId.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1d_Louvain_Write_intermediateLouvainCommunityId.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1e_Louvain_Label_Delete.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_1f_Louvain_Label.cypher" "${@}" +} + +# Community Detection using the Weakly Connected Components Algorithm +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. 
Example: "weight" +detectCommunitiesWithWeaklyConnectedComponents() { + local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection" + + # Statistics + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3a_WeaklyConnectedComponents_Estimate.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3b_WeaklyConnectedComponents_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}") + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3c_WeaklyConnectedComponents_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_Weakly_Connected_Components.csv" + + # Update Graph (node properties and labels) + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3d_WeaklyConnectedComponents_Write.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3e_WeaklyConnectedComponents_Label_Delete.cypher" "${@}" + execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_3f_WeaklyConnectedComponents_Label.cypher" "${@}" +} + +# --------------------------------------------------------------- + +# Artifact Query Parameters +ARTIFACT_PROJECTION="dependencies_projection=artifact-community" +ARTIFACT_NODE="dependencies_projection_node=Artifact" +ARTIFACT_WEIGHT="dependencies_projection_weight_property=weight" +ARTIFACT_GAMMA="dependencies_leiden_gamma=1.11" # default = 1.00 + +# Artifact Community Detection +echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing artifact dependencies..." 
+createProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time detectCommunitiesWithLeiden "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" +time detectCommunitiesWithLouvain "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time detectCommunitiesWithWeaklyConnectedComponents "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time detectCommunitiesWithLabelPropagation "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" "${ARTIFACT_GAMMA}" + +# --------------------------------------------------------------- + +# Package Query Parameters +PACKAGE_PROJECTION="dependencies_projection=package-community" +PACKAGE_NODE="dependencies_projection_node=Package" +PACKAGE_WEIGHT="dependencies_projection_weight_property=weight25PercentInterfaces" +PACKAGE_GAMMA="dependencies_leiden_gamma=1.14" # default = 1.00 + +# Package Community Detection +echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing package dependencies..."
+createProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time detectCommunitiesWithLeiden "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" +time detectCommunitiesWithLouvain "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time detectCommunitiesWithWeaklyConnectedComponents "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time detectCommunitiesWithLabelPropagation "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" "${PACKAGE_GAMMA}" + +# Package Community Detection - Special CSV Queries after update +execute_cypher "${CYPHER_DIR}/Community_Detection/Compare_Community_Detection_Results.cypher" > "${FULL_REPORT_DIRECTORY}/Compare_Community_Detection_Results.csv" +execute_cypher "${CYPHER_DIR}/Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Package_Communities_Leiden_That_Span_Multiple_Artifacts.csv" + +# --------------------------------------------------------------- + +# Type Query Parameters +TYPE_PROJECTION="dependencies_projection=type-community" +TYPE_NODE="dependencies_projection_node=Type" +TYPE_WEIGHT="dependencies_projection_weight_property=weight" +TYPE_GAMMA="dependencies_leiden_gamma=5.00" # default = 1.00 + +# Type Community Detection +echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..." 
+createProjection "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time detectCommunitiesWithLeiden "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" +time detectCommunitiesWithLouvain "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time detectCommunitiesWithWeaklyConnectedComponents "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time detectCommunitiesWithLabelPropagation "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" "${TYPE_GAMMA}" + +# Type Community Detection - Special CSV Queries after update +execute_cypher "${CYPHER_DIR}/Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > "${FULL_REPORT_DIRECTORY}/Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv" + +echo "communityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Successfully finished" \ No newline at end of file diff --git a/scripts/reports/SimilarityCsv.sh b/scripts/reports/SimilarityCsv.sh index fdaa44431..2ba8c93e4 100755 --- a/scripts/reports/SimilarityCsv.sh +++ b/scripts/reports/SimilarityCsv.sh @@ -33,15 +33,85 @@ REPORT_NAME="similarity-csv" FULL_REPORT_DIRECTORY="${REPORTS_DIRECTORY}/${REPORT_NAME}" mkdir -p "${FULL_REPORT_DIRECTORY}" -# Local Constants -SIMILARITY_CYPHER_DIR="$CYPHER_DIR/Similarity" - -# Preparation Similarity - Create package dependencies projection -execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_0_Delete_Projection.cypher" -execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_0b_Delete_Subgraph_Projection.cypher" -execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1_Create_Projection.cypher" -execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1b_Create_subgraph_without_empty_packages.cypher" - -# Similarity using Node Similarity Algorithm with JACCARD metric - Query CSV -execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_3_Estimate_Memory.cypher" -execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_4_Stream.cypher" > "${FULL_REPORT_DIRECTORY}/Similarity_Jaccard.csv" +# Similarity 
Preparation +# Selects the nodes and relationships for the algorithm and creates an in-memory projection. +# Nodes without incoming and outgoing dependencies will be filtered out with a subgraph. +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight. Example: "weight" +createProjection() { + local PROJECTION_CYPHER_DIR="$CYPHER_DIR/Dependencies_Projection" + + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_1_Delete_Projection.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_2_Delete_Subgraph.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_3_Create_Projection.cypher" "${@}" + execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_5_Create_Subgraph.cypher" "${@}" +} + +# Apply the similarity algorithm "Similarity". +# +# Required Parameters: +# - dependencies_projection=... +# Name prefix for the in-memory projection name for dependencies. Example: "package" +# - dependencies_projection_node=... +# Label of the nodes that will be used for the projection. Example: "Package" +# - dependencies_projection_weight_property=... +# Name of the node property that contains the dependency weight.
Example: "weight" +similarity() { + local SIMILARITY_CYPHER_DIR="$CYPHER_DIR/Similarity" + + # Statistics + execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1a_Estimate.cypher" "${@}" + execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1b_Statistics.cypher" "${@}" + + # Stream to CSV + local nodeLabel + nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" ) + execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1c_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Similarity.csv" + + # Update Graph (node properties and labels) + execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1d_Delete_Relationships.cypher" "${@}" + execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1e_Write.cypher" "${@}" + execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1f_Write_Node_Properties.cypher" "${@}" +} + +# --------------------------------------------------------------- + +# Artifact Query Parameters +ARTIFACT_PROJECTION="dependencies_projection=artifact-similarity" +ARTIFACT_NODE="dependencies_projection_node=Artifact" +ARTIFACT_WEIGHT="dependencies_projection_weight_property=weight" + +# Artifact Similarity +echo "similarityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing artifact dependencies..." +createProjection "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" +time similarity "${ARTIFACT_PROJECTION}" "${ARTIFACT_NODE}" "${ARTIFACT_WEIGHT}" + +# --------------------------------------------------------------- + +# Package Query Parameters +PACKAGE_PROJECTION="dependencies_projection=package-similarity" +PACKAGE_NODE="dependencies_projection_node=Package" +PACKAGE_WEIGHT="dependencies_projection_weight_property=weight25PercentInterfaces" + +# Package Similarity +echo "similarityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing package dependencies..." 
+createProjection "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" +time similarity "${PACKAGE_PROJECTION}" "${PACKAGE_NODE}" "${PACKAGE_WEIGHT}" + +# --------------------------------------------------------------- + +# Type Query Parameters +TYPE_PROJECTION="dependencies_projection=type-similarity" +TYPE_NODE="dependencies_projection_node=Type" +TYPE_WEIGHT="dependencies_projection_weight_property=weight" + +# Type Similarity +echo "similarityCsv: $(date +'%Y-%m-%dT%H:%M:%S%z') Processing type dependencies..." +createProjection "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" +time similarity "${TYPE_PROJECTION}" "${TYPE_NODE}" "${TYPE_WEIGHT}" \ No newline at end of file