@@ -242,6 +242,72 @@ detectCommunitiesWithKCoreDecomposition() {
242242 calculateCommunityMetrics " ${@ } " " ${writePropertyName} "
243243}
244244
# Node Embeddings using Fast Random Projection
#
# Runs the Fast Random Projection "mutate" query so that the resulting node
# embeddings are stored in the in-memory projection under the property name
# given by "dependencies_projection_node_embeddings_property".
# The embedding dimension is fixed to 2 by this function.
#
# Required Parameters:
# - dependencies_projection=...
#   Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - dependencies_projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
# - dependencies_projection_node_embeddings_property=...
#   Name of the node property that will contain the node embeddings. Example: "embeddingsFastRandomProjection2dHDBSCAN"
nodeEmbeddingsWithFastRandomProjectionForHDBSCAN() {
    # Declaration and assignment are separate so that a failing
    # extractQueryParameter call is not masked by the exit status of "local".
    local embeddingProperty
    embeddingProperty=$( extractQueryParameter "dependencies_projection_node_embeddings_property" "${@}" )

    local NODE_EMBEDDINGS_CYPHER_DIR="${CYPHER_DIR}/Node_Embeddings"
    # The mutate query takes the target property as "dependencies_projection_write_property".
    local mutatePropertyName="dependencies_projection_write_property=${embeddingProperty}"
    local embeddingsDimension="dependencies_projection_embedding_dimension=2"

    # Run the algorithm and write the result into the in-memory projection ("mutate").
    # Every parameter is quoted so that it reaches execute_cypher as exactly one argument.
    execute_cypher "${NODE_EMBEDDINGS_CYPHER_DIR}/Node_Embeddings_1c_Fast_Random_Projection_Mutate.cypher" "${@}" "${mutatePropertyName}" "${embeddingsDimension}"
}
267+
# Community Detection using Hierarchical Density-Based Spatial Clustering (HDBSCAN) Algorithm
#
# Steps:
# 1. Compute 2-dimensional Fast Random Projection node embeddings (mutate).
# 2. Run the HDBSCAN estimate and statistics queries.
# 3. Run HDBSCAN in "mutate" mode to store the labels in the in-memory projection.
# 4. Stream the grouped result into "<node label>_Communities_HDBSCAN.csv"
#    inside FULL_REPORT_DIRECTORY.
# 5. Write the property "communityFastRpHdbscanLabel" to the Graph and
#    refresh the label "HDBSCAN" (delete, then add).
# 6. Calculate the community metrics for the written property.
#
# Required Parameters:
# - dependencies_projection=...
#   Name prefix for the in-memory projection name for dependencies. Example: "package"
# - dependencies_projection_node=...
#   Label of the nodes that will be used for the projection. Example: "Package"
# - dependencies_projection_weight_property=...
#   Name of the node property that contains the dependency weight. Example: "weight"
#
# Special Requirements:
# - This algorithm needs a node property with an array of floats to compute clusters.
#   One possible way is to use node embeddings for that (like FastRP).
detectCommunitiesWithHDBSCAN() {
    local COMMUNITY_DETECTION_CYPHER_DIR="${CYPHER_DIR}/Community_Detection"
    local PROJECTION_CYPHER_DIR="${CYPHER_DIR}/Dependencies_Projection"

    local writePropertyName="dependencies_projection_write_property=communityFastRpHdbscanLabel"
    local writeLabelName="dependencies_projection_write_label=HDBSCAN"
    local embeddingProperty="dependencies_projection_node_embeddings_property=embeddingsFastRandomProjection2dHDBSCAN"

    # HDBSCAN clusters on the embeddings property, so it has to be computed first.
    nodeEmbeddingsWithFastRandomProjectionForHDBSCAN "${@}" "${embeddingProperty}"

    # Statistics
    execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_11a_HDBSCAN_Estimate.cypher" "${@}" "${embeddingProperty}" "${writePropertyName}"
    execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_11b_HDBSCAN_Statistics.cypher" "${@}" "${embeddingProperty}"

    # Run the algorithm and write the result into the in-memory projection ("mutate")
    execute_cypher "${COMMUNITY_DETECTION_CYPHER_DIR}/Community_Detection_11c_HDBSCAN_Mutate.cypher" "${@}" "${embeddingProperty}" "${writePropertyName}"

    # Stream to CSV
    # The node label is used as the prefix of the report file name.
    local nodeLabel
    nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" )
    execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_8_Stream_Mutated_Grouped.cypher" "${@}" "${writePropertyName}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Communities_HDBSCAN.csv"

    # Update Graph (node properties and labels) using the already mutated property projection
    execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_9_Write_Mutated.cypher" "${@}" "${writePropertyName}"
    execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_10_Delete_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"
    execute_cypher "${PROJECTION_CYPHER_DIR}/Dependencies_11_Add_Label.cypher" "${@}" "${writePropertyName}" "${writeLabelName}"

    calculateCommunityMetrics "${@}" "${writePropertyName}"
}
310+
245311# Community Detection using the Approximate Maximum k-cut Algorithm
246312#
247313# Required Parameters:
@@ -402,6 +468,7 @@ detectCommunities() {
402468 time detectCommunitiesWithKCoreDecomposition " ${@ } "
403469 time detectCommunitiesWithApproximateMaximumKCut " ${@ } "
404470 time calculateLocalClusteringCoefficient " ${@ } "
471+
405472 compareCommunityDetectionResults " ${@ } "
406473 listAllResults " ${@ } "
407474}
@@ -415,7 +482,7 @@ ARTIFACT_GAMMA="dependencies_leiden_gamma=1.11" # default = 1.00
415482ARTIFACT_KCUT=" dependencies_maxkcut=5" # default = 2
416483
417484if createUndirectedDependencyProjection " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " ; then
418- detectCommunities " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " " ${ARTIFACT_GAMMA} " " ${ARTIFACT_KCUT} "
485+ detectCommunities " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} " " ${ARTIFACT_GAMMA} " " ${ARTIFACT_KCUT} " # "${ARTIFACT_NODE_EMBEDDINGS}"
419486 writeLeidenModularity " ${ARTIFACT_PROJECTION} " " ${ARTIFACT_NODE} " " ${ARTIFACT_WEIGHT} "
420487fi
421488
@@ -430,7 +497,9 @@ PACKAGE_KCUT="dependencies_maxkcut=20" # default = 2
430497if createUndirectedDependencyProjection " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} " ; then
431498 detectCommunities " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} " " ${PACKAGE_GAMMA} " " ${PACKAGE_KCUT} "
432499 writeLeidenModularity " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} "
433-
500+
501+ detectCommunitiesWithHDBSCAN " ${PACKAGE_PROJECTION} " " ${PACKAGE_NODE} " " ${PACKAGE_WEIGHT} "
502+
434503 # Package Community Detection - Special CSV Queries after update
435504 execute_cypher " ${CYPHER_DIR} /Community_Detection/Which_package_community_spans_several_artifacts_and_how_are_the_packages_distributed.cypher" > " ${FULL_REPORT_DIRECTORY} /Package_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
436505fi
@@ -444,8 +513,8 @@ TYPE_GAMMA="dependencies_leiden_gamma=5.00" # default = 1.00
444513TYPE_KCUT=" dependencies_maxkcut=100" # default = 2
445514
446515if createUndirectedJavaTypeDependencyProjection " ${TYPE_PROJECTION} " ; then
447- detectCommunities " ${TYPE_PROJECTION} " " ${TYPE_NODE} " " ${TYPE_WEIGHT} " " ${TYPE_GAMMA} " " ${TYPE_KCUT} "
448-
516+ detectCommunities " ${TYPE_PROJECTION} " " ${TYPE_NODE} " " ${TYPE_WEIGHT} " " ${TYPE_GAMMA} " " ${TYPE_KCUT} " " ${TYPE_NODE_EMBEDDINGS} "
517+ detectCommunitiesWithHDBSCAN " ${TYPE_PROJECTION} " " ${TYPE_NODE} " " ${TYPE_WEIGHT} "
449518 # Type Community Detection - Special CSV Queries after update
450519 execute_cypher " ${CYPHER_DIR} /Community_Detection/Which_type_community_spans_several_artifacts_and_how_are_the_types_distributed.cypher" > " ${FULL_REPORT_DIRECTORY} /Type_Communities_Leiden_That_Span_Multiple_Artifacts.csv"
451520 execute_cypher " ${CYPHER_DIR} /Community_Detection/Type_communities_with_few_members_in_foreign_packages.cypher" > " ${FULL_REPORT_DIRECTORY} /Type_communities_with_few_members_in_foreign_packages.csv"
0 commit comments