Skip to content

Commit 17f0e5a

Browse files
committed
Optimize Similarity using in-memory mutate
1 parent 52e65c0 commit 17f0e5a

10 files changed

+89
-4
lines changed

cypher/Similarity/Similarity_1a_Estimate.cypher

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
CALL gds.nodeSimilarity.write.estimate(
44
$dependencies_projection + '-cleaned', {
55
relationshipWeightProperty: $dependencies_projection_weight_property
6+
,relationshipTypes: ['DEPENDS_ON']
67
,writeRelationshipType: 'SIMILAR'
78
,writeProperty: 'score'
89
,topK: 3

cypher/Similarity/Similarity_1b_Statistics.cypher

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
CALL gds.nodeSimilarity.stats(
44
$dependencies_projection + '-cleaned', {
55
relationshipWeightProperty: $dependencies_projection_weight_property
6+
,relationshipTypes: ['DEPENDS_ON']
67
,topK: 3
78
})
89
YIELD nodesCompared
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
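// Similarity Mutate: runs node similarity and stores the 'SIMILAR' relationships with their 'score' in the in-memory projection. Variables: dependencies_projection, dependencies_projection_weight_property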
2+
3+
CALL gds.nodeSimilarity.mutate(
4+
$dependencies_projection + '-cleaned', {
5+
relationshipWeightProperty: $dependencies_projection_weight_property
6+
,relationshipTypes: ['DEPENDS_ON']
7+
,topK: 3
8+
,mutateRelationshipType: 'SIMILAR'
9+
,mutateProperty: 'score'
10+
})
11+
YIELD relationshipsWritten
12+
,nodesCompared
13+
,preProcessingMillis
14+
,computeMillis
15+
,mutateMillis
16+
,postProcessingMillis
17+
,similarityDistribution
18+
RETURN relationshipsWritten
19+
,nodesCompared
20+
,preProcessingMillis
21+
,computeMillis
22+
,mutateMillis
23+
,postProcessingMillis
24+
,similarityDistribution.min
25+
,similarityDistribution.mean
26+
,similarityDistribution.max
27+
,similarityDistribution.p50
28+
,similarityDistribution.p75
29+
,similarityDistribution.p90
30+
,similarityDistribution.p95
31+
,similarityDistribution.p99
32+
,similarityDistribution.p999
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Read the similarity relationship from the projection. Variables: dependencies_projection
2+
3+
CALL gds.graph.relationshipProperty.stream(
4+
$dependencies_projection + '-cleaned'
5+
,'score'
6+
,['SIMILAR']
7+
)
8+
YIELD sourceNodeId
9+
,targetNodeId
10+
,relationshipType
11+
,propertyValue
12+
WITH gds.util.asNode(sourceNodeId) AS sourceNode
13+
,gds.util.asNode(targetNodeId) AS targetNode
14+
,propertyValue AS similarity
15+
OPTIONAL MATCH (sourceArtifact:Artifact)-[:CONTAINS]->(sourceNode)
16+
OPTIONAL MATCH (targetArtifact:Artifact)-[:CONTAINS]->(targetNode)
17+
WITH sourceNode
18+
,targetNode
19+
,replace(last(split(sourceArtifact.fileName, '/')), '.jar', '') AS sourceArtifactName
20+
,replace(last(split(targetArtifact.fileName, '/')), '.jar', '') AS targetArtifactName
21+
,similarity
22+
WHERE (sourceNode.incomingDependencies > 0
23+
OR sourceNode.outgoingDependencies > 0)
24+
AND (targetNode.incomingDependencies > 0
25+
OR targetNode.outgoingDependencies > 0)
26+
RETURN similarity
27+
,sourceArtifactName
28+
,coalesce(sourceNode.fqn, sourceNode.fileName, sourceNode.name) AS sourceNodeName
29+
,sourceNode.incomingDependencies AS sourceNodeIncomingDependencies
30+
,sourceNode.outgoingDependencies AS sourceNodeOutgoingDependencies
31+
,targetArtifactName
32+
,coalesce(targetNode.fqn, targetNode.fileName, targetNode.name) AS targetNodeName
33+
,targetNode.incomingDependencies AS targetNodeIncomingDependencies
34+
,targetNode.outgoingDependencies AS targetNodeOutgoingDependencies
35+
ORDER BY similarity DESCENDING, sourceNodeName, targetNodeName

cypher/Similarity/Similarity_1c_Stream.cypher renamed to cypher/Similarity/Similarity_1e_Stream.cypher

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
CALL gds.nodeSimilarity.stream(
44
$dependencies_projection + '-cleaned', {
55
relationshipWeightProperty: $dependencies_projection_weight_property
6+
,relationshipTypes: ['DEPENDS_ON']
67
,topK: 3
78
})
89
YIELD node1, node2, similarity
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// Write the 'SIMILAR' relationships with their 'score' property from the projection into the Graph. Variables: dependencies_projection
2+
3+
CALL gds.graph.relationship.write(
4+
$dependencies_projection + '-cleaned'
5+
,'SIMILAR'
6+
,'score'
7+
)
8+
YIELD relationshipsWritten, propertiesWritten, writeMillis
9+
RETURN relationshipsWritten, propertiesWritten, writeMillis

cypher/Similarity/Similarity_1e_Write.cypher renamed to cypher/Similarity/Similarity_1h_Write.cypher

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
CALL gds.nodeSimilarity.write(
44
$dependencies_projection + '-cleaned', {
55
relationshipWeightProperty: $dependencies_projection_weight_property
6+
,relationshipTypes: ['DEPENDS_ON']
67
,writeRelationshipType: 'SIMILAR'
78
,writeProperty: 'score'
89
,topK: 3

scripts/reports/SimilarityCsv.sh

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,20 @@ similarity() {
7272
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1a_Estimate.cypher" "${@}"
7373
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1b_Statistics.cypher" "${@}"
7474

75+
# Run the algorithm and write the result into the in-memory projection ("mutate")
76+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1c_Mutate.cypher" "${@}"
77+
7578
# Stream to CSV
7679
local nodeLabel
7780
nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" )
78-
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1c_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Similarity.csv"
81+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1d_Stream_Mutated.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Similarity.csv"
82+
#execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1e_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Similarity.csv"
7983

8084
# Update Graph (relationships, node properties and labels)
81-
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1d_Delete_Relationships.cypher" "${@}"
82-
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1e_Write.cypher" "${@}"
83-
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1f_Write_Node_Properties.cypher" "${@}"
85+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1f_Delete_Relationships.cypher" "${@}"
86+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1g_Write_Mutated.cypher" "${@}"
87+
#execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1h_Write.cypher" "${@}"
88+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1i_Write_Node_Properties.cypher" "${@}"
8489
}
8590

8691
# ---------------------------------------------------------------

0 commit comments

Comments
 (0)