Skip to content

Commit 29506f7

Browse files
committed
Optimize Similarity using in-memory mutate
1 parent 52e65c0 commit 29506f7

8 files changed

+80
-4
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Similarity Mutate: Runs node similarity and stores the result as relationships in the in-memory projection.
// Variables: dependencies_projection, dependencies_projection_weight_property
// Note: "mutate" mode is configured with the mutate* keys (mutateRelationshipType, mutateProperty).
//       The write* keys are only valid for the "write" mode of the algorithm.

CALL gds.nodeSimilarity.mutate(
  $dependencies_projection + '-cleaned', {
     relationshipWeightProperty: $dependencies_projection_weight_property
    ,topK: 3
    ,mutateRelationshipType: 'SIMILAR'
    ,mutateProperty: 'score'
})
YIELD preProcessingMillis
     ,computeMillis
     ,mutateMillis
     ,postProcessingMillis
     ,relationshipsWritten
     ,nodesCompared
     ,similarityDistribution
RETURN preProcessingMillis
      ,computeMillis
      ,mutateMillis
      ,postProcessingMillis
      ,relationshipsWritten
      ,nodesCompared
      ,similarityDistribution.min
      ,similarityDistribution.mean
      ,similarityDistribution.max
      ,similarityDistribution.p50
      ,similarityDistribution.p75
      ,similarityDistribution.p90
      ,similarityDistribution.p95
      ,similarityDistribution.p99
      ,similarityDistribution.p999
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Read the similarity relationship from the projection. Variables: dependencies_projection
// Streams the 'score' property of the 'SIMILAR' relationships of the in-memory projection
// and resolves both end nodes together with the artifacts that contain them.

CALL gds.graph.relationshipProperties.stream(
    $dependencies_projection + '-cleaned'
   ,['score']
   ,['SIMILAR']
)
YIELD sourceNodeId
     ,targetNodeId
     ,propertyValue
// Map the streamed ids and property value to the names used by the rest of the query.
 WITH gds.util.asNode(sourceNodeId) AS node1
     ,gds.util.asNode(targetNodeId) AS node2
     ,propertyValue                 AS similarity
OPTIONAL MATCH (artifact1:Artifact)-[:CONTAINS]->(node1)
OPTIONAL MATCH (artifact2:Artifact)-[:CONTAINS]->(node2)
 WITH node1
     ,node2
     // Artifact name = last path segment of the file name without the '.jar' extension.
     ,replace(last(split(artifact1.fileName, '/')), '.jar', '') AS artifactName1
     ,replace(last(split(artifact2.fileName, '/')), '.jar', '') AS artifactName2
     ,similarity
RETURN similarity
      ,artifactName1
      ,coalesce(node1.fqn, node1.fileName, node1.name) AS node1Name
      ,node1.incomingDependencies AS node1IncomingDependencies
      ,node1.outgoingDependencies AS node1OutgoingDependencies
      ,artifactName2
      ,coalesce(node2.fqn, node2.fileName, node2.name) AS node2Name
      ,node2.incomingDependencies AS node2IncomingDependencies
      ,node2.outgoingDependencies AS node2OutgoingDependencies
ORDER BY similarity DESCENDING, node1Name, node2Name
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// Write a property from the projection into the Graph. Variables: dependencies_projection, dependencies_projection_write_property
// The projection name is the given name with the suffix '-cleaned'; exactly one property is written.

CALL gds.graph.nodeProperties.write(
    $dependencies_projection + '-cleaned',
    [$dependencies_projection_write_property]
)
 YIELD propertiesWritten
      ,nodeProperties
      ,writeMillis
RETURN propertiesWritten
      ,nodeProperties
      ,writeMillis

scripts/reports/SimilarityCsv.sh

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,18 @@ similarity() {
7272
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1a_Estimate.cypher" "${@}"
7373
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1b_Statistics.cypher" "${@}"
7474

75+
# Run the algorithm and write the result into the in-memory projection ("mutate")
76+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1c_Mutate.cypher" "${@}"
77+
7578
# Stream to CSV
7679
local nodeLabel
7780
nodeLabel=$( extractQueryParameter "dependencies_projection_node" "${@}" )
78-
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1c_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Similarity.csv"
81+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1e_Stream.cypher" "${@}" > "${FULL_REPORT_DIRECTORY}/${nodeLabel}_Similarity.csv"
7982

8083
# Update Graph (node properties and labels)
81-
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1d_Delete_Relationships.cypher" "${@}"
82-
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1e_Write.cypher" "${@}"
83-
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1f_Write_Node_Properties.cypher" "${@}"
84+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1g_Delete_Relationships.cypher" "${@}"
85+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1h_Write.cypher" "${@}"
86+
execute_cypher "${SIMILARITY_CYPHER_DIR}/Similarity_1i_Write_Node_Properties.cypher" "${@}"
8487
}
8588

8689
# ---------------------------------------------------------------

0 commit comments

Comments
 (0)