Skip to content

Commit 468e210

Browse files
committed
Use already calculated node embeddings if available
1 parent 76b16de commit 468e210

File tree

5 files changed

+73
-15
lines changed

5 files changed

+73
-15
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Query node embeddings that have already been calculated and written to nodes with the label given in parameter $dependencies_projection_node, including their communityId and centrality. Variables: dependencies_projection_node, dependencies_projection_write_property
2+
3+
MATCH (codeUnit)
4+
WHERE $dependencies_projection_node IN LABELS(codeUnit)
5+
AND codeUnit[$dependencies_projection_write_property] IS NOT NULL
6+
// AND codeUnit.notExistingToForceRecalculation IS NOT NULL // uncomment this line to force recalculation
7+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
8+
WITH *, replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
9+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
10+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
11+
RETURN DISTINCT
12+
coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
13+
,coalesce(codeUnit.name, replace(last(split(codeUnit.fileName, '/')), '.jar', '')) AS shortCodeUnitName
14+
,coalesce(artifactName, projectName) AS projectName
15+
,coalesce(codeUnit.communityLeidenId, 0) AS communityId
16+
,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality
17+
,codeUnit[$dependencies_projection_write_property] AS embedding
18+
ORDER BY communityId

cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@ CALL gds.fastRP.stream(
99
YIELD nodeId, embedding
1010
WITH gds.util.asNode(nodeId) AS codeUnit
1111
,embedding
12-
OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit)
13-
RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
14-
,coalesce(codeUnit.communityLeidenId, 0) AS communityId
15-
,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality
16-
,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
17-
,embedding
12+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
13+
WITH *, replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
14+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
15+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
16+
RETURN DISTINCT
17+
coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
18+
,coalesce(codeUnit.name, replace(last(split(codeUnit.fileName, '/')), '.jar', '')) AS shortCodeUnitName
19+
,coalesce(artifactName, projectName) AS projectName
20+
,coalesce(codeUnit.communityLeidenId, 0) AS communityId
21+
,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality
22+
,embedding

cypher/Node_Embeddings/Node_Embeddings_2d_Hash_GNN_Stream.cypher

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,14 @@ CALL gds.beta.hashgnn.stream(
1414
YIELD nodeId, embedding
1515
WITH gds.util.asNode(nodeId) AS codeUnit
1616
,embedding
17-
OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit)
18-
RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
17+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
18+
WITH *, replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
19+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
20+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
21+
RETURN DISTINCT
22+
coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
23+
,coalesce(codeUnit.name, replace(last(split(codeUnit.fileName, '/')), '.jar', '')) AS shortCodeUnitName
24+
,coalesce(artifactName, projectName) AS projectName
1925
,coalesce(codeUnit.communityLeidenId, 0) AS communityId
2026
,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality
21-
,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
2227
,embedding

cypher/Node_Embeddings/Node_Embeddings_3d_Node2Vec_Stream.cypher

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,14 @@ CALL gds.node2vec.stream(
1010
YIELD nodeId, embedding
1111
WITH gds.util.asNode(nodeId) AS codeUnit
1212
,embedding
13-
OPTIONAL MATCH (artifact:Artifact)-[:CONTAINS]->(codeUnit)
14-
RETURN DISTINCT coalesce(codeUnit.fqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
13+
OPTIONAL MATCH (artifact:Java:Artifact)-[:CONTAINS]->(codeUnit)
14+
WITH *, replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
15+
OPTIONAL MATCH (projectRoot:Directory)<-[:HAS_ROOT]-(proj:TS:Project)-[:CONTAINS]->(codeUnit)
16+
WITH *, last(split(projectRoot.absoluteFileName, '/')) AS projectName
17+
RETURN DISTINCT
18+
coalesce(codeUnit.fqn, codeUnit.globalFqn, codeUnit.fileName, codeUnit.signature, codeUnit.name) AS codeUnitName
19+
,coalesce(codeUnit.name, replace(last(split(codeUnit.fileName, '/')), '.jar', '')) AS shortCodeUnitName
20+
,coalesce(artifactName, projectName) AS projectName
1521
,coalesce(codeUnit.communityLeidenId, 0) AS communityId
1622
,coalesce(codeUnit.centralityPageRank, 0.01) AS centrality
17-
,replace(last(split(artifact.fileName, '/')), '.jar', '') AS artifactName
1823
,embedding

jupyter/NodeEmbeddings.ipynb

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,28 @@
108108
" return pd.DataFrame([r.values() for r in records], columns=keys)"
109109
]
110110
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"id": "bd1d9775",
115+
"metadata": {},
116+
"outputs": [],
117+
"source": [
118+
"def query_first_non_empty_cypher_to_data_frame(*filenames : str, parameters: typ.Optional[typ.Dict[str, typ.Any]] = None):\n",
119+
" \"\"\"\n",
120+
" Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
121+
" If all given file names result in empty results, the last (empty) result will be returned.\n",
122+
"    By additionally specifying \"limit=\" the \"LIMIT\" keyword will be appended to the query so that only the first results get returned.\n",
123+
" \"\"\"\n",
124+
" result=pd.DataFrame()\n",
125+
" for filename in filenames:\n",
126+
" result=query_cypher_to_data_frame(filename, parameters)\n",
127+
" if not result.empty:\n",
128+
" print(\"The results have been provided by the query filename: \" + filename)\n",
129+
" return result\n",
130+
" return result"
131+
]
132+
},
111133
{
112134
"cell_type": "code",
113135
"execution_count": null,
@@ -177,7 +199,8 @@
177199
" empty_result = pd.DataFrame(columns=[\"codeUnitName\", 'projectName', 'communityId', 'centrality', 'embedding'])\n",
178200
" return empty_result\n",
179201
"\n",
180-
" embeddings = query_cypher_to_data_frame(cypher_file_name, parameters)\n",
202+
" existing_embeddings_query_filename=\"../cypher/Node_Embeddings/Node_Embeddings_0a_Query_Calculated.cypher\"\n",
203+
" embeddings = query_first_non_empty_cypher_to_data_frame(existing_embeddings_query_filename, cypher_file_name, parameters=parameters)\n",
181204
" display(embeddings.head()) # Display the first entries of the table\n",
182205
" return embeddings"
183206
]
@@ -210,7 +233,7 @@
210233
" # and the code unit and artifact name of the query above as preparation for the plot\n",
211234
" node_embeddings_for_visualization = pd.DataFrame(data = {\n",
212235
" \"codeUnit\": embeddings.codeUnitName,\n",
213-
" \"artifact\": embeddings.artifactName,\n",
236+
" \"artifact\": embeddings.projectName,\n",
214237
" \"communityId\": embeddings.communityId,\n",
215238
" \"centrality\": embeddings.centrality,\n",
216239
" \"x\": [value[0] for value in two_dimension_node_embeddings],\n",
@@ -316,7 +339,8 @@
316339
" \"dependencies_projection\": \"java-package-embeddings-notebook\",\n",
317340
" \"dependencies_projection_node\": \"Package\",\n",
318341
" \"dependencies_projection_weight_property\": \"weight25PercentInterfaces\",\n",
319-
" \"dependencies_projection_embedding_dimension\":\"64\" \n",
342+
" \"dependencies_projection_write_property\": \"embeddingsFastRandomProjection\",\n",
343+
" \"dependencies_projection_embedding_dimension\":\"64\"\n",
320344
"}\n",
321345
"embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher\", java_package_embeddings_parameters)\n"
322346
]
@@ -396,6 +420,7 @@
396420
" \"dependencies_projection\": \"typescript-module-embeddings-notebook\",\n",
397421
" \"dependencies_projection_node\": \"Module\",\n",
398422
" \"dependencies_projection_weight_property\": \"lowCouplingElement25PercentWeight\",\n",
423+
" \"dependencies_projection_write_property\": \"embeddingsFastRandomProjection\",\n",
399424
" \"dependencies_projection_embedding_dimension\":\"64\" \n",
400425
"}\n",
401426
"embeddings = create_node_embeddings(\"../cypher/Node_Embeddings/Node_Embeddings_1d_Fast_Random_Projection_Stream.cypher\", typescript_module_embeddings_parameters)\n"

0 commit comments

Comments
 (0)