@@ -215,7 +215,7 @@ def create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type
        lst_file_name.append({'fileName': obj_source_node.file_name, 'fileSize': obj_source_node.file_size, 'url': obj_source_node.url, 'language': obj_source_node.language, 'status': 'Failed'})
    return lst_file_name, success_count, failed_count

-def extract_graph_from_file_local_file(graph, model, merged_file_path, fileName, allowedNodes, allowedRelationship, uri):
+def extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, fileName, allowedNodes, allowedRelationship):

    logging.info(f'Process file name :{fileName}')
    gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
@@ -227,9 +227,9 @@ def extract_graph_from_file_local_file(graph, model, merged_file_path, fileName,
    if pages == None or len(pages) == 0:
        raise Exception(f'File content is not available for file : {file_name}')

-    return processing_source(graph, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path, uri)
+    return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, True, merged_file_path)

-def extract_graph_from_file_s3(graph, model, source_url, aws_access_key_id, aws_secret_access_key):
+def extract_graph_from_file_s3(uri, userName, password, database, model, source_url, aws_access_key_id, aws_secret_access_key, allowedNodes, allowedRelationship):

    if (aws_access_key_id == None or aws_secret_access_key == None):
        raise Exception('Please provide AWS access and secret keys')
@@ -240,44 +240,44 @@ def extract_graph_from_file_s3(graph, model, source_url, aws_access_key_id, aws_
    if pages == None or len(pages) == 0:
        raise Exception(f'File content is not available for file : {file_name}')

-    return processing_source(graph, model, file_name, pages)
+    return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)

-def extract_graph_from_web_page(graph, model, source_url, allowedNodes, allowedRelationship):
+def extract_graph_from_web_page(uri, userName, password, database, model, source_url, allowedNodes, allowedRelationship):

    file_name, pages = get_documents_from_web_page(source_url)

    if pages == None or len(pages) == 0:
        raise Exception(f'Content is not available for given URL : {file_name}')

-    return processing_source(graph, model, file_name, pages, allowedNodes, allowedRelationship)
+    return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)

-def extract_graph_from_file_youtube(graph, model, source_url, allowedNodes, allowedRelationship):
+def extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, allowedNodes, allowedRelationship):

    source_type, youtube_url = check_url_source(source_url)
    file_name, pages = get_documents_from_youtube(source_url)

    if pages == None or len(pages) == 0:
        raise Exception(f'Youtube transcript is not available for file : {file_name}')
-    return processing_source(graph, model, file_name, pages)
+    return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)

-def extract_graph_from_file_Wikipedia(graph, model, wiki_query, max_sources, language, allowedNodes, allowedRelationship):
+def extract_graph_from_file_Wikipedia(uri, userName, password, database, model, wiki_query, max_sources, language, allowedNodes, allowedRelationship):

    file_name, pages = get_documents_from_Wikipedia(wiki_query, language)
    if pages == None or len(pages) == 0:
        raise Exception(f'Wikipedia page is not available for file : {file_name}')
-    return processing_source(graph, model, file_name, pages)
+    return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)

-def extract_graph_from_file_gcs(graph, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, allowedNodes, allowedRelationship):
+def extract_graph_from_file_gcs(uri, userName, password, database, model, gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename, access_token, allowedNodes, allowedRelationship):

    file_name, pages = get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, gcs_blob_filename)
    if pages == None or len(pages) == 0:
        raise Exception(f'File content is not available for file : {file_name}')

-    return processing_source(graph, model, file_name, pages)
+    return processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship)

-def processing_source(graph, model, file_name, pages, allowedNodes, allowedRelationship, is_uploaded_from_local=None, merged_file_path=None, uri=None):
+def processing_source(uri, userName, password, database, model, file_name, pages, allowedNodes, allowedRelationship, is_uploaded_from_local=None, merged_file_path=None):
    """
    Extracts a Neo4jGraph from a PDF file based on the model.
@@ -294,6 +294,7 @@ def processing_source(graph, model, file_name, pages, allowedNodes, allowedRelat
      status and model as attributes.
    """
    start_time = datetime.now()
+    graph = create_graph_database_connection(uri, userName, password, database)
    graphDb_data_Access = graphDBdataAccess(graph)

    result = graphDb_data_Access.get_current_status_document_node(file_name)
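Note that processing_source now builds its own connection from the request credentials rather than receiving a shared graph object. The helper it calls, create_graph_database_connection, is defined outside this diff; the following is a minimal sketch of what it presumably does, assuming it wraps LangChain's Neo4jGraph class (the class and its url/username/password/database keyword arguments are real; the body is an assumption, not the repo's actual implementation):

# A minimal sketch of the helper this diff calls; the real implementation
# lives elsewhere in the repo and may differ.
from langchain_community.graphs import Neo4jGraph

def create_graph_database_connection(uri, userName, password, database):
    # One connection per request: each caller passes credentials instead of
    # sharing a long-lived graph object across API calls.
    return Neo4jGraph(url=uri, username=userName, password=password, database=database)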
@@ -344,7 +345,7 @@ def processing_source(graph, model, file_name, pages, allowedNodes, allowedRelat
                logging.info('Exit from running loop of processing file')
                exit
            else:
-                node_count, rel_count = processing_chunks(selected_chunks, graph, file_name, model, allowedNodes, allowedRelationship, node_count, rel_count)
+                node_count, rel_count = processing_chunks(selected_chunks, graph, uri, userName, password, database, file_name, model, allowedNodes, allowedRelationship, node_count, rel_count)
        end_time = datetime.now()
        processed_time = end_time - start_time
@@ -397,8 +398,14 @@ def processing_source(graph, model, file_name, pages, allowedNodes, allowedRelat
    else:
        logging.info('File does not process because it\'s already in Processing status')

-def processing_chunks(chunkId_chunkDoc_list, graph, file_name, model, allowedNodes, allowedRelationship, node_count, rel_count):
+def processing_chunks(chunkId_chunkDoc_list, graph, uri, userName, password, database, file_name, model, allowedNodes, allowedRelationship, node_count, rel_count):
    #create vector index and update chunk node with embedding
+    if graph is not None:
+        if graph._driver._closed:
+            graph = create_graph_database_connection(uri, userName, password, database)
+    else:
+        graph = create_graph_database_connection(uri, userName, password, database)
+
    update_embedding_create_vector_index(graph, chunkId_chunkDoc_list, file_name)
    logging.info("Get graph document list from models")
    graph_documents = generate_graphDocuments(model, graph, chunkId_chunkDoc_list, allowedNodes, allowedRelationship)
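The guard added to processing_chunks reconnects when the underlying driver has been closed, but it does so by peeking at graph._driver._closed, a private attribute of the neo4j Python driver. A hypothetical alternative (ensure_open_connection is an illustrative name, not part of this diff) could lean on the driver's public verify_connectivity() method instead:

# Hypothetical variant of the reconnect guard above; ensure_open_connection
# is an illustrative name, not a function from this diff.
def ensure_open_connection(graph, uri, userName, password, database):
    if graph is not None:
        try:
            # verify_connectivity() is public neo4j driver API; it raises
            # if the driver has been closed or the server is unreachable.
            graph._driver.verify_connectivity()
            return graph
        except Exception:
            pass
    # Fall back to a fresh connection built from the request credentials.
    return create_graph_database_connection(uri, userName, password, database)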