@@ -59,8 +59,14 @@ def get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, g
59
59
60
60
if access_token is None :
61
61
storage_client = storage .Client (project = gcs_project_id )
62
- loader = GCSFileLoader (project_name = gcs_project_id , bucket = gcs_bucket_name , blob = blob_name , loader_func = load_document_content )
63
- pages = loader .load ()
62
+ bucket = storage_client .bucket (gcs_bucket_name )
63
+ blob = bucket .blob (blob_name )
64
+
65
+ if blob .exists ():
66
+ loader = GCSFileLoader (project_name = gcs_project_id , bucket = gcs_bucket_name , blob = blob_name , loader_func = load_document_content )
67
+ pages = loader .load ()
68
+ else :
69
+ raise Exception ('File does not exist, Please re-upload the file and try again.' )
64
70
else :
65
71
creds = Credentials (access_token )
66
72
storage_client = storage .Client (project = gcs_project_id , credentials = creds )
@@ -77,7 +83,7 @@ def get_documents_from_gcs(gcs_project_id, gcs_bucket_name, gcs_bucket_folder, g
77
83
text += page .extract_text ()
78
84
pages = [Document (page_content = text )]
79
85
else :
80
- raise Exception ('Blob Not Found' )
86
+ raise Exception (f'File Not Found in GCS bucket - { gcs_bucket_name } ' )
81
87
return gcs_blob_filename , pages
82
88
83
89
def upload_file_to_gcs (file_chunk , chunk_number , original_file_name , bucket_name , folder_name_sha1_hashed ):
@@ -141,8 +147,9 @@ def copy_failed_file(source_bucket_name,dest_bucket_name,folder_name, file_name)
141
147
storage_client = storage .Client ()
142
148
bucket = storage_client .bucket (source_bucket_name )
143
149
folder_file_name = folder_name + '/' + file_name
144
- source_blob = bucket .blob (folder_file_name )
145
- bucket .copy_blob (source_blob ,dest_bucket_name ,file_name )
146
- logging .info (f'Failed file { file_name } copied to { dest_bucket_name } from { source_bucket_name } in GCS successfully' )
150
+ source_blob = source_bucket .blob (folder_file_name )
151
+ if source_blob .exists ():
152
+ source_bucket .copy_blob (source_blob , dest_bucket , file_name )
153
+ logging .info (f'Failed file { file_name } copied to { dest_bucket_name } from { source_bucket_name } in GCS successfully' )
147
154
except Exception as e :
148
155
raise Exception (e )
0 commit comments