 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.common.unit.ByteSizeValue;
+import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
@@ -139,7 +141,9 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
 
     private static final String TESTS_FILE = "tests-";
 
-    private static final String METADATA_NAME_FORMAT = "meta-%s.dat";
+    private static final String METADATA_PREFIX = "meta-";
+
+    private static final String METADATA_NAME_FORMAT = METADATA_PREFIX + "%s.dat";
 
     private static final String METADATA_CODEC = "metadata";
 
@@ -393,21 +397,24 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, Action
         // Delete snapshot from the index file, since it is the maintainer of truth of active snapshots
         final RepositoryData updatedRepositoryData;
         final Map<String, BlobContainer> foundIndices;
+        final Set<String> rootBlobs;
         try {
             final RepositoryData repositoryData = getRepositoryData();
             updatedRepositoryData = repositoryData.removeSnapshot(snapshotId);
             // Cache the indices that were found before writing out the new index-N blob so that a stuck master will never
             // delete an index that was created by another master node after writing this index-N blob.
             foundIndices = blobStore().blobContainer(basePath().add("indices")).children();
+            rootBlobs = blobContainer().listBlobs().keySet();
             writeIndexGen(updatedRepositoryData, repositoryStateId);
         } catch (Exception ex) {
             listener.onFailure(new RepositoryException(metadata.name(), "failed to delete snapshot [" + snapshotId + "]", ex));
             return;
         }
         final SnapshotInfo finalSnapshotInfo = snapshot;
+        final List<String> snapMetaFilesToDelete =
+            Arrays.asList(snapshotFormat.blobName(snapshotId.getUUID()), globalMetaDataFormat.blobName(snapshotId.getUUID()));
         try {
-            blobContainer().deleteBlobsIgnoringIfNotExists(
-                Arrays.asList(snapshotFormat.blobName(snapshotId.getUUID()), globalMetaDataFormat.blobName(snapshotId.getUUID())));
+            blobContainer().deleteBlobsIgnoringIfNotExists(snapMetaFilesToDelete);
         } catch (IOException e) {
             logger.warn(() -> new ParameterizedMessage("[{}] Unable to delete global metadata files", snapshotId), e);
         }
@@ -420,12 +427,56 @@ public void deleteSnapshot(SnapshotId snapshotId, long repositoryStateId, Action
                 snapshotId,
                 ActionListener.map(listener, v -> {
                     cleanupStaleIndices(foundIndices, survivingIndices);
+                    cleanupStaleRootFiles(Sets.difference(rootBlobs, new HashSet<>(snapMetaFilesToDelete)), updatedRepositoryData);
                     return null;
                 })
             );
         }
     }
 
+    private void cleanupStaleRootFiles(Set<String> rootBlobNames, RepositoryData repositoryData) {
+        final Set<String> allSnapshotIds =
+            repositoryData.getAllSnapshotIds().stream().map(SnapshotId::getUUID).collect(Collectors.toSet());
+        final List<String> blobsToDelete = rootBlobNames.stream().filter(
+            blob -> {
+                if (FsBlobContainer.isTempBlobName(blob)) {
+                    return true;
+                }
+                if (blob.endsWith(".dat")) {
+                    final String foundUUID;
+                    if (blob.startsWith(SNAPSHOT_PREFIX)) {
+                        foundUUID = blob.substring(SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length());
+                        assert snapshotFormat.blobName(foundUUID).equals(blob);
+                    } else if (blob.startsWith(METADATA_PREFIX)) {
+                        foundUUID = blob.substring(METADATA_PREFIX.length(), blob.length() - ".dat".length());
+                        assert globalMetaDataFormat.blobName(foundUUID).equals(blob);
+                    } else {
+                        return false;
+                    }
+                    return allSnapshotIds.contains(foundUUID) == false;
+                }
+                return false;
+            }
+        ).collect(Collectors.toList());
+        if (blobsToDelete.isEmpty()) {
+            return;
+        }
+        try {
+            logger.info("[{}] Found stale root level blobs {}. Cleaning them up", metadata.name(), blobsToDelete);
+            blobContainer().deleteBlobsIgnoringIfNotExists(blobsToDelete);
+        } catch (IOException e) {
+            logger.warn(() -> new ParameterizedMessage(
+                "[{}] The following blobs are no longer part of any snapshot [{}] but failed to remove them",
+                metadata.name(), blobsToDelete), e);
+        } catch (Exception e) {
+            // TODO: We shouldn't be blanket catching and suppressing all exceptions here and instead handle them safely upstream.
+            //       Currently this catch exists as a stop gap solution to tackle unexpected runtime exceptions from implementations
+            //       bubbling up and breaking the snapshot functionality.
+            assert false : e;
+            logger.warn(new ParameterizedMessage("[{}] Exception during cleanup of root level blobs", metadata.name()), e);
+        }
+    }
+
     private void cleanupStaleIndices(Map<String, BlobContainer> foundIndices, Map<String, IndexId> survivingIndices) {
         try {
             final Set<String> survivingIndexIds = survivingIndices.values().stream()
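
For reference, the classification rule applied by the new cleanupStaleRootFiles method can be illustrated with a small, self-contained sketch. The class name, the staleRootBlobs helper, and the "pending-" temp-file prefix below are hypothetical stand-ins (the real code delegates to FsBlobContainer.isTempBlobName and to the repository's blob-format helpers); this is an illustration of the filtering logic under those assumptions, not code taken from the change.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class StaleRootBlobFilterSketch {

    private static final String SNAPSHOT_PREFIX = "snap-";
    private static final String METADATA_PREFIX = "meta-";
    // Hypothetical stand-in for FsBlobContainer.isTempBlobName(blob); the real prefix check may differ.
    private static final String TEMP_PREFIX = "pending-";

    /**
     * Mirrors the filter in cleanupStaleRootFiles: leftover temp blobs are always stale,
     * snap-*.dat and meta-*.dat blobs are stale when their UUID is not referenced by any
     * surviving snapshot, and everything else (e.g. index-N) is never touched.
     */
    static List<String> staleRootBlobs(Set<String> rootBlobNames, Set<String> survivingSnapshotUUIDs) {
        return rootBlobNames.stream().filter(blob -> {
            if (blob.startsWith(TEMP_PREFIX)) {
                return true;
            }
            if (blob.endsWith(".dat")) {
                final String uuid;
                if (blob.startsWith(SNAPSHOT_PREFIX)) {
                    uuid = blob.substring(SNAPSHOT_PREFIX.length(), blob.length() - ".dat".length());
                } else if (blob.startsWith(METADATA_PREFIX)) {
                    uuid = blob.substring(METADATA_PREFIX.length(), blob.length() - ".dat".length());
                } else {
                    return false;
                }
                return survivingSnapshotUUIDs.contains(uuid) == false;
            }
            return false;
        }).collect(Collectors.toList());
    }

    public static void main(String[] args) {
        Set<String> rootBlobs = Set.of("index-5", "snap-abc.dat", "meta-abc.dat", "snap-orphan.dat", "pending-x.dat");
        Set<String> surviving = Set.of("abc");
        // Expected output: only snap-orphan.dat and pending-x.dat are classified as stale.
        System.out.println(staleRootBlobs(rootBlobs, surviving));
    }
}

The design point the change relies on is ordering: the root blob listing is taken before the updated index-N generation is written, and the just-deleted snap-/meta- blobs are subtracted via Sets.difference, so only blobs that no surviving snapshot references are ever considered for deletion, and unrecognized blob names are left alone.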