Skip to content

Commit d980b61

Browse files
Cleanup BlobStoreRepository Abort and Failure Handling (#46208)
Aborts and failures were handled in a somewhat unfortunate way in #42791: since the tasks for all files are generated before uploading starts, they are all still executed when a snapshot is aborted, leading to a massive number of failures being added to the original abort exception. The situation for failures was not very reasonable either: if one blob failed to upload, the snapshot logic would still upload all the remaining files and only then fail (whereas previously it would just fail all following files). This commit fixes both of the above issues by short-circuiting all remaining tasks for a shard as soon as any one upload throws an exception.
1 parent dd487a0 commit d980b61

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
import java.util.Optional;
111111
import java.util.Set;
112112
import java.util.concurrent.Executor;
113+
import java.util.concurrent.atomic.AtomicBoolean;
113114
import java.util.stream.Collectors;
114115

115116
import static org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName;
@@ -1048,17 +1049,27 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
10481049
final GroupedActionListener<Void> filesListener =
10491050
new GroupedActionListener<>(allFilesUploadedListener, indexIncrementalFileCount);
10501051
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
1052+
// Flag to signal that the snapshot has been aborted/failed so we can stop any further blob uploads from starting
1053+
final AtomicBoolean alreadyFailed = new AtomicBoolean();
10511054
for (BlobStoreIndexShardSnapshot.FileInfo snapshotFileInfo : filesToSnapshot) {
10521055
executor.execute(new ActionRunnable<>(filesListener) {
10531056
@Override
10541057
protected void doRun() {
10551058
try {
1056-
snapshotFile(snapshotFileInfo, indexId, shardId, snapshotId, snapshotStatus, store);
1059+
if (alreadyFailed.get() == false) {
1060+
snapshotFile(snapshotFileInfo, indexId, shardId, snapshotId, snapshotStatus, store);
1061+
}
10571062
filesListener.onResponse(null);
10581063
} catch (IOException e) {
10591064
throw new IndexShardSnapshotFailedException(shardId, "Failed to perform snapshot (index files)", e);
10601065
}
10611066
}
1067+
1068+
@Override
1069+
public void onFailure(Exception e) {
1070+
alreadyFailed.set(true);
1071+
super.onFailure(e);
1072+
}
10621073
});
10631074
}
10641075
} catch (Exception e) {

0 commit comments

Comments
 (0)