2121
2222import org .apache .logging .log4j .LogManager ;
2323import org .apache .logging .log4j .Logger ;
24+ import org .elasticsearch .action .ActionListener ;
25+ import org .elasticsearch .action .ActionRunnable ;
26+ import org .elasticsearch .cluster .metadata .MetaData ;
2427import org .elasticsearch .cluster .metadata .RepositoryMetaData ;
2528import org .elasticsearch .cluster .service .ClusterService ;
2629import org .elasticsearch .common .Strings ;
2932import org .elasticsearch .common .settings .Setting ;
3033import org .elasticsearch .common .unit .ByteSizeUnit ;
3134import org .elasticsearch .common .unit .ByteSizeValue ;
35+ import org .elasticsearch .common .unit .TimeValue ;
3236import org .elasticsearch .common .xcontent .NamedXContentRegistry ;
3337import org .elasticsearch .monitor .jvm .JvmInfo ;
3438import org .elasticsearch .repositories .RepositoryException ;
39+ import org .elasticsearch .repositories .ShardGenerations ;
3540import org .elasticsearch .repositories .blobstore .BlobStoreRepository ;
36-
41+ import org .elasticsearch .snapshots .SnapshotId ;
42+ import org .elasticsearch .snapshots .SnapshotInfo ;
43+ import org .elasticsearch .snapshots .SnapshotShardFailure ;
44+ import org .elasticsearch .snapshots .SnapshotsService ;
45+ import org .elasticsearch .threadpool .Scheduler ;
46+ import org .elasticsearch .threadpool .ThreadPool ;
47+
48+ import java .util .List ;
49+ import java .util .Map ;
50+ import java .util .concurrent .TimeUnit ;
51+ import java .util .concurrent .atomic .AtomicReference ;
3752import java .util .function .Function ;
3853
3954/**
@@ -126,6 +141,23 @@ class S3Repository extends BlobStoreRepository {
126141
127142 static final Setting <String > CLIENT_NAME = new Setting <>("client" , "default" , Function .identity ());
128143
144+ /**
145+ * Artificial delay to introduce after a snapshot finalization or delete has finished so long as the repository is still using the
146+ * backwards compatible snapshot format from before
147+ * {@link org.elasticsearch.snapshots.SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION} ({@link org.elasticsearch.Version#V_7_6_0}).
148+ * This delay is necessary so that the eventually consistent nature of AWS S3 does not randomly result in repository corruption when
149+ * doing repository operations in rapid succession on a repository in the old metadata format.
150+ * This setting should not be adjusted in production when working with an AWS S3 backed repository. Doing so risks the repository
151+ * becoming silently corrupted. To get rid of this waiting period, either create a new S3 repository or remove all snapshots older than
152+ * {@link org.elasticsearch.Version#V_7_6_0} from the repository which will trigger an upgrade of the repository metadata to the new
153+ * format and disable the cooldown period.
154+ */
155+ static final Setting <TimeValue > COOLDOWN_PERIOD = Setting .timeSetting (
156+ "cooldown_period" ,
157+ new TimeValue (3 , TimeUnit .MINUTES ),
158+ new TimeValue (0 , TimeUnit .MILLISECONDS ),
159+ Setting .Property .Dynamic );
160+
129161 /**
130162 * Specifies the path within bucket to repository data. Defaults to root directory.
131163 */
@@ -145,6 +177,12 @@ class S3Repository extends BlobStoreRepository {
145177
146178 private final String cannedACL ;
147179
180+ /**
181+ * Time period to delay repository operations by after finalizing or deleting a snapshot.
182+ * See {@link #COOLDOWN_PERIOD} for details.
183+ */
184+ private final TimeValue coolDown ;
185+
148186 /**
149187 * Constructs an s3 backed repository
150188 */
@@ -176,6 +214,8 @@ class S3Repository extends BlobStoreRepository {
176214 this .storageClass = STORAGE_CLASS_SETTING .get (metadata .settings ());
177215 this .cannedACL = CANNED_ACL_SETTING .get (metadata .settings ());
178216
217+ coolDown = COOLDOWN_PERIOD .get (metadata .settings ());
218+
179219 logger .debug (
180220 "using bucket [{}], chunk_size [{}], server_side_encryption [{}], buffer_size [{}], cannedACL [{}], storageClass [{}]" ,
181221 bucket ,
@@ -186,6 +226,70 @@ class S3Repository extends BlobStoreRepository {
186226 storageClass );
187227 }
188228
/**
 * Holds a reference to the {@link Scheduler.Cancellable} of a delayed repository operation so that it can be cancelled should the
 * repository be closed concurrently.
 */
233+ private final AtomicReference <Scheduler .Cancellable > finalizationFuture = new AtomicReference <>();
234+
235+ @ Override
236+ public void finalizeSnapshot (SnapshotId snapshotId , ShardGenerations shardGenerations , long startTime , String failure , int totalShards ,
237+ List <SnapshotShardFailure > shardFailures , long repositoryStateId , boolean includeGlobalState ,
238+ MetaData clusterMetaData , Map <String , Object > userMetadata , boolean writeShardGens ,
239+ ActionListener <SnapshotInfo > listener ) {
240+ if (writeShardGens == false ) {
241+ listener = delayedListener (listener );
242+ }
243+ super .finalizeSnapshot (snapshotId , shardGenerations , startTime , failure , totalShards , shardFailures , repositoryStateId ,
244+ includeGlobalState , clusterMetaData , userMetadata , writeShardGens , listener );
245+ }
246+
247+ @ Override
248+ public void deleteSnapshot (SnapshotId snapshotId , long repositoryStateId , boolean writeShardGens , ActionListener <Void > listener ) {
249+ if (writeShardGens == false ) {
250+ listener = delayedListener (listener );
251+ }
252+ super .deleteSnapshot (snapshotId , repositoryStateId , writeShardGens , listener );
253+ }
254+
255+ /**
256+ * Wraps given listener such that it is executed with a delay of {@link #coolDown} on the snapshot thread-pool after being invoked.
257+ * See {@link #COOLDOWN_PERIOD} for details.
258+ */
259+ private <T > ActionListener <T > delayedListener (ActionListener <T > listener ) {
260+ final ActionListener <T > wrappedListener = ActionListener .runBefore (listener , () -> {
261+ final Scheduler .Cancellable cancellable = finalizationFuture .getAndSet (null );
262+ assert cancellable != null ;
263+ });
264+ return new ActionListener <>() {
265+ @ Override
266+ public void onResponse (T response ) {
267+ logCooldownInfo ();
268+ final Scheduler .Cancellable existing = finalizationFuture .getAndSet (
269+ threadPool .schedule (ActionRunnable .wrap (wrappedListener , l -> l .onResponse (response )),
270+ coolDown , ThreadPool .Names .SNAPSHOT ));
271+ assert existing == null : "Already have an ongoing finalization " + finalizationFuture ;
272+ }
273+
274+ @ Override
275+ public void onFailure (Exception e ) {
276+ logCooldownInfo ();
277+ final Scheduler .Cancellable existing = finalizationFuture .getAndSet (
278+ threadPool .schedule (ActionRunnable .wrap (wrappedListener , l -> l .onFailure (e )), coolDown , ThreadPool .Names .SNAPSHOT ));
279+ assert existing == null : "Already have an ongoing finalization " + finalizationFuture ;
280+ }
281+ };
282+ }
283+
284+ private void logCooldownInfo () {
285+ logger .info ("Sleeping for [{}] after modifying repository [{}] because it contains snapshots older than version [{}]" +
286+ " and therefore is using a backwards compatible metadata format that requires this cooldown period to avoid " +
287+ "repository corruption. To get rid of this message and move to the new repository metadata format, either remove " +
288+ "all snapshots older than version [{}] from the repository or create a new repository at an empty location." ,
289+ coolDown , metadata .name (), SnapshotsService .SHARD_GEN_IN_REPO_DATA_VERSION ,
290+ SnapshotsService .SHARD_GEN_IN_REPO_DATA_VERSION );
291+ }
292+
189293 private static BlobPath buildBasePath (RepositoryMetaData metadata ) {
190294 final String basePath = BASE_PATH_SETTING .get (metadata .settings ());
191295 if (Strings .hasLength (basePath )) {
@@ -210,4 +314,14 @@ protected BlobStore getBlobStore() {
210314 protected ByteSizeValue chunkSize () {
211315 return chunkSize ;
212316 }
317+
318+ @ Override
319+ protected void doClose () {
320+ final Scheduler .Cancellable cancellable = finalizationFuture .getAndSet (null );
321+ if (cancellable != null ) {
322+ logger .debug ("Repository [{}] closed during cool-down period" , metadata .name ());
323+ cancellable .cancel ();
324+ }
325+ super .doClose ();
326+ }
213327}
0 commit comments