Skip to content

Commit f9047d7

Browse files
test + docs
1 parent d0ffb44 commit f9047d7

File tree

2 files changed

+72
-2
lines changed

2 files changed

+72
-2
lines changed

plugins/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import org.elasticsearch.snapshots.SnapshotId;
4141
import org.elasticsearch.snapshots.SnapshotInfo;
4242
import org.elasticsearch.snapshots.SnapshotShardFailure;
43+
import org.elasticsearch.snapshots.SnapshotsService;
4344
import org.elasticsearch.threadpool.Scheduler;
4445
import org.elasticsearch.threadpool.ThreadPool;
4546

@@ -142,9 +143,13 @@ class S3Repository extends BlobStoreRepository {
142143
/**
143144
* Artificial delay to introduce after a snapshot finalization or delete has finished so long as the repository is still using the
144145
* backwards compatible snapshot format from before
145-
* {@link org.elasticsearch.snapshots.SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION}.
146+
* {@link org.elasticsearch.snapshots.SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION} ({@link org.elasticsearch.Version#V_7_6_0}).
146147
* This delay is necessary so that the eventually consistent nature of AWS S3 does not randomly result in repository corruption when
147-
* doing repository operations in rapid succession.
148+
* doing repository operations in rapid succession on a repository in the old metadata format.
149+
* This setting should not be adjusted in production when working with an AWS S3 backed repository. Doing so risks the repository
150+
* becoming silently corrupted. To get rid of this waiting period, either create a new S3 repository or remove all snapshots older than
151+
* {@link org.elasticsearch.Version#V_7_6_0} from the repository, which will trigger an upgrade of the repository metadata to the new
152+
* format and disable the cooldown period.
148153
*/
149154
static final Setting<TimeValue> COOLDOWN_PERIOD = Setting.timeSetting(
150155
"cooldown_period",
@@ -258,20 +263,31 @@ private <T> ActionListener<T> delayedListener(ActionListener<T> listener) {
258263
return new ActionListener<>() {
259264
@Override
260265
public void onResponse(T response) {
266+
logCooldownInfo();
261267
final Scheduler.Cancellable existing = finalizationFuture.getAndSet(
262268
threadPool.schedule(() -> wrappedListener.onResponse(response), coolDown, ThreadPool.Names.SNAPSHOT));
263269
assert existing == null : "Already have an ongoing finalization " + finalizationFuture;
264270
}
265271

266272
@Override
267273
public void onFailure(Exception e) {
274+
logCooldownInfo();
268275
final Scheduler.Cancellable existing = finalizationFuture.getAndSet(
269276
threadPool.schedule(() -> wrappedListener.onFailure(e), coolDown, ThreadPool.Names.SNAPSHOT));
270277
assert existing == null : "Already have an ongoing finalization " + finalizationFuture;
271278
}
272279
};
273280
}
274281

282+
/**
 * Logs, at INFO level, that the repository is about to wait for the configured {@code coolDown} period and tells the
 * operator how to get rid of the wait (remove the old snapshots or create a new repository at an empty location).
 * Invoked from both the success and the failure path of the delayed listener, right before the wrapped listener is
 * scheduled to run after the cooldown.
 */
private void logCooldownInfo() {
283+
logger.info("Sleeping for [{}] after modifying repository [{}] because it contains snapshots older than version [{}]" +
284+
" and therefore is using a backwards compatible metadata format that requires this cooldown period to avoid " +
285+
"repository corruption. To get rid of this message and move to the new repository metadata format, either remove " +
286+
"all snapshots older than version [{}] from the repository or create a new repository at an empty location.",
287+
// The version constant is passed twice because the message contains two placeholders for it.
coolDown, metadata.name(), SnapshotsService.SHARD_GEN_IN_REPO_DATA_VERSION,
288+
SnapshotsService.SHARD_GEN_IN_REPO_DATA_VERSION);
289+
}
290+
275291
private static BlobPath buildBasePath(RepositoryMetaData metadata) {
276292
final String basePath = BASE_PATH_SETTING.get(metadata.settings());
277293
if (Strings.hasLength(basePath)) {

plugins/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3BlobStoreRepositoryTests.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,30 +22,48 @@
2222
import com.sun.net.httpserver.HttpExchange;
2323
import com.sun.net.httpserver.HttpHandler;
2424
import fixture.s3.S3HttpHandler;
25+
import org.elasticsearch.action.ActionRunnable;
26+
import org.elasticsearch.action.support.PlainActionFuture;
2527
import org.elasticsearch.cluster.metadata.RepositoryMetaData;
2628
import org.elasticsearch.cluster.service.ClusterService;
2729
import org.elasticsearch.common.SuppressForbidden;
2830
import org.elasticsearch.common.blobstore.BlobContainer;
2931
import org.elasticsearch.common.blobstore.BlobPath;
3032
import org.elasticsearch.common.blobstore.BlobStore;
33+
import org.elasticsearch.common.bytes.BytesReference;
3134
import org.elasticsearch.common.settings.MockSecureSettings;
3235
import org.elasticsearch.common.settings.Setting;
3336
import org.elasticsearch.common.settings.Settings;
3437
import org.elasticsearch.common.unit.ByteSizeUnit;
38+
import org.elasticsearch.common.unit.TimeValue;
3539
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
40+
import org.elasticsearch.common.xcontent.XContentFactory;
3641
import org.elasticsearch.plugins.Plugin;
42+
import org.elasticsearch.repositories.RepositoriesService;
43+
import org.elasticsearch.repositories.RepositoryData;
44+
import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
3745
import org.elasticsearch.repositories.blobstore.ESMockAPIBasedRepositoryIntegTestCase;
46+
import org.elasticsearch.snapshots.SnapshotId;
47+
import org.elasticsearch.snapshots.SnapshotsService;
3848
import org.elasticsearch.snapshots.mockstore.BlobStoreWrapper;
49+
import org.elasticsearch.threadpool.ThreadPool;
3950

51+
import java.io.IOException;
52+
import java.io.InputStream;
4053
import java.util.ArrayList;
4154
import java.util.Collection;
4255
import java.util.Collections;
4356
import java.util.List;
4457
import java.util.Map;
4558

59+
import static org.hamcrest.Matchers.greaterThan;
60+
import static org.hamcrest.Matchers.lessThan;
61+
4662
@SuppressForbidden(reason = "this test uses a HttpServer to emulate an S3 endpoint")
4763
public class S3BlobStoreRepositoryTests extends ESMockAPIBasedRepositoryIntegTestCase {
4864

65+
private static final TimeValue TEST_COOLDOWN_PERIOD = TimeValue.timeValueSeconds(5L);
66+
4967
@Override
5068
protected String repositoryType() {
5169
return S3Repository.TYPE;
@@ -82,6 +100,7 @@ protected Settings nodeSettings(int nodeOrdinal) {
82100
secureSettings.setString(S3ClientSettings.SECRET_KEY_SETTING.getConcreteSettingForNamespace("test").getKey(), "secret");
83101

84102
return Settings.builder()
103+
.put(ThreadPool.ESTIMATED_TIME_INTERVAL_SETTING.getKey(), 0) // We have tests that verify an exact wait time
85104
.put(S3ClientSettings.ENDPOINT_SETTING.getConcreteSettingForNamespace("test").getKey(), httpServerUrl())
86105
// Disable chunked encoding as it greatly simplifies request parsing on the httpServer side
87106
.put(S3ClientSettings.DISABLE_CHUNKED_ENCODING.getConcreteSettingForNamespace("test").getKey(), true)
@@ -92,6 +111,41 @@ protected Settings nodeSettings(int nodeOrdinal) {
92111
.build();
93112
}
94113

114+
/**
 * Verifies that the S3 repository enforces the configured cooldown period while it contains a snapshot recorded with a
 * version older than {@link SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION}.
 * <p>
 * The test creates a snapshot, rewrites the repository data so that this snapshot is recorded with an older version, and
 * then asserts that a subsequent snapshot creation and snapshot deletion each take longer than
 * {@code TEST_COOLDOWN_PERIOD}, whereas deleting the faked old snapshot itself completes in less than that period.
 * Note that this test intentionally spends more than two cooldown periods waiting.
 */
public void testEnforcedCooldownPeriod() throws IOException {
115+
// Create a repository with an explicit cooldown period so the delay is observable in the assertions below.
final String repoName = createRepository(randomName(), Settings.builder().put(repositorySettings())
116+
.put(S3Repository.COOLDOWN_PERIOD.getKey(), TEST_COOLDOWN_PERIOD).build());
117+
118+
// Take a snapshot (with no indices) that will later be made to look like it was created by an older version.
final SnapshotId fakeOldSnapshot = client().admin().cluster().prepareCreateSnapshot(repoName, "snapshot-old")
119+
.setWaitForCompletion(true).setIndices().get().getSnapshotInfo().snapshotId();
120+
final RepositoriesService repositoriesService = internalCluster().getInstance(RepositoriesService.class);
121+
final BlobStoreRepository repository = (BlobStoreRepository) repositoriesService.repository(repoName);
122+
// Load the current repository data on the generic thread pool and block until it is available.
final RepositoryData repositoryData =
123+
PlainActionFuture.get(f -> repository.threadPool().generic().execute(() -> repository.getRepositoryData(f)));
124+
// Record the snapshot with a version older than SHARD_GEN_IN_REPO_DATA_VERSION.
final RepositoryData modifiedRepositoryData = repositoryData.withVersions(Collections.singletonMap(fakeOldSnapshot,
125+
SnapshotsService.SHARD_GEN_IN_REPO_DATA_VERSION.minimumCompatibilityVersion()));
126+
final BytesReference serialized =
127+
BytesReference.bytes(modifiedRepositoryData.snapshotsToXContent(XContentFactory.jsonBuilder(), false));
128+
// Write the modified repository data to the index blob of the current generation.
// NOTE(review): the trailing `true` is presumably failIfAlreadyExists - confirm against BlobContainer#writeBlobAtomic.
PlainActionFuture.get(f -> repository.threadPool().generic().execute(ActionRunnable.run(f, () -> {
129+
try (InputStream stream = serialized.streamInput()) {
130+
repository.blobStore().blobContainer(repository.basePath()).writeBlobAtomic(
131+
BlobStoreRepository.INDEX_FILE_PREFIX + modifiedRepositoryData.getGenId(), stream, serialized.length(), true);
132+
}
133+
})));
134+
135+
final String newSnapshotName = "snapshot-new";
136+
// Creating a snapshot while the old-version snapshot is present must take longer than the cooldown period.
final long beforeThrottledSnapshot = repository.threadPool().relativeTimeInNanos();
137+
client().admin().cluster().prepareCreateSnapshot(repoName, newSnapshotName).setWaitForCompletion(true).setIndices().get();
138+
assertThat(repository.threadPool().relativeTimeInNanos() - beforeThrottledSnapshot, greaterThan(TEST_COOLDOWN_PERIOD.getNanos()));
139+
140+
// Deleting the new snapshot while the old-version snapshot is still present must also take longer than the cooldown.
final long beforeThrottledDelete = repository.threadPool().relativeTimeInNanos();
141+
client().admin().cluster().prepareDeleteSnapshot(repoName, newSnapshotName).get();
142+
assertThat(repository.threadPool().relativeTimeInNanos() - beforeThrottledDelete, greaterThan(TEST_COOLDOWN_PERIOD.getNanos()));
143+
144+
// Deleting the faked old snapshot itself is expected to complete in less than the cooldown period.
final long beforeFastDelete = repository.threadPool().relativeTimeInNanos();
145+
client().admin().cluster().prepareDeleteSnapshot(repoName, fakeOldSnapshot.getName()).get();
146+
assertThat(repository.threadPool().relativeTimeInNanos() - beforeFastDelete, lessThan(TEST_COOLDOWN_PERIOD.getNanos()));
147+
}
148+
95149
/**
96150
* S3RepositoryPlugin that allows disabling chunked encoding and setting a low threshold between single upload and multipart upload.
97151
*/

0 commit comments

Comments
 (0)