Skip to content

Commit fd5ef6b

Browse files
Add Repository Consistency Assertion to SnapshotResiliencyTests (#40857)
* Add Repository Consistency Assertion to SnapshotResiliencyTests
* Add some quick validation on not leaving behind any dangling metadata or dangling indices to the snapshot resiliency tests
* Added todo about expanding this assertion further
1 parent 777408d commit fd5ef6b

File tree

2 files changed

+77
-4
lines changed

2 files changed

+77
-4
lines changed

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@
130130
* |- index-N - list of all snapshot ids and the indices belonging to each snapshot, N is the generation of the file
131131
* |- index.latest - contains the numeric value of the latest generation of the index file (i.e. N from above)
132132
* |- incompatible-snapshots - list of all snapshot ids that are no longer compatible with the current version of the cluster
133-
* |- snap-20131010 - JSON serialized Snapshot for snapshot "20131010"
133+
* |- snap-20131010.dat - JSON serialized Snapshot for snapshot "20131010"
134134
* |- meta-20131010.dat - JSON serialized MetaData for snapshot "20131010" (includes only global metadata)
135-
* |- snap-20131011 - JSON serialized Snapshot for snapshot "20131011"
135+
* |- snap-20131011.dat - JSON serialized Snapshot for snapshot "20131011"
136136
* |- meta-20131011.dat - JSON serialized MetaData for snapshot "20131011"
137137
* .....
138138
* |- indices/ - data for all indices

server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@
105105
import org.elasticsearch.cluster.service.ClusterApplierService;
106106
import org.elasticsearch.cluster.service.ClusterService;
107107
import org.elasticsearch.cluster.service.MasterService;
108+
import org.elasticsearch.common.Strings;
109+
import org.elasticsearch.common.bytes.BytesArray;
108110
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
109111
import org.elasticsearch.common.network.NetworkModule;
110112
import org.elasticsearch.common.settings.ClusterSettings;
@@ -115,7 +117,11 @@
115117
import org.elasticsearch.common.util.PageCacheRecycler;
116118
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
117119
import org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor;
120+
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
118121
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
122+
import org.elasticsearch.common.xcontent.XContentHelper;
123+
import org.elasticsearch.common.xcontent.XContentParser;
124+
import org.elasticsearch.common.xcontent.XContentType;
119125
import org.elasticsearch.env.Environment;
120126
import org.elasticsearch.env.NodeEnvironment;
121127
import org.elasticsearch.env.TestEnvironment;
@@ -140,8 +146,10 @@
140146
import org.elasticsearch.ingest.IngestService;
141147
import org.elasticsearch.node.ResponseCollectorService;
142148
import org.elasticsearch.plugins.PluginsService;
149+
import org.elasticsearch.repositories.IndexId;
143150
import org.elasticsearch.repositories.RepositoriesService;
144151
import org.elasticsearch.repositories.Repository;
152+
import org.elasticsearch.repositories.RepositoryData;
145153
import org.elasticsearch.repositories.fs.FsRepository;
146154
import org.elasticsearch.script.ScriptService;
147155
import org.elasticsearch.search.SearchService;
@@ -160,6 +168,8 @@
160168
import org.junit.Before;
161169

162170
import java.io.IOException;
171+
import java.nio.ByteBuffer;
172+
import java.nio.file.Files;
163173
import java.nio.file.Path;
164174
import java.util.Collection;
165175
import java.util.Collections;
@@ -206,8 +216,12 @@ public void createServices() {
206216
}
207217

208218
@After
209-
public void stopServices() {
210-
testClusterNodes.nodes.values().forEach(TestClusterNode::stop);
// New teardown (added lines below): first run the repository consistency assertion, then stop every test cluster node.
219+
public void verifyReposThenStopServices() throws IOException {
220+
try {
221+
assertNoStaleRepositoryData();
222+
} finally {
// The finally block stops the nodes even when the assertion above fails or throws.
223+
testClusterNodes.nodes.values().forEach(TestClusterNode::stop);
224+
}
211225
}
212226

213227
public void testSuccessfulSnapshotAndRestore() {
@@ -502,6 +516,65 @@ public void run() {
502516
assertThat(snapshotIds, either(hasSize(1)).or(hasSize(0)));
503517
}
504518

519+
/**
520+
* Assert that there are no unreferenced indices or unreferenced root-level metadata blobs in any repository.
521+
* TODO: Expand the logic here to also check for unreferenced segment blobs and shard level metadata
522+
*/
523+
private void assertNoStaleRepositoryData() throws IOException {
524+
final Path repoPath = tempDir.resolve("repo").toAbsolutePath();
525+
final List<Path> repos;
526+
try (Stream<Path> reposDir = Files.list(repoPath)) {
527+
repos = reposDir.filter(s -> s.getFileName().toString().startsWith("extra") == false).collect(Collectors.toList());
528+
}
529+
for (Path repoRoot : repos) {
530+
final Path latestIndexGenBlob = repoRoot.resolve("index.latest");
531+
assertTrue("Could not find index.latest blob for repo at [" + repoRoot + ']', Files.exists(latestIndexGenBlob));
532+
final long latestGen = ByteBuffer.wrap(Files.readAllBytes(latestIndexGenBlob)).getLong(0);
533+
assertIndexGenerations(repoRoot, latestGen);
534+
final RepositoryData repositoryData;
535+
try (XContentParser parser =
536+
XContentHelper.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE,
537+
new BytesArray(Files.readAllBytes(repoRoot.resolve("index-" + latestGen))), XContentType.JSON)) {
538+
repositoryData = RepositoryData.snapshotsFromXContent(parser, latestGen);
539+
}
540+
assertIndexUUIDs(repoRoot, repositoryData);
541+
assertSnapshotUUIDs(repoRoot, repositoryData);
542+
}
543+
}
544+
545+
private static void assertIndexGenerations(Path repoRoot, long latestGen) throws IOException {
546+
try (Stream<Path> repoRootBlobs = Files.list(repoRoot)) {
547+
final long[] indexGenerations = repoRootBlobs.filter(p -> p.getFileName().toString().startsWith("index-"))
548+
.map(p -> p.getFileName().toString().replace("index-", ""))
549+
.mapToLong(Long::parseLong).sorted().toArray();
550+
assertEquals(latestGen, indexGenerations[indexGenerations.length - 1]);
551+
assertTrue(indexGenerations.length <= 2);
552+
}
553+
}
554+
555+
private static void assertIndexUUIDs(Path repoRoot, RepositoryData repositoryData) throws IOException {
556+
final List<String> expectedIndexUUIDs =
557+
repositoryData.getIndices().values().stream().map(IndexId::getId).collect(Collectors.toList());
558+
try (Stream<Path> indexRoots = Files.list(repoRoot.resolve("indices"))) {
559+
final List<String> foundIndexUUIDs = indexRoots.filter(s -> s.getFileName().toString().startsWith("extra") == false)
560+
.map(p -> p.getFileName().toString()).collect(Collectors.toList());
561+
assertThat(foundIndexUUIDs, containsInAnyOrder(expectedIndexUUIDs.toArray(Strings.EMPTY_ARRAY)));
562+
}
563+
}
564+
565+
private static void assertSnapshotUUIDs(Path repoRoot, RepositoryData repositoryData) throws IOException {
566+
final List<String> expectedSnapshotUUIDs =
567+
repositoryData.getSnapshotIds().stream().map(SnapshotId::getUUID).collect(Collectors.toList());
568+
for (String prefix : new String[]{"snap-", "meta-"}) {
569+
try (Stream<Path> repoRootBlobs = Files.list(repoRoot)) {
570+
final Collection<String> foundSnapshotUUIDs = repoRootBlobs.filter(p -> p.getFileName().toString().startsWith(prefix))
571+
.map(p -> p.getFileName().toString().replace(prefix, "").replace(".dat", ""))
572+
.collect(Collectors.toSet());
573+
assertThat(foundSnapshotUUIDs, containsInAnyOrder(expectedSnapshotUUIDs.toArray(Strings.EMPTY_ARRAY)));
574+
}
575+
}
576+
}
577+
505578
private void clearDisruptionsAndAwaitSync() {
506579
testClusterNodes.clearNetworkDisruptions();
507580
runUntil(() -> {

0 commit comments

Comments
 (0)