From e7224efde9ff7b0fb8d82ab7f0de04255553dffb Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Wed, 17 Apr 2019 10:29:02 +0200
Subject: [PATCH 1/4] Add Repository Consistency Assertion to SnapshotResiliencyTests (#40857)

* Add Repository Consistency Assertion to SnapshotResiliencyTests

* Add some quick validation to the snapshot resiliency tests that no
dangling metadata or dangling indices are left behind

* Added todo about expanding this assertion further
---
 .../snapshots/SnapshotResiliencyTests.java    | 77 ++++++++++++++++++-
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
index 29bf9e0493e03..b2e7562286caa 100644
--- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
@@ -105,6 +105,8 @@ import org.elasticsearch.cluster.service.ClusterApplierService;
 import org.elasticsearch.cluster.service.ClusterService;
 import org.elasticsearch.cluster.service.MasterService;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.network.NetworkModule;
 import org.elasticsearch.common.settings.ClusterSettings;
@@ -115,7 +117,11 @@ import org.elasticsearch.common.util.PageCacheRecycler;
 import org.elasticsearch.common.util.concurrent.AbstractRunnable;
 import org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor;
+import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
 import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentHelper;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.env.TestEnvironment;
@@ -140,8 +146,10 @@ import org.elasticsearch.ingest.IngestService;
 import org.elasticsearch.node.ResponseCollectorService;
 import org.elasticsearch.plugins.PluginsService;
+import org.elasticsearch.repositories.IndexId;
 import org.elasticsearch.repositories.RepositoriesService;
 import org.elasticsearch.repositories.Repository;
+import org.elasticsearch.repositories.RepositoryData;
 import org.elasticsearch.repositories.fs.FsRepository;
 import org.elasticsearch.script.ScriptService;
 import org.elasticsearch.search.SearchService;
@@ -160,6 +168,8 @@ import org.junit.Before;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Collection;
 import java.util.Collections;
@@ -206,8 +216,12 @@ public void createServices() {
     }
 
     @After
-    public void stopServices() {
-        testClusterNodes.nodes.values().forEach(TestClusterNode::stop);
+    public void verifyReposThenStopServices() throws IOException {
+        try {
+            assertNoStaleRepositoryData();
+        } finally {
+            testClusterNodes.nodes.values().forEach(TestClusterNode::stop);
+        }
     }
 
     public void testSuccessfulSnapshotAndRestore() {
@@ -504,6 +518,65 @@ public void run() {
         assertThat(snapshotIds, either(hasSize(1)).or(hasSize(0)));
     }
 
+    /**
+     * Assert that there are no unreferenced indices or unreferenced root-level metadata blobs in any repository.
+     * TODO: Expand the logic here to also check for unreferenced segment blobs and shard level metadata
+     */
+    private void assertNoStaleRepositoryData() throws IOException {
+        final Path repoPath = tempDir.resolve("repo").toAbsolutePath();
+        final List<Path> repos;
+        try (Stream<Path> reposDir = Files.list(repoPath)) {
+            repos = reposDir.filter(s -> s.getFileName().toString().startsWith("extra") == false).collect(Collectors.toList());
+        }
+        for (Path repoRoot : repos) {
+            final Path latestIndexGenBlob = repoRoot.resolve("index.latest");
+            assertTrue("Could not find index.latest blob for repo at [" + repoRoot + ']', Files.exists(latestIndexGenBlob));
+            final long latestGen = ByteBuffer.wrap(Files.readAllBytes(latestIndexGenBlob)).getLong(0);
+            assertIndexGenerations(repoRoot, latestGen);
+            final RepositoryData repositoryData;
+            try (XContentParser parser =
+                     XContentHelper.createParser(NamedXContentRegistry.EMPTY, LoggingDeprecationHandler.INSTANCE,
+                         new BytesArray(Files.readAllBytes(repoRoot.resolve("index-" + latestGen))), XContentType.JSON)) {
+                repositoryData = RepositoryData.snapshotsFromXContent(parser, latestGen);
+            }
+            assertIndexUUIDs(repoRoot, repositoryData);
+            assertSnapshotUUIDs(repoRoot, repositoryData);
+        }
+    }
+
+    private static void assertIndexGenerations(Path repoRoot, long latestGen) throws IOException {
+        try (Stream<Path> repoRootBlobs = Files.list(repoRoot)) {
+            final long[] indexGenerations = repoRootBlobs.filter(p -> p.getFileName().toString().startsWith("index-"))
+                .map(p -> p.getFileName().toString().replace("index-", ""))
+                .mapToLong(Long::parseLong).sorted().toArray();
+            assertEquals(latestGen, indexGenerations[indexGenerations.length - 1]);
+            assertTrue(indexGenerations.length <= 2);
+        }
+    }
+
+    private static void assertIndexUUIDs(Path repoRoot, RepositoryData repositoryData) throws IOException {
+        final List<String> expectedIndexUUIDs =
+            repositoryData.getIndices().values().stream().map(IndexId::getId).collect(Collectors.toList());
+        try (Stream<Path> indexRoots = Files.list(repoRoot.resolve("indices"))) {
+            final List<String> foundIndexUUIDs = indexRoots.filter(s -> s.getFileName().toString().startsWith("extra") == false)
+                .map(p -> p.getFileName().toString()).collect(Collectors.toList());
+            assertThat(foundIndexUUIDs, containsInAnyOrder(expectedIndexUUIDs.toArray(Strings.EMPTY_ARRAY)));
+        }
+    }
+
+    private static void assertSnapshotUUIDs(Path repoRoot, RepositoryData repositoryData) throws IOException {
+        final List<String> expectedSnapshotUUIDs =
+            repositoryData.getSnapshotIds().stream().map(SnapshotId::getUUID).collect(Collectors.toList());
+        for (String prefix : new String[]{"snap-", "meta-"}) {
+            try (Stream<Path> repoRootBlobs = Files.list(repoRoot)) {
+                final Collection<String> foundSnapshotUUIDs = repoRootBlobs.filter(p -> p.getFileName().toString().startsWith(prefix))
+                    .map(p -> p.getFileName().toString().replace(prefix, "").replace(".dat", ""))
+                    .collect(Collectors.toSet());
+                assertThat(foundSnapshotUUIDs, containsInAnyOrder(expectedSnapshotUUIDs.toArray(Strings.EMPTY_ARRAY)));
+            }
+        }
+    }
+
     private void clearDisruptionsAndAwaitSync() {
         testClusterNodes.clearNetworkDisruptions();
         runUntil(() -> {

From 439bd718124fb30b8a06844e7691768052cb28cd Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Thu, 18 Apr 2019 16:49:07 +0200
Subject: [PATCH 2/4] Fix SnapshotResiliencyTest Repo Consistency Check (#41332)

* Fix SnapshotResiliencyTest Repo Consistency Check

* Due to the random creation of an empty `extra0` file by the Lucene mockFS
we see broken tests because we use the existence of an index folder in
assertions and the index deletion doesn't go through if there are extra
files in an index folder

* Fixed by removing the `extra0` file and resulting empty directory trees
before asserting repo consistency

* Closes #41326
---
 .../snapshots/SnapshotResiliencyTests.java    | 59 +++++++++++++++++--
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
index b2e7562286caa..2661f958fc2a2 100644
--- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
@@ -167,10 +167,16 @@ import org.junit.After;
 import org.junit.Before;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.file.DirectoryNotEmptyException;
+import java.nio.file.FileVisitResult;
 import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
 import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
@@ -378,7 +384,6 @@ public void testSnapshotWithNodeDisconnects() {
         assertThat(snapshotIds, hasSize(1));
     }
 
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/41326")
     public void testConcurrentSnapshotCreateAndDelete() {
         setupTestCluster(randomFrom(1, 3, 5), randomIntBetween(2, 10));
 
@@ -525,10 +530,11 @@ public void run() {
     private void assertNoStaleRepositoryData() throws IOException {
         final Path repoPath = tempDir.resolve("repo").toAbsolutePath();
         final List<Path> repos;
-        try (Stream<Path> reposDir = Files.list(repoPath)) {
+        try (Stream<Path> reposDir = repoFilesByPrefix(repoPath)) {
             repos = reposDir.filter(s -> s.getFileName().toString().startsWith("extra") == false).collect(Collectors.toList());
         }
         for (Path repoRoot : repos) {
+            cleanupEmptyTrees(repoRoot);
             final Path latestIndexGenBlob = repoRoot.resolve("index.latest");
             assertTrue("Could not find index.latest blob for repo at [" + repoRoot + ']', Files.exists(latestIndexGenBlob));
             final long latestGen = ByteBuffer.wrap(Files.readAllBytes(latestIndexGenBlob)).getLong(0);
@@ -544,8 +550,37 @@ private void assertNoStaleRepositoryData() throws IOException {
         }
     }
 
+    // Lucene's mock file system randomly generates empty `extra0` files that break the deletion of blob-store directories.
+    // We clean those up here before checking a blob-store for stale files.
+    private void cleanupEmptyTrees(Path repoPath) {
+        try {
+            Files.walkFileTree(repoPath, new SimpleFileVisitor<>() {
+
+                @Override
+                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                    if (file.getFileName().toString().startsWith("extra")) {
+                        Files.delete(file);
+                    }
+                    return FileVisitResult.CONTINUE;
+                }
+
+                @Override
+                public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
+                    try {
+                        Files.delete(dir);
+                    } catch (DirectoryNotEmptyException e) {
+                        // We're only interested in deleting empty trees here, just ignore directories with content
+                    }
+                    return FileVisitResult.CONTINUE;
+                }
+            });
+        } catch (IOException e) {
+            throw new AssertionError(e);
+        }
+    }
+
     private static void assertIndexGenerations(Path repoRoot, long latestGen) throws IOException {
-        try (Stream<Path> repoRootBlobs = Files.list(repoRoot)) {
+        try (Stream<Path> repoRootBlobs = repoFilesByPrefix(repoRoot)) {
             final long[] indexGenerations = repoRootBlobs.filter(p -> p.getFileName().toString().startsWith("index-"))
                 .map(p -> p.getFileName().toString().replace("index-", ""))
                 .mapToLong(Long::parseLong).sorted().toArray();
@@ -557,7 +592,7 @@ private static void assertIndexGenerations(Path repoRoot, long latestGen) throws
     private static void assertIndexUUIDs(Path repoRoot, RepositoryData repositoryData) throws IOException {
         final List<String> expectedIndexUUIDs =
             repositoryData.getIndices().values().stream().map(IndexId::getId).collect(Collectors.toList());
-        try (Stream<Path> indexRoots = Files.list(repoRoot.resolve("indices"))) {
+        try (Stream<Path> indexRoots = repoFilesByPrefix(repoRoot.resolve("indices"))) {
             final List<String> foundIndexUUIDs = indexRoots.filter(s -> s.getFileName().toString().startsWith("extra") == false)
                 .map(p -> p.getFileName().toString()).collect(Collectors.toList());
             assertThat(foundIndexUUIDs, containsInAnyOrder(expectedIndexUUIDs.toArray(Strings.EMPTY_ARRAY)));
@@ -568,7 +603,7 @@ private static void assertSnapshotUUIDs(Path repoRoot, RepositoryData repository
         final List<String> expectedSnapshotUUIDs =
             repositoryData.getSnapshotIds().stream().map(SnapshotId::getUUID).collect(Collectors.toList());
         for (String prefix : new String[]{"snap-", "meta-"}) {
-            try (Stream<Path> repoRootBlobs = Files.list(repoRoot)) {
+            try (Stream<Path> repoRootBlobs = repoFilesByPrefix(repoRoot)) {
                 final Collection<String> foundSnapshotUUIDs = repoRootBlobs.filter(p -> p.getFileName().toString().startsWith(prefix))
                     .map(p -> p.getFileName().toString().replace(prefix, "").replace(".dat", ""))
                     .collect(Collectors.toSet());
@@ -577,6 +612,20 @@ private static void assertSnapshotUUIDs(Path repoRoot, RepositoryData repository
         }
     }
 
+    /**
+     * List contents of a blob path and return an empty stream if the path doesn't exist.
+     * @param prefix Path to find children for
+     * @return stream of child paths
+     * @throws IOException on failure
+     */
+    private static Stream<Path> repoFilesByPrefix(Path prefix) throws IOException {
+        try {
+            return Files.list(prefix);
+        } catch (FileNotFoundException | NoSuchFileException e) {
+            return Stream.empty();
+        }
+    }
+
     private void clearDisruptionsAndAwaitSync() {
         testClusterNodes.clearNetworkDisruptions();
         runUntil(() -> {

From 0f0cfaf9a1146d309c10d4994cf1c6f4289cc2bb Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Wed, 24 Apr 2019 15:44:22 +0200
Subject: [PATCH 3/4] Reenable SnapshotResiliency Test (#41437)

This was fixed in https://github.com/elastic/elasticsearch/pull/41332 but I
forgot to reenable the test.
---
 .../org/elasticsearch/snapshots/SnapshotResiliencyTests.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
index 2661f958fc2a2..285234999564e 100644
--- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
@@ -433,7 +433,6 @@ public void testConcurrentSnapshotCreateAndDelete() {
      * Simulates concurrent restarts of data and master nodes as well as relocating a primary shard, while starting and subsequently
      * deleting a snapshot.
      */
-    @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/41326")
     public void testSnapshotPrimaryRelocations() {
         final int masterNodeCount = randomFrom(1, 3, 5);
         setupTestCluster(masterNodeCount, randomIntBetween(2, 10));

From abab603e976bef06943fa6338705d0131a9bf35f Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Mon, 29 Apr 2019 09:29:32 +0200
Subject: [PATCH 4/4] fix compile on java8

---
 .../org/elasticsearch/snapshots/SnapshotResiliencyTests.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
index 285234999564e..bcecb806ece61 100644
--- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
@@ -553,7 +553,7 @@ private void assertNoStaleRepositoryData() throws IOException {
     // We clean those up here before checking a blob-store for stale files.
     private void cleanupEmptyTrees(Path repoPath) {
         try {
-            Files.walkFileTree(repoPath, new SimpleFileVisitor<>() {
+            Files.walkFileTree(repoPath, new SimpleFileVisitor<Path>() {
 
                 @Override
                 public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
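
Note on [PATCH 4/4]: the diamond operator with an anonymous inner class
(`new SimpleFileVisitor<>() { ... }`) only became legal in Java 9 (JEP 213),
so the visitor did not compile at a Java 8 source level; spelling out the type
argument (`new SimpleFileVisitor<Path>()`) keeps the visitor typed to `Path`
while compiling on Java 8. A minimal sketch of the Java-8-compatible pattern,
as a hypothetical standalone class that is not part of the Elasticsearch test:

    import java.io.IOException;
    import java.nio.file.FileVisitResult;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.SimpleFileVisitor;
    import java.nio.file.attribute.BasicFileAttributes;

    public class Java8FileVisitorExample {
        public static void main(String[] args) throws IOException {
            Path root = Paths.get(args.length > 0 ? args[0] : ".");
            // Explicit type argument compiles on Java 8 and later;
            // `new SimpleFileVisitor<>() { ... }` would require Java 9+.
            Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                    // Print every regular file under the walked root.
                    System.out.println(file);
                    return FileVisitResult.CONTINUE;
                }
            });
        }
    }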