From e34407e4a90029b3e6bf73d63f455a22ebd1a56b Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 27 May 2021 10:42:25 +0200 Subject: [PATCH 1/5] Fix Bug with Concurrent Snapshot and Index Delete Fix state machine bug that fixes the incorrect assumption a finished snapshot delete could only start shard snapshots when in fact it can also move snapshots to a completed state. --- .../DedicatedClusterSnapshotRestoreIT.java | 77 +++++++++++++++++++ .../snapshots/SnapshotsService.java | 6 +- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index 3e84664fa2ad3..50d533df88ba7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -11,6 +11,7 @@ import com.carrotsearch.hppc.IntHashSet; import com.carrotsearch.hppc.IntSet; import org.elasticsearch.action.ActionFuture; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse; import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; @@ -20,6 +21,7 @@ import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.support.ActiveShardCount; +import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.node.NodeClient; import org.elasticsearch.cluster.SnapshotsInProgress; @@ -33,6 +35,7 @@ import org.elasticsearch.common.settings.SettingsFilter; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.index.seqno.RetentionLeaseActions; import org.elasticsearch.index.seqno.RetentionLeases; import org.elasticsearch.index.shard.ShardId; @@ -72,6 +75,7 @@ import java.util.List; import java.util.Locale; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -1007,6 +1011,79 @@ public void testPartialSnapshotsDoNotRecordDeletedShardFailures() throws Excepti assertThat(snapshotInfo.shardFailures(), empty()); } + public void testDeleteIndexDuringSnapshot() throws Exception { + final String indexName = "test-idx"; + assertAcked(prepareCreate(indexName, 1, indexSettingsNoReplicas(1))); + ensureGreen(); + indexRandomDocs(indexName, 100); + + final String repoName = "test-repo"; + createRepository(repoName, "mock", + Settings.builder().put("location", randomRepoPath()).put("random", randomAlphaOfLength(10)) + .put("wait_after_unblock", 200)); + + final String firstSnapshotName = "test-snap"; + createSnapshot(repoName, firstSnapshotName, List.of(indexName)); + final int concurrentLoops = randomIntBetween(2, 5); + final List> futures = new ArrayList<>(concurrentLoops); + for (int i = 0; i < concurrentLoops; i++) { + final PlainActionFuture future = PlainActionFuture.newFuture(); + futures.add(future); + startSnapshotDeleteLoop(repoName, indexName, "test-snap-" + i, future); + } + + Thread.sleep(200); + + logger.info("--> delete index"); + assertAcked(admin().indices().prepareDelete(indexName)); + + for (Future future : futures) { + future.get(); + } + + logger.info("--> restore snapshot 1"); + clusterAdmin().prepareRestoreSnapshot(repoName, firstSnapshotName).get(); + ensureGreen(indexName); + } + + // create and delete a snapshot of the given name and for the given single index in a loop until the index is removed from the cluster + // at which point doneListener is resolved + private void startSnapshotDeleteLoop(String repoName, String indexName, String snapshotName, ActionListener doneListener) { + clusterAdmin().prepareCreateSnapshot(repoName, snapshotName) + .setWaitForCompletion(true) + .setPartial(true) + .setIndices(indexName) + .execute(new ActionListener<>() { + @Override + public void onResponse(CreateSnapshotResponse createSnapshotResponse) { + clusterAdmin().prepareDeleteSnapshot(repoName, snapshotName) + .execute( + new ActionListener<>() { + @Override + public void onResponse(AcknowledgedResponse acknowledgedResponse) { + assertAcked(acknowledgedResponse); + startSnapshotDeleteLoop(repoName, indexName, snapshotName, doneListener); + } + + @Override + public void onFailure(Exception e) { + throw new AssertionError(e); + } + } + ); + } + + @Override + public void onFailure(Exception e) { + if (e instanceof IndexNotFoundException == false) { + throw new AssertionError(e); + } + doneListener.onResponse(null); + } + }); + } + + public void testGetReposWithWildcard() { internalCluster().startMasterOnlyNode(); List repositoryMetadata = client().admin().cluster().prepareGetRepositories("*").get().repositories(); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index e687a0be2ec86..036bf2d2e8a92 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -2210,8 +2210,12 @@ && alreadyReassigned(value.key.getIndexName(), value.key.getId(), reassignedShar updatedAssignmentsBuilder.put(shardId, updated); } } - snapshotEntries.add(entry.withStartedShards(updatedAssignmentsBuilder.build())); + final SnapshotsInProgress.Entry updatedEntry = entry.withShardStates(updatedAssignmentsBuilder.build()); + snapshotEntries.add(updatedEntry); changed = true; + if (updatedEntry.state().completed()) { + newFinalizations.add(entry); + } } } } else { From 8a75cbea958a52a696c4c533ec02db890737be30 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 27 May 2021 10:45:36 +0200 Subject: [PATCH 2/5] fix weird assertion --- .../DedicatedClusterSnapshotRestoreIT.java | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index 50d533df88ba7..ce9c2832336d5 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -84,17 +84,7 @@ import static org.elasticsearch.test.NodeRoles.nonMasterNode; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFutureThrows; -import static org.hamcrest.Matchers.allOf; -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.empty; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.lessThan; -import static org.hamcrest.Matchers.not; -import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.*; @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCase { @@ -1075,9 +1065,7 @@ public void onFailure(Exception e) { @Override public void onFailure(Exception e) { - if (e instanceof IndexNotFoundException == false) { - throw new AssertionError(e); - } + assertThat(e, instanceOf(IndexNotFoundException.class)); doneListener.onResponse(null); } }); From ac21bbea58c622645d3cc70efe88cd9d7a4b4d05 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 27 May 2021 10:45:59 +0200 Subject: [PATCH 3/5] fix weird assertion --- .../DedicatedClusterSnapshotRestoreIT.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index ce9c2832336d5..698e5223899cf 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -84,7 +84,18 @@ import static org.elasticsearch.test.NodeRoles.nonMasterNode; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFutureThrows; -import static org.hamcrest.Matchers.*; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThan; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; @ClusterScope(scope = Scope.TEST, numDataNodes = 0) public class DedicatedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCase { From da3d79b7d75f67b64b3475f0b56f02ba86e1b99a Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 27 May 2021 10:48:51 +0200 Subject: [PATCH 4/5] slightly shorter --- .../DedicatedClusterSnapshotRestoreIT.java | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index 698e5223899cf..06eb2a5af03e6 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -20,6 +20,7 @@ import org.elasticsearch.action.admin.cluster.snapshots.status.SnapshotsStatusResponse; import org.elasticsearch.action.admin.indices.stats.ShardStats; import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.action.support.ActionTestUtils; import org.elasticsearch.action.support.ActiveShardCount; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.master.AcknowledgedResponse; @@ -1057,21 +1058,11 @@ private void startSnapshotDeleteLoop(String repoName, String indexName, String s .execute(new ActionListener<>() { @Override public void onResponse(CreateSnapshotResponse createSnapshotResponse) { - clusterAdmin().prepareDeleteSnapshot(repoName, snapshotName) - .execute( - new ActionListener<>() { - @Override - public void onResponse(AcknowledgedResponse acknowledgedResponse) { - assertAcked(acknowledgedResponse); - startSnapshotDeleteLoop(repoName, indexName, snapshotName, doneListener); - } - - @Override - public void onFailure(Exception e) { - throw new AssertionError(e); - } - } - ); + clusterAdmin().prepareDeleteSnapshot(repoName, snapshotName).execute( + ActionTestUtils.assertNoFailureListener(acknowledgedResponse -> { + assertAcked(acknowledgedResponse); + startSnapshotDeleteLoop(repoName, indexName, snapshotName, doneListener); + })); } @Override From 4b189a14c8bd148fc46a9da9cc7b6d3883212e2a Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 27 May 2021 11:22:44 +0200 Subject: [PATCH 5/5] tidy up --- .../snapshots/DedicatedClusterSnapshotRestoreIT.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index 06eb2a5af03e6..cc38ee58613e8 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ -1020,9 +1020,7 @@ public void testDeleteIndexDuringSnapshot() throws Exception { indexRandomDocs(indexName, 100); final String repoName = "test-repo"; - createRepository(repoName, "mock", - Settings.builder().put("location", randomRepoPath()).put("random", randomAlphaOfLength(10)) - .put("wait_after_unblock", 200)); + createRepository(repoName, "fs"); final String firstSnapshotName = "test-snap"; createSnapshot(repoName, firstSnapshotName, List.of(indexName));