Skip to content

Commit 7102dd8

Browse files
Fix Race in testGetSnapshotsRequest (#61694)
The fact that the data node was already blocked on writing data files did not guarantee that the cluster state that made the data node start snapshotting had already been applied on master. This could lead to races where the get snapshots action still ran based on a state without the snapshot in it, tripping the assertion. It is much safer to handle this by waiting for the non-blocking snapshot create request to return, which guarantees that the cluster state (CS) has been applied on master. Closes #61541
1 parent 5ec4821 commit 7102dd8

File tree

1 file changed

+2
-4
lines changed

1 file changed

+2
-4
lines changed

server/src/internalClusterTest/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2803,11 +2803,10 @@ public void testGetSnapshotsRequest() throws Exception {
28032803
// take initial snapshot with a block, making sure we only get 1 in-progress snapshot returned
28042804
// block a node so the create snapshot operation can remain in progress
28052805
final String initialBlockedNode = blockNodeWithIndex(repositoryName, indexName);
2806-
ActionFuture<CreateSnapshotResponse> responseListener =
2807-
client.admin().cluster().prepareCreateSnapshot(repositoryName, "snap-on-empty-repo")
2806+
client.admin().cluster().prepareCreateSnapshot(repositoryName, "snap-on-empty-repo")
28082807
.setWaitForCompletion(false)
28092808
.setIndices(indexName)
2810-
.execute();
2809+
.get();
28112810
waitForBlock(initialBlockedNode, repositoryName, TimeValue.timeValueSeconds(60)); // wait for block to kick in
28122811
getSnapshotsResponse = client.admin().cluster()
28132812
.prepareGetSnapshots("test-repo")
@@ -2816,7 +2815,6 @@ public void testGetSnapshotsRequest() throws Exception {
28162815
assertEquals(1, getSnapshotsResponse.getSnapshots("test-repo").size());
28172816
assertEquals("snap-on-empty-repo", getSnapshotsResponse.getSnapshots("test-repo").get(0).snapshotId().getName());
28182817
unblockNode(repositoryName, initialBlockedNode); // unblock node
2819-
responseListener.actionGet(TimeValue.timeValueMillis(10000L)); // timeout after 10 seconds
28202818
client.admin().cluster().prepareDeleteSnapshot(repositoryName, "snap-on-empty-repo").get();
28212819

28222820
final int numSnapshots = randomIntBetween(1, 3) + 1;

0 commit comments

Comments
 (0)