
Commit 18fba4b

Add IT for Snapshot Issue in 47552 (#47627) (#47635)
* Add IT for Snapshot Issue in 47552 (#47627)

Adds a dedicated integration test that reproduces the problem fixed in #47552. The issue otherwise only reproduces in the snapshot resiliency tests, which are not available in 6.8, where the fix is also being backported.
1 parent b2506a8 commit 18fba4b

File tree

1 file changed: +49 -0 lines changed


server/src/test/java/org/elasticsearch/snapshots/DedicatedClusterSnapshotRestoreIT.java

Lines changed: 49 additions & 0 deletions
@@ -1237,6 +1237,55 @@ public void testDataNodeRestartWithBusyMasterDuringSnapshot() throws Exception {
         }, 60L, TimeUnit.SECONDS);
     }
 
+    public void testDataNodeRestartAfterShardSnapshotFailure() throws Exception {
+        logger.info("--> starting a master node and two data nodes");
+        internalCluster().startMasterOnlyNode();
+        final List<String> dataNodes = internalCluster().startDataOnlyNodes(2);
+        logger.info("--> creating repository");
+        assertAcked(client().admin().cluster().preparePutRepository("test-repo")
+            .setType("mock").setSettings(Settings.builder()
+                .put("location", randomRepoPath())
+                .put("compress", randomBoolean())
+                .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
+        assertAcked(prepareCreate("test-idx", 0, Settings.builder()
+            .put("number_of_shards", 2).put("number_of_replicas", 0)));
+        ensureGreen();
+        logger.info("--> indexing some data");
+        final int numdocs = randomIntBetween(50, 100);
+        IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
+        for (int i = 0; i < builders.length; i++) {
+            builders[i] = client().prepareIndex("test-idx", "type1",
+                Integer.toString(i)).setSource("field1", "bar " + i);
+        }
+        indexRandom(true, builders);
+        flushAndRefresh();
+        blockAllDataNodes("test-repo");
+        logger.info("--> snapshot");
+        client(internalCluster().getMasterName()).admin().cluster()
+            .prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(false).setIndices("test-idx").get();
+        logger.info("--> restarting first data node, which should cause the primary shard on it to be failed");
+        internalCluster().restartNode(dataNodes.get(0), InternalTestCluster.EMPTY_CALLBACK);
+
+        logger.info("--> wait for shard snapshot of first primary to show as failed");
+        assertBusy(() -> assertThat(
+            client().admin().cluster().prepareSnapshotStatus("test-repo").setSnapshots("test-snap").get().getSnapshots()
+                .get(0).getShardsStats().getFailedShards(), is(1)), 60L, TimeUnit.SECONDS);
+
+        logger.info("--> restarting second data node, which should cause the primary shard on it to be failed");
+        internalCluster().restartNode(dataNodes.get(1), InternalTestCluster.EMPTY_CALLBACK);
+
+        // check that snapshot completes with both failed shards being accounted for in the snapshot result
+        assertBusy(() -> {
+            GetSnapshotsResponse snapshotsStatusResponse = client().admin().cluster()
+                .prepareGetSnapshots("test-repo").setSnapshots("test-snap").setIgnoreUnavailable(true).get();
+            assertEquals(1, snapshotsStatusResponse.getSnapshots().size());
+            SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0);
+            assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
+            assertThat(snapshotInfo.totalShards(), is(2));
+            assertThat(snapshotInfo.shardFailures(), hasSize(2));
+        }, 60L, TimeUnit.SECONDS);
+    }
+
     public void testRetentionLeasesClearedOnRestore() throws Exception {
         final String repoName = "test-repo-retention-leases";
         assertAcked(client().admin().cluster().preparePutRepository(repoName)
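For readers adapting this pattern in their own tests, the final completion check could be factored into a small helper. The sketch below is hypothetical and not part of this commit; it reuses only the client calls already shown in the diff and assumes it lives in the same test class with the same static imports (assertBusy, assertThat, is, hasSize).

    // Hypothetical helper, not part of this commit: waits until the given snapshot
    // reports a completed state with the expected shard totals and failure count.
    private void assertSnapshotCompletedWithFailures(String repo, String snapshot,
                                                     int expectedTotalShards, int expectedFailedShards) throws Exception {
        assertBusy(() -> {
            GetSnapshotsResponse response = client().admin().cluster()
                .prepareGetSnapshots(repo).setSnapshots(snapshot).setIgnoreUnavailable(true).get();
            assertEquals(1, response.getSnapshots().size());
            SnapshotInfo snapshotInfo = response.getSnapshots().get(0);
            assertTrue(snapshotInfo.state().toString(), snapshotInfo.state().completed());
            assertThat(snapshotInfo.totalShards(), is(expectedTotalShards));
            assertThat(snapshotInfo.shardFailures(), hasSize(expectedFailedShards));
        }, 60L, TimeUnit.SECONDS);
    }

With such a helper, the closing assertBusy block of the new test would reduce to a single call like assertSnapshotCompletedWithFailures("test-repo", "test-snap", 2, 2).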
