Skip to content

Commit 44fdf20

Browse files
committed
Always flush in FullClusterRestartIT#testRecovery (#47465)
The pattern in the latest failure is similar to the one fixed in #46956, but it relates to synced flush. If peer recovery happens after indexing, and indexing flushes some shard at the end, then a synced flush in the test will not roll or commit the translog. Closes #46712
1 parent 0beb526 commit 44fdf20

File tree

1 file changed

+14
-21
lines changed

1 file changed

+14
-21
lines changed

qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -733,28 +733,21 @@ public void testRecovery() throws Exception {
733733

734734
// make sure all recoveries are done
735735
ensureGreen(index);
736-
// Recovering a synced-flush index from 5.x to 6.x might be subtle as a 5.x index commit does not have all 6.x commit tags.
736+
737+
// Force flush so we're sure that all translog operations are committed
738+
Request flushRequest = new Request("POST", "/" + index + "/_flush");
739+
flushRequest.addParameter("force", "true");
740+
flushRequest.addParameter("wait_if_ongoing", "true");
741+
assertOK(client().performRequest(flushRequest));
742+
737743
if (randomBoolean()) {
738-
// needs to call a replication action to sync the global checkpoint from primaries to replicas.
739-
assertOK(client().performRequest(new Request("POST", "/" + index + "/_refresh")));
740-
// We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
741-
// A synced-flush request considers the global checkpoint sync as an ongoing operation because it acquires a shard permit.
742-
assertBusy(() -> {
743-
try {
744-
Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
745-
Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
746-
assertThat(result.get("successful"), equalTo(result.get("total")));
747-
assertThat(result.get("failed"), equalTo(0));
748-
} catch (ResponseException ex) {
749-
throw new AssertionError(ex); // cause assert busy to retry
750-
}
751-
});
752-
} else {
753-
// Explicitly flush so we're sure to have a bunch of documents in the Lucene index
754-
Request flushRequest = new Request("POST", "/" + index + "/_flush");
755-
flushRequest.addParameter("force", "true");
756-
flushRequest.addParameter("wait_if_ongoing", "true");
757-
assertOK(client().performRequest(flushRequest));
744+
// We had a bug before where we failed to perform peer recovery with sync_id from 5.x to 6.x.
745+
// We added this synced flush so we can exercise different paths of recovery code.
746+
try {
747+
client().performRequest(new Request("POST", index + "/_flush/synced"));
748+
} catch (ResponseException ignored) {
749+
// synced flush is optional here
750+
}
758751
}
759752
if (shouldHaveTranslog) {
760753
// Update a few documents so we are sure to have a translog

0 commit comments

Comments
 (0)