Skip to content

Commit e44e84a

Browse files
committed
Suppress lease background sync failures if stopping (#40902)
If the transport service is stopped, likely because we are shutting down, and a retention lease background sync fires the logs will display a warn message and stacktrace. Yet, this situaton is harmless and can happen as a normal course of business when shutting down. This commit suppresses the log messages in this case.
1 parent a69ff82 commit e44e84a

File tree

5 files changed

+34
-10
lines changed

5 files changed

+34
-10
lines changed

server/src/main/java/org/elasticsearch/ExceptionsHelper.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
3030
import org.elasticsearch.index.Index;
3131
import org.elasticsearch.rest.RestStatus;
32+
import org.elasticsearch.transport.TransportException;
3233

3334
import java.io.IOException;
3435
import java.io.PrintWriter;
@@ -193,6 +194,14 @@ public static Throwable unwrap(Throwable t, Class<?>... clazzes) {
193194
return null;
194195
}
195196

197+
public static boolean isTransportStoppedForAction(final Throwable t, final String action) {
198+
final TransportException maybeTransport =
199+
(TransportException) ExceptionsHelper.unwrap(t, TransportException.class);
200+
return maybeTransport != null
201+
&& (maybeTransport.getMessage().equals("TransportService is closed stopped can't send request")
202+
|| maybeTransport.getMessage().equals("transport stopped, action: " + action));
203+
}
204+
196205
/**
197206
* Throws the specified exception. If null if specified then <code>true</code> is returned.
198207
*/

server/src/main/java/org/elasticsearch/action/support/replication/ReplicationOperation.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
import org.elasticsearch.index.shard.ShardId;
3939
import org.elasticsearch.node.NodeClosedException;
4040
import org.elasticsearch.rest.RestStatus;
41-
import org.elasticsearch.transport.TransportException;
4241

4342
import java.io.IOException;
4443
import java.util.ArrayList;
@@ -205,10 +204,9 @@ public String toString() {
205204

206205
private void onNoLongerPrimary(Exception failure) {
207206
final Throwable cause = ExceptionsHelper.unwrapCause(failure);
208-
final boolean nodeIsClosing = cause instanceof NodeClosedException
209-
|| (cause instanceof TransportException &&
210-
("TransportService is closed stopped can't send request".equals(cause.getMessage())
211-
|| "transport stopped, action: internal:cluster/shard/failure".equals(cause.getMessage())));
207+
final boolean nodeIsClosing =
208+
cause instanceof NodeClosedException
209+
|| ExceptionsHelper.isTransportStoppedForAction(cause, "internal:cluster/shard/failure");
212210
final String message;
213211
if (nodeIsClosing) {
214212
message = String.format(Locale.ROOT,

server/src/main/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncAction.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,9 +113,15 @@ public void backgroundSync(
113113
ActionListener.wrap(
114114
r -> {},
115115
e -> {
116-
if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) == null) {
117-
getLogger().warn(new ParameterizedMessage("{} retention lease background sync failed", shardId), e);
116+
if (ExceptionsHelper.isTransportStoppedForAction(e, ACTION_NAME + "[p]")) {
117+
// we are likely shutting down
118+
return;
118119
}
120+
if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) != null) {
121+
// the shard is closed
122+
return;
123+
}
124+
getLogger().warn(new ParameterizedMessage("{} retention lease background sync failed", shardId), e);
119125
}));
120126
}
121127
}

server/src/main/java/org/elasticsearch/transport/TransportService.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ public void onFailure(Exception e) {
274274
}
275275
@Override
276276
public void doRun() {
277+
// cf. ExceptionsHelper#isTransportStoppedForAction
277278
TransportException ex = new TransportException("transport stopped, action: " + holderToNotify.action());
278279
holderToNotify.handler().handleException(ex);
279280
}
@@ -626,8 +627,13 @@ private <T extends TransportResponse> void sendRequestInternal(final Transport.C
626627
}
627628
try {
628629
if (lifecycle.stoppedOrClosed()) {
629-
// if we are not started the exception handling will remove the RequestHolder again and calls the handler to notify
630-
// the caller. It will only notify if the toStop code hasn't done the work yet.
630+
/*
631+
* If we are not started the exception handling will remove the request holder again and calls the handler to notify the
632+
* caller. It will only notify if toStop hasn't done the work yet.
633+
*
634+
* Do not edit this exception message, it is currently relied upon in production code!
635+
*/
636+
// TODO: make a dedicated exception for a stopped transport service? cf. ExceptionsHelper#isTransportStoppedForAction
631637
throw new TransportException("TransportService is closed stopped can't send request");
632638
}
633639
if (timeoutHandler != null) {

server/src/test/java/org/elasticsearch/index/seqno/RetentionLeaseBackgroundSyncActionTests.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.elasticsearch.test.transport.CapturingTransport;
4343
import org.elasticsearch.threadpool.TestThreadPool;
4444
import org.elasticsearch.threadpool.ThreadPool;
45+
import org.elasticsearch.transport.TransportException;
4546
import org.elasticsearch.transport.TransportService;
4647
import org.mockito.ArgumentCaptor;
4748

@@ -204,9 +205,13 @@ protected void doExecute(Task task, Request request, ActionListener<ReplicationR
204205
final Exception e = randomFrom(
205206
new AlreadyClosedException("closed"),
206207
new IndexShardClosedException(indexShard.shardId()),
208+
new TransportException(randomFrom(
209+
"failed",
210+
"TransportService is closed stopped can't send request",
211+
"transport stopped, action: indices:admin/seq_no/retention_lease_background_sync[p]")),
207212
new RuntimeException("failed"));
208213
listener.onFailure(e);
209-
if (e instanceof AlreadyClosedException == false && e instanceof IndexShardClosedException == false) {
214+
if (e.getMessage().equals("failed")) {
210215
final ArgumentCaptor<ParameterizedMessage> captor = ArgumentCaptor.forClass(ParameterizedMessage.class);
211216
verify(retentionLeaseSyncActionLogger).warn(captor.capture(), same(e));
212217
final ParameterizedMessage message = captor.getValue();

0 commit comments

Comments
 (0)