Skip to content

Commit 2a7b7cc

Browse files
authored
Use cancel instead of timeout for aborting publications (#37670)
When a publication was cancelled because the node became a follower or a candidate, it was still reported as having timed out, which is confusing in the logs. This change replaces the misleading call to onTimeout with a more general cancel method that takes the reason for the cancellation.
1 parent ef2f5e4 commit 2a7b7cc

File tree

3 files changed

+23
-23
lines changed

3 files changed

+23
-23
lines changed

server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -446,7 +446,7 @@ void becomeCandidate(String method) {
446446

447447
if (mode != Mode.CANDIDATE) {
448448
mode = Mode.CANDIDATE;
449-
cancelActivePublication();
449+
cancelActivePublication("become candidate: " + method);
450450
joinAccumulator.close(mode);
451451
joinAccumulator = joinHelper.new CandidateJoinAccumulator();
452452

@@ -518,7 +518,7 @@ void becomeFollower(String method, DiscoveryNode leaderNode) {
518518
discoveryUpgradeService.deactivate();
519519
clusterFormationFailureHelper.stop();
520520
closePrevotingAndElectionScheduler();
521-
cancelActivePublication();
521+
cancelActivePublication("become follower: " + method);
522522
preVoteCollector.update(getPreVoteResponse(), leaderNode);
523523

524524
if (restartLeaderChecker) {
@@ -902,7 +902,7 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId())
902902
@Override
903903
public void run() {
904904
synchronized (mutex) {
905-
publication.onTimeout();
905+
publication.cancel("timed out after " + publishTimeout);
906906
}
907907
}
908908

@@ -958,10 +958,10 @@ public void onFailure(Exception e) {
958958
};
959959
}
960960

961-
private void cancelActivePublication() {
961+
private void cancelActivePublication(String reason) {
962962
assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
963963
if (currentPublication.isPresent()) {
964-
currentPublication.get().onTimeout();
964+
currentPublication.get().cancel(reason);
965965
}
966966
}
967967

server/src/main/java/org/elasticsearch/cluster/coordination/Publication.java

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public abstract class Publication {
4949

5050
private Optional<ApplyCommitRequest> applyCommitRequest; // set when state is committed
5151
private boolean isCompleted; // set when publication is completed
52-
private boolean timedOut; // set when publication timed out
52+
private boolean cancelled; // set when publication is cancelled
5353

5454
public Publication(PublishRequest publishRequest, AckListener ackListener, LongSupplier currentTimeSupplier) {
5555
this.publishRequest = publishRequest;
@@ -71,17 +71,17 @@ public void start(Set<DiscoveryNode> faultyNodes) {
7171
publicationTargets.forEach(PublicationTarget::sendPublishRequest);
7272
}
7373

74-
public void onTimeout() {
74+
public void cancel(String reason) {
7575
if (isCompleted) {
7676
return;
7777
}
7878

79-
assert timedOut == false;
80-
timedOut = true;
79+
assert cancelled == false;
80+
cancelled = true;
8181
if (applyCommitRequest.isPresent() == false) {
82-
logger.debug("onTimeout: [{}] timed out before committing", this);
82+
logger.debug("cancel: [{}] cancelled before committing (reason: {})", this, reason);
8383
// fail all current publications
84-
final Exception e = new ElasticsearchException("publication timed out before committing");
84+
final Exception e = new ElasticsearchException("publication cancelled before committing: " + reason);
8585
publicationTargets.stream().filter(PublicationTarget::isActive).forEach(pt -> pt.setFailed(e));
8686
}
8787
onPossibleCompletion();
@@ -101,7 +101,7 @@ private void onPossibleCompletion() {
101101
return;
102102
}
103103

104-
if (timedOut == false) {
104+
if (cancelled == false) {
105105
for (final PublicationTarget target : publicationTargets) {
106106
if (target.isActive()) {
107107
return;
@@ -125,8 +125,8 @@ private void onPossibleCompletion() {
125125
}
126126

127127
// For assertions only: verify that this invariant holds
128-
private boolean publicationCompletedIffAllTargetsInactiveOrTimedOut() {
129-
if (timedOut == false) {
128+
private boolean publicationCompletedIffAllTargetsInactiveOrCancelled() {
129+
if (cancelled == false) {
130130
for (final PublicationTarget target : publicationTargets) {
131131
if (target.isActive()) {
132132
return isCompleted == false;
@@ -222,7 +222,7 @@ void sendPublishRequest() {
222222
state = PublicationTargetState.SENT_PUBLISH_REQUEST;
223223
Publication.this.sendPublishRequest(discoveryNode, publishRequest, new PublishResponseHandler());
224224
// TODO Can this ^ fail with an exception? Target should be failed if so.
225-
assert publicationCompletedIffAllTargetsInactiveOrTimedOut();
225+
assert publicationCompletedIffAllTargetsInactiveOrCancelled();
226226
}
227227

228228
void handlePublishResponse(PublishResponse publishResponse) {
@@ -245,7 +245,7 @@ void sendApplyCommit() {
245245
state = PublicationTargetState.SENT_APPLY_COMMIT;
246246
assert applyCommitRequest.isPresent();
247247
Publication.this.sendApplyCommit(discoveryNode, applyCommitRequest.get(), new ApplyCommitResponseHandler());
248-
assert publicationCompletedIffAllTargetsInactiveOrTimedOut();
248+
assert publicationCompletedIffAllTargetsInactiveOrCancelled();
249249
}
250250

251251
void setAppliedCommit() {
@@ -300,7 +300,7 @@ private class PublishResponseHandler implements ActionListener<PublishWithJoinRe
300300
public void onResponse(PublishWithJoinResponse response) {
301301
if (isFailed()) {
302302
logger.debug("PublishResponseHandler.handleResponse: already failed, ignoring response from [{}]", discoveryNode);
303-
assert publicationCompletedIffAllTargetsInactiveOrTimedOut();
303+
assert publicationCompletedIffAllTargetsInactiveOrCancelled();
304304
return;
305305
}
306306

@@ -319,7 +319,7 @@ public void onResponse(PublishWithJoinResponse response) {
319319
state = PublicationTargetState.WAITING_FOR_QUORUM;
320320
handlePublishResponse(response.getPublishResponse());
321321

322-
assert publicationCompletedIffAllTargetsInactiveOrTimedOut();
322+
assert publicationCompletedIffAllTargetsInactiveOrCancelled();
323323
}
324324

325325
@Override
@@ -330,7 +330,7 @@ public void onFailure(Exception e) {
330330
assert ((TransportException) e).getRootCause() instanceof Exception;
331331
setFailed((Exception) exp.getRootCause());
332332
onPossibleCommitFailure();
333-
assert publicationCompletedIffAllTargetsInactiveOrTimedOut();
333+
assert publicationCompletedIffAllTargetsInactiveOrCancelled();
334334
}
335335

336336
}
@@ -346,7 +346,7 @@ public void onResponse(TransportResponse.Empty ignored) {
346346
}
347347
setAppliedCommit();
348348
onPossibleCompletion();
349-
assert publicationCompletedIffAllTargetsInactiveOrTimedOut();
349+
assert publicationCompletedIffAllTargetsInactiveOrCancelled();
350350
}
351351

352352
@Override
@@ -357,7 +357,7 @@ public void onFailure(Exception e) {
357357
assert ((TransportException) e).getRootCause() instanceof Exception;
358358
setFailed((Exception) exp.getRootCause());
359359
onPossibleCompletion();
360-
assert publicationCompletedIffAllTargetsInactiveOrTimedOut();
360+
assert publicationCompletedIffAllTargetsInactiveOrCancelled();
361361
}
362362
}
363363
}

server/src/test/java/org/elasticsearch/cluster/coordination/PublicationTests.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ public void testClusterStatePublishingFailsOrTimesOutBeforeCommit() throws Inter
346346
publication.pendingPublications.entrySet().stream().collect(shuffle()).forEach(e -> {
347347
if (e.getKey().equals(n2)) {
348348
if (timeOut) {
349-
publication.onTimeout();
349+
publication.cancel("timed out");
350350
} else {
351351
e.getValue().onFailure(new TransportException(new Exception("dummy failure")));
352352
}
@@ -407,7 +407,7 @@ public void testClusterStatePublishingTimesOutAfterCommit() throws InterruptedEx
407407
}
408408
});
409409

410-
publication.onTimeout();
410+
publication.cancel("timed out");
411411
assertTrue(publication.completed);
412412
assertTrue(publication.committed);
413413
assertEquals(committingNodes, ackListener.await(0L, TimeUnit.SECONDS));

0 commit comments

Comments
 (0)