From 8568eb2fbb2b276e7f7f3afca699932d4981e9c1 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 23 May 2019 10:46:25 +0200 Subject: [PATCH] Fix RareClusterStateIT * It looks like we might be cancelling a previous publication instead of the one triggered by the given request with a very low likelihood. * Fixed by adding a wait for no in-progress publications * Also added debug logging that would've identified this problem * Closes #36813 --- .../cluster/coordination/Coordinator.java | 10 +++++++--- .../cluster/coordination/RareClusterStateIT.java | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index 6304588e3121a..c43ac64b43d81 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -1165,9 +1165,13 @@ public Iterable getFoundPeers() { */ boolean cancelCommittedPublication() { synchronized (mutex) { - if (currentPublication.isPresent() && currentPublication.get().isCommitted()) { - currentPublication.get().cancel("cancelCommittedPublication"); - return true; + if (currentPublication.isPresent()) { + final CoordinatorPublication publication = currentPublication.get(); + if (publication.isCommitted()) { + publication.cancel("cancelCommittedPublication"); + logger.debug("Cancelled publication of [{}].", publication); + return true; + } } return false; } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java b/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java index c31df3ade71cc..402383b7aac1b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java @@ -141,6 +141,10 @@ public void onFailure(String source, Exception e) { private ActionFuture executeAndCancelCommittedPublication( ActionRequestBuilder req) throws Exception { + // Wait for no publication in progress to not accidentally cancel a publication different from the one triggered by the given + // request. + assertBusy( + () -> assertFalse(((Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class)).publicationInProgress())); ActionFuture future = req.execute(); assertBusy( () -> assertTrue(((Coordinator)internalCluster().getCurrentMasterNodeInstance(Discovery.class)).cancelCommittedPublication()));