From a2a063246a1c7dc622c34b984a0180cb87877661 Mon Sep 17 00:00:00 2001 From: Andrey Ershov Date: Fri, 7 Dec 2018 15:17:31 +0100 Subject: [PATCH 1/7] Switch more tests to zen2 --- .../gateway/GatewayIndexStateIT.java | 30 ++++++++++++------- .../RemoveCorruptedShardDataCommandIT.java | 25 ++++++---------- .../elasticsearch/xpack/CcrIntegTestCase.java | 1 - 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java index b17ba487693d9..4d21c2756a5d2 100644 --- a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java +++ b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java @@ -276,11 +276,18 @@ public void testTwoNodesSingleDoc() throws Exception { } public void testDanglingIndices() throws Exception { + /*TODO This test does not work with Zen2, because once the master node loses its cluster state during restart it will start with term = 1, which is the same as the term data node has. Data node won't accept cluster state from master after the restart, because the term is the same, but version of the cluster state is greater on the data node. Consider adding term to JoinRequest, so that master node can bump its term if its current term is less than JoinRequest#term. 
+ */ logger.info("--> starting two nodes"); - - final String node_1 = internalCluster().startNodes(2, - //TODO fails wih Zen2 - Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build()).get(0); + String masterNode = internalCluster().startNode(Settings.builder() + .put(Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build()) + .build()); + internalCluster().startDataOnlyNode(Settings.builder() + .put(Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build()) + .build()); logger.info("--> indexing a simple document"); client().prepareIndex("test", "type1", "1").setSource("field1", "value1").setRefreshPolicy(IMMEDIATE).get(); @@ -294,11 +301,11 @@ public void testDanglingIndices() throws Exception { } assertThat(client().prepareGet("test", "type1", "1").execute().actionGet().isExists(), equalTo(true)); - logger.info("--> restarting the nodes"); - internalCluster().fullRestart(new RestartCallback() { + logger.info("--> restarting master node, while clearing its data"); + internalCluster().restartNode(masterNode, new RestartCallback() { @Override public boolean clearData(String nodeName) { - return node_1.equals(nodeName); + return true; } }); @@ -328,14 +335,14 @@ public boolean clearData(String nodeName) { */ public void testIndexDeletionWhenNodeRejoins() throws Exception { final String indexName = "test-index-del-on-node-rejoin-idx"; - final int numNodes = 2; + // We need at least 3 nodes to make sure, that once one node is stopped, remaining nodes can elect a new master + final int numNodes = 3; final List nodes; logger.info("--> starting a cluster with " + numNodes + " nodes"); + nodes = internalCluster().startNodes(numNodes, - Settings.builder().put(IndexGraveyard.SETTING_MAX_TOMBSTONES.getKey(), randomIntBetween(10, 100)) - //TODO fails with Zen2 - .put(TestZenDiscovery.USE_ZEN2.getKey(), false).build()); + Settings.builder().put(IndexGraveyard.SETTING_MAX_TOMBSTONES.getKey(), randomIntBetween(10, 
100)).build()); logger.info("--> create an index"); createIndex(indexName); @@ -355,6 +362,7 @@ public Settings onNodeStopped(final String nodeName) throws Exception { final Client client = client(otherNode); client.admin().indices().prepareDelete(indexName).execute().actionGet(); assertFalse(client.admin().indices().prepareExists(indexName).execute().actionGet().isExists()); + logger.info("--> deleted"); return super.onNodeStopped(nodeName); } }); diff --git a/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java b/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java index b1a6747273066..bcb96cec71c2d 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java +++ b/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java @@ -70,7 +70,6 @@ import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalSettingsPlugin; import org.elasticsearch.test.InternalTestCluster; -import org.elasticsearch.test.discovery.TestZenDiscovery; import org.elasticsearch.test.engine.MockEngineSupport; import org.elasticsearch.test.transport.MockTransportService; @@ -102,13 +101,6 @@ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0) public class RemoveCorruptedShardDataCommandIT extends ESIntegTestCase { - @Override - protected Settings nodeSettings(int nodeOrdinal) { - return Settings.builder().put(super.nodeSettings(nodeOrdinal)) - .put(TestZenDiscovery.USE_ZEN2.getKey(), false) // no state persistence yet - .build(); - } - @Override protected Collection> nodePlugins() { return Arrays.asList(MockTransportService.TestPlugin.class, MockEngineFactoryPlugin.class, InternalSettingsPlugin.class); @@ -260,7 +252,7 @@ public Settings onNodeStopped(String nodeName) throws Exception { } public void testCorruptTranslogTruncation() throws Exception { - internalCluster().startNodes(2, 
Settings.EMPTY); + internalCluster().startNodes(2); final String node1 = internalCluster().getNodeNames()[0]; final String node2 = internalCluster().getNodeNames()[1]; @@ -436,10 +428,10 @@ public Settings onNodeStopped(String nodeName) throws Exception { } public void testCorruptTranslogTruncationOfReplica() throws Exception { - internalCluster().startNodes(2, Settings.EMPTY); + internalCluster().startMasterOnlyNode(); - final String node1 = internalCluster().getNodeNames()[0]; - final String node2 = internalCluster().getNodeNames()[1]; + final String node1 = internalCluster().startDataOnlyNode(); + final String node2 = internalCluster().startDataOnlyNode(); logger.info("--> nodes name: {}, {}", node1, node2); final String indexName = "test"; @@ -481,12 +473,11 @@ public void testCorruptTranslogTruncationOfReplica() throws Exception { final ShardId shardId = new ShardId(resolveIndex(indexName), 0); final Set translogDirs = getDirs(node2, shardId, ShardPath.TRANSLOG_FOLDER_NAME); - // stop the cluster nodes. we don't use full restart so the node start up order will be the same - // and shard roles will be maintained + // stop data nodes. 
After the restart the 1st node will be primary and the 2nd node will be replica internalCluster().stopRandomDataNode(); internalCluster().stopRandomDataNode(); - // Corrupt the translog file(s) + // Corrupt the translog file(s) on the replica logger.info("--> corrupting translog"); TestTranslog.corruptRandomTranslogFile(logger, random(), translogDirs); @@ -555,8 +546,10 @@ public Settings onNodeStopped(String nodeName) throws Exception { } public void testResolvePath() throws Exception { + internalCluster().startMasterOnlyNode(); + final int numOfNodes = randomIntBetween(1, 5); - final List nodeNames = internalCluster().startNodes(numOfNodes, Settings.EMPTY); + final List nodeNames = internalCluster().startNodes(numOfNodes); final String indexName = "test" + randomInt(100); assertAcked(prepareCreate(indexName).setSettings(Settings.builder() diff --git a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/CcrIntegTestCase.java b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/CcrIntegTestCase.java index 3185fc4627342..5abe852ca5ff0 100644 --- a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/CcrIntegTestCase.java +++ b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/CcrIntegTestCase.java @@ -188,7 +188,6 @@ private NodeConfigurationSource createNodeConfigurationSource() { builder.put(IndicesStore.INDICES_STORE_DELETE_SHARD_TIMEOUT.getKey(), new TimeValue(1, TimeUnit.SECONDS)); builder.putList(DISCOVERY_ZEN_PING_UNICAST_HOSTS_SETTING.getKey()); // empty list disables a port scan for other nodes builder.putList(DISCOVERY_HOSTS_PROVIDER_SETTING.getKey(), "file"); - builder.put(TestZenDiscovery.USE_ZEN2.getKey(), false); // some tests do full cluster restarts builder.put(NetworkModule.TRANSPORT_TYPE_KEY, getTestTransportType()); builder.put(XPackSettings.SECURITY_ENABLED.getKey(), false); builder.put(XPackSettings.MONITORING_ENABLED.getKey(), false); From 3395e2e641b06954a9e81feda9793767586ec097 Mon Sep 17 00:00:00 2001 From: Andrey Ershov 
Date: Fri, 7 Dec 2018 15:24:32 +0100 Subject: [PATCH 2/7] Remove extra logging --- .../test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java index 4d21c2756a5d2..df23256eb2ebe 100644 --- a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java +++ b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java @@ -362,7 +362,6 @@ public Settings onNodeStopped(final String nodeName) throws Exception { final Client client = client(otherNode); client.admin().indices().prepareDelete(indexName).execute().actionGet(); assertFalse(client.admin().indices().prepareExists(indexName).execute().actionGet().isExists()); - logger.info("--> deleted"); return super.onNodeStopped(nodeName); } }); From edfa48ad9ce79b7e3b3bf0d10789d88bc359e227 Mon Sep 17 00:00:00 2001 From: Andrey Ershov Date: Fri, 7 Dec 2018 18:39:01 +0100 Subject: [PATCH 3/7] SnapshotDisruptionIT Without changes test fails, because Zen2 retries snapshot creation as soon as network partition heals. This results in a race between creating snapshot and test cleanup logic (deleting index). Zen1 on the other hand, also schedules retry, but it takes some time after network partition heals, so cleanup logic executes later and test passes. 
The check that snapshot is eventually created is added to the end of the test --- .../discovery/SnapshotDisruptionIT.java | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java index bbf0ca99b101e..30cfacff96128 100644 --- a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java @@ -68,7 +68,6 @@ protected Settings nodeSettings(int nodeOrdinal) { return Settings.builder().put(super.nodeSettings(nodeOrdinal)) .put(AbstractDisruptionTestCase.DEFAULT_SETTINGS) .put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false) - .put(TestZenDiscovery.USE_ZEN2.getKey(), false) // requires more work .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") .build(); } @@ -133,7 +132,7 @@ public void clusterChanged(ClusterChangedEvent event) { logger.info("--> wait until the snapshot is done"); assertBusy(() -> { - SnapshotsInProgress snapshots = dataNodeClient().admin().cluster().prepareState().setLocal(true).get().getState() + SnapshotsInProgress snapshots = dataNodeClient().admin().cluster().prepareState().setLocal(false).get().getState() .custom(SnapshotsInProgress.TYPE); if (snapshots != null && snapshots.entries().size() > 0) { logger.info("Current snapshot state [{}]", snapshots.entries().get(0).state()); @@ -146,15 +145,9 @@ public void clusterChanged(ClusterChangedEvent event) { logger.info("--> verify that snapshot was successful or no longer exist"); assertBusy(() -> { try { - GetSnapshotsResponse snapshotsStatusResponse = dataNodeClient().admin().cluster().prepareGetSnapshots("test-repo") - .setSnapshots("test-snap-2").get(); - SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0); - assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); - 
assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards()); - assertEquals(0, snapshotInfo.failedShards()); - logger.info("--> done verifying"); + assertSnapshotExists("test-repo", "test-snap-2"); } catch (SnapshotMissingException exception) { - logger.info("--> snapshot doesn't exist"); + logger.info("--> done verifying, snapshot doesn't exist"); } }, 1, TimeUnit.MINUTES); @@ -172,6 +165,21 @@ public void clusterChanged(ClusterChangedEvent event) { cause = cause.getCause(); assertThat(cause, instanceOf(FailedToCommitClusterStateException.class)); } + + logger.info("--> verify that snapshot eventually will be created due to retries"); + assertBusy(() -> { + assertSnapshotExists("test-repo", "test-snap-2"); + }, 1, TimeUnit.MINUTES); + } + + private void assertSnapshotExists(String repository, String snapshot) { + GetSnapshotsResponse snapshotsStatusResponse = dataNodeClient().admin().cluster().prepareGetSnapshots(repository) + .setSnapshots(snapshot).get(); + SnapshotInfo snapshotInfo = snapshotsStatusResponse.getSnapshots().get(0); + assertEquals(SnapshotState.SUCCESS, snapshotInfo.state()); + assertEquals(snapshotInfo.totalShards(), snapshotInfo.successfulShards()); + assertEquals(0, snapshotInfo.failedShards()); + logger.info("--> done verifying, snapshot exists"); } private void createRandomIndex(String idxName) throws InterruptedException { From 830def10fdc55b5e40b4b8ebe315c150491786de Mon Sep 17 00:00:00 2001 From: Andrey Ershov Date: Fri, 7 Dec 2018 21:39:54 +0100 Subject: [PATCH 4/7] Revert testDanglingIndices, except TODO --- .../gateway/GatewayIndexStateIT.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java index df23256eb2ebe..42976573167d1 100644 --- a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java +++ 
b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java @@ -282,12 +282,9 @@ public void testDanglingIndices() throws Exception { Consider adding term to JoinRequest, so that master node can bump its term if its current term is less than JoinRequest#term. */ logger.info("--> starting two nodes"); - String masterNode = internalCluster().startNode(Settings.builder() - .put(Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build()) - .build()); - internalCluster().startDataOnlyNode(Settings.builder() - .put(Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build()) - .build()); + + final String node_1 = internalCluster().startNodes(2, + Settings.builder().put(TestZenDiscovery.USE_ZEN2.getKey(), false).build()).get(0); logger.info("--> indexing a simple document"); client().prepareIndex("test", "type1", "1").setSource("field1", "value1").setRefreshPolicy(IMMEDIATE).get(); @@ -301,11 +298,11 @@ public void testDanglingIndices() throws Exception { } assertThat(client().prepareGet("test", "type1", "1").execute().actionGet().isExists(), equalTo(true)); - logger.info("--> restarting master node, while clearing its data"); - internalCluster().restartNode(masterNode, new RestartCallback() { + logger.info("--> restarting the nodes"); + internalCluster().fullRestart(new RestartCallback() { @Override public boolean clearData(String nodeName) { - return true; + return node_1.equals(nodeName); } }); @@ -362,6 +359,7 @@ public Settings onNodeStopped(final String nodeName) throws Exception { final Client client = client(otherNode); client.admin().indices().prepareDelete(indexName).execute().actionGet(); assertFalse(client.admin().indices().prepareExists(indexName).execute().actionGet().isExists()); + logger.info("--> index deleted"); return super.onNodeStopped(nodeName); } }); From 6e5bbd4cc235aeadbf7a72d2f4642635c98690ec Mon Sep 17 00:00:00 2001 From: Andrey Ershov Date: Fri, 7 Dec 2018 21:44:46 +0100 Subject: [PATCH 5/7] Exclude 
node from voting configuration when restarting it --- .../gateway/GatewayIndexStateIT.java | 4 +- .../test/InternalTestCluster.java | 107 ++++++++++-------- 2 files changed, 60 insertions(+), 51 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java index 42976573167d1..0cddb929472b7 100644 --- a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java +++ b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java @@ -332,12 +332,10 @@ public boolean clearData(String nodeName) { */ public void testIndexDeletionWhenNodeRejoins() throws Exception { final String indexName = "test-index-del-on-node-rejoin-idx"; - // We need at least 3 nodes to make sure, that once one node is stopped, remaining nodes can elect a new master - final int numNodes = 3; + final int numNodes = 2; final List nodes; logger.info("--> starting a cluster with " + numNodes + " nodes"); - nodes = internalCluster().startNodes(numNodes, Settings.builder().put(IndexGraveyard.SETTING_MAX_TOMBSTONES.getKey(), randomIntBetween(10, 100)).build()); logger.info("--> create an index"); diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index cea9c6d8a9b76..e720a8b19eb24 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -1636,35 +1636,7 @@ private synchronized void stopNodesAndClient(NodeAndClient nodeAndClient) throws } private synchronized void stopNodesAndClients(Collection nodeAndClients) throws IOException { - final Set excludedNodeIds = new HashSet<>(); - - if (autoManageMinMasterNodes && nodeAndClients.size() > 0) { - - final long currentMasters = nodes.values().stream().filter(NodeAndClient::isMasterEligible).count(); 
- final long stoppingMasters = nodeAndClients.stream().filter(NodeAndClient::isMasterEligible).count(); - - assert stoppingMasters <= currentMasters : currentMasters + " < " + stoppingMasters; - if (stoppingMasters != currentMasters && stoppingMasters > 0) { - // If stopping few enough master-nodes that there's still a majority left, there is no need to withdraw their votes first. - // However, we do not yet have a way to be sure there's a majority left, because the voting configuration may not yet have - // been updated when the previous nodes shut down, so we must always explicitly withdraw votes. - // TODO add cluster health API to check that voting configuration is optimal so this isn't always needed - nodeAndClients.stream().filter(NodeAndClient::isMasterEligible).map(NodeAndClient::getName).forEach(excludedNodeIds::add); - assert excludedNodeIds.size() == stoppingMasters; - - logger.info("adding voting config exclusions {} prior to shutdown", excludedNodeIds); - try { - client().execute(AddVotingConfigExclusionsAction.INSTANCE, - new AddVotingConfigExclusionsRequest(excludedNodeIds.toArray(new String[0]))).get(); - } catch (InterruptedException | ExecutionException e) { - throw new AssertionError("unexpected", e); - } - } - - if (stoppingMasters > 0) { - updateMinMasterNodes(getMasterNodesCount() - Math.toIntExact(stoppingMasters)); - } - } + final Set excludedNodeIds = excludeMasters(nodeAndClients); for (NodeAndClient nodeAndClient: nodeAndClients) { removeDisruptionSchemeFromNode(nodeAndClient); @@ -1673,14 +1645,7 @@ private synchronized void stopNodesAndClients(Collection nodeAndC nodeAndClient.close(); } - if (excludedNodeIds.isEmpty() == false) { - logger.info("removing voting config exclusions for {} after shutdown", excludedNodeIds); - try { - client().execute(ClearVotingConfigExclusionsAction.INSTANCE, new ClearVotingConfigExclusionsRequest()).get(); - } catch (InterruptedException | ExecutionException e) { - throw new AssertionError("unexpected", 
e); - } - } + removeExclusions(excludedNodeIds); } /** @@ -1746,31 +1711,78 @@ public synchronized void rollingRestart(RestartCallback callback) throws Excepti private void restartNode(NodeAndClient nodeAndClient, RestartCallback callback) throws Exception { logger.info("Restarting node [{}] ", nodeAndClient.name); + if (activeDisruptionScheme != null) { activeDisruptionScheme.removeFromNode(nodeAndClient.name, this); } - final int masterNodesCount = getMasterNodesCount(); - // special case to allow stopping one node in a two node cluster and keep it functional - final boolean updateMinMaster = nodeAndClient.isMasterEligible() && masterNodesCount == 2 && autoManageMinMasterNodes; - if (updateMinMaster) { - updateMinMasterNodes(masterNodesCount - 1); - } + + Set excludedNodeIds = excludeMasters(Collections.singleton(nodeAndClient)); + final Settings newSettings = nodeAndClient.closeForRestart(callback, - autoManageMinMasterNodes ? getMinMasterNodes(masterNodesCount) : -1); + autoManageMinMasterNodes ? getMinMasterNodes(getMasterNodesCount()) : -1); + + removeExclusions(excludedNodeIds); + nodeAndClient.recreateNode(newSettings, () -> rebuildUnicastHostFiles(emptyList())); nodeAndClient.startNode(); if (activeDisruptionScheme != null) { activeDisruptionScheme.applyToNode(nodeAndClient.name, this); } - if (callback.validateClusterForming() || updateMinMaster) { + + if (callback.validateClusterForming() && excludedNodeIds.isEmpty() == false) { // we have to validate cluster size if updateMinMaster == true, because we need the // second node to join in order to increment min_master_nodes back to 2. 
// we also have to do via the node that was just restarted as it may be that the master didn't yet process // the fact it left validateClusterFormed(nodeAndClient.name); } - if (updateMinMaster) { - updateMinMasterNodes(masterNodesCount); + + if (excludedNodeIds.isEmpty() == false) { + updateMinMasterNodes(getMasterNodesCount()); + } + } + + private Set excludeMasters(Collection nodeAndClients) { + final Set excludedNodeIds = new HashSet<>(); + if (autoManageMinMasterNodes && nodeAndClients.size() > 0) { + + final long currentMasters = nodes.values().stream().filter(NodeAndClient::isMasterEligible).count(); + final long stoppingMasters = nodeAndClients.stream().filter(NodeAndClient::isMasterEligible).count(); + + assert stoppingMasters <= currentMasters : currentMasters + " < " + stoppingMasters; + if (stoppingMasters != currentMasters && stoppingMasters > 0) { + // If stopping few enough master-nodes that there's still a majority left, there is no need to withdraw their votes first. + // However, we do not yet have a way to be sure there's a majority left, because the voting configuration may not yet have + // been updated when the previous nodes shut down, so we must always explicitly withdraw votes. 
+ // TODO add cluster health API to check that voting configuration is optimal so this isn't always needed + nodeAndClients.stream().filter(NodeAndClient::isMasterEligible).map(NodeAndClient::getName).forEach(excludedNodeIds::add); + assert excludedNodeIds.size() == stoppingMasters; + + logger.info("adding voting config exclusions {} prior to restart/shutdown", excludedNodeIds); + try { + client().execute(AddVotingConfigExclusionsAction.INSTANCE, + new AddVotingConfigExclusionsRequest(excludedNodeIds.toArray(new String[0]))).get(); + } catch (InterruptedException | ExecutionException e) { + throw new AssertionError("unexpected", e); + } + } + + if (stoppingMasters > 0) { + updateMinMasterNodes(getMasterNodesCount() - Math.toIntExact(stoppingMasters)); + } + } + return excludedNodeIds; + } + + private void removeExclusions(Set excludedNodeIds) { + if (excludedNodeIds.isEmpty() == false) { + logger.info("removing voting config exclusions for {} after restart/shutdown", excludedNodeIds); + try { + Client client = getRandomNodeAndClient(node -> excludedNodeIds.contains(node.name) == false).client(random); + client.execute(ClearVotingConfigExclusionsAction.INSTANCE, new ClearVotingConfigExclusionsRequest()).get(); + } catch (InterruptedException | ExecutionException e) { + throw new AssertionError("unexpected", e); + } } } @@ -1828,7 +1840,6 @@ public synchronized void fullRestart(RestartCallback callback) throws Exception } } - /** * Returns the name of the current master node in the cluster. 
*/ From 4d9d141b90c390068f1374a471309e7e8d7cf006 Mon Sep 17 00:00:00 2001 From: Andrey Ershov Date: Fri, 7 Dec 2018 21:45:46 +0100 Subject: [PATCH 6/7] RemoveCorruptedShardDataCommandIT cluster -> suite internal cluster --- .../index/shard/RemoveCorruptedShardDataCommandIT.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java b/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java index bcb96cec71c2d..7ca2e0a64070b 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java +++ b/server/src/test/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommandIT.java @@ -98,7 +98,7 @@ import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; -@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0) +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoveCorruptedShardDataCommandIT extends ESIntegTestCase { @Override @@ -546,10 +546,8 @@ public Settings onNodeStopped(String nodeName) throws Exception { } public void testResolvePath() throws Exception { - internalCluster().startMasterOnlyNode(); - final int numOfNodes = randomIntBetween(1, 5); - final List nodeNames = internalCluster().startNodes(numOfNodes); + final List nodeNames = internalCluster().startNodes(numOfNodes, Settings.EMPTY); final String indexName = "test" + randomInt(100); assertAcked(prepareCreate(indexName).setSettings(Settings.builder() From a500d20642cd5f84310d4126bbb095dfbf7c35dd Mon Sep 17 00:00:00 2001 From: Andrey Ershov Date: Mon, 10 Dec 2018 17:38:54 +0100 Subject: [PATCH 7/7] && -> || --- .../main/java/org/elasticsearch/test/InternalTestCluster.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java index e720a8b19eb24..d408922bb7a5a 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java @@ -1729,7 +1729,7 @@ private void restartNode(NodeAndClient nodeAndClient, RestartCallback callback) activeDisruptionScheme.applyToNode(nodeAndClient.name, this); } - if (callback.validateClusterForming() && excludedNodeIds.isEmpty() == false) { + if (callback.validateClusterForming() || excludedNodeIds.isEmpty() == false) { // we have to validate cluster size if updateMinMaster == true, because we need the // second node to join in order to increment min_master_nodes back to 2. // we also have to do via the node that was just restarted as it may be that the master didn't yet process