From aaa8753989ebe172f6ccc862d24618870684d8cb Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 20 Jul 2018 08:09:29 +0100 Subject: [PATCH 01/76] Introduce gossip-like discovery of master nodes This commit introduces the `PeerFinder` which can be used to collect the identities of the master-eligible nodes in a masterless cluster, based on the `UnicastHostsProvider`, the nodes in the `ClusterState`, and nodes that other nodes have discovered. --- .../cluster/coordination/PeerFinder.java | 453 ++++++++++++++++ .../cluster/coordination/PeersRequest.java | 82 +++ .../cluster/coordination/PeersResponse.java | 94 ++++ .../cluster/coordination/RunnableUtils.java | 45 ++ .../coordination/DeterministicTaskQueue.java | 34 +- .../DeterministicTaskQueueTests.java | 18 +- .../cluster/coordination/MessagesTests.java | 75 +++ .../cluster/coordination/PeerFinderTests.java | 495 ++++++++++++++++++ .../coordination/RunnableUtilsTests.java | 46 ++ 9 files changed, 1316 insertions(+), 26 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java create mode 100644 server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java create mode 100644 server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java create mode 100644 server/src/main/java/org/elasticsearch/cluster/coordination/RunnableUtils.java create mode 100644 server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java create mode 100644 server/src/test/java/org/elasticsearch/cluster/coordination/RunnableUtilsTests.java diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java new file mode 100644 index 0000000000000..21dba055cd38c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -0,0 +1,453 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.coordination; + +import org.apache.logging.log4j.message.ParameterizedMessage; +import org.apache.lucene.util.SetOnce; +import org.elasticsearch.cluster.ClusterState.VotingConfiguration; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.common.collect.ImmutableOpenMap; +import org.elasticsearch.common.component.AbstractLifecycleComponent; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.AbstractRunnable; +import org.elasticsearch.discovery.zen.UnicastHostsProvider; +import org.elasticsearch.discovery.zen.UnicastZenPing; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.threadpool.ThreadPool.Names; +import org.elasticsearch.transport.TransportException; +import org.elasticsearch.transport.TransportResponseHandler; +import org.elasticsearch.transport.TransportService; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import static java.util.Collections.unmodifiableList; +import static org.elasticsearch.cluster.coordination.RunnableUtils.labelRunnable; +import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; + +public abstract class PeerFinder extends AbstractLifecycleComponent { + + public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/zen2/requestpeers"; + + // the time between attempts to find all peers + public static final Setting CONSENSUS_FIND_PEERS_DELAY_SETTING = + Setting.timeSetting("discovery.zen2.find_peers_delay", + TimeValue.timeValueMillis(1000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope); + + private final TimeValue findPeersDelay; + private final TimeValue resolveTimeout; + + private final Object mutex = new Object(); + private final TransportService transportService; + private final UnicastHostsProvider hostsProvider; + private final FutureExecutor futureExecutor; + private final TransportAddressConnector transportAddressConnector; + private final Supplier executorServiceFactory; + + private final SetOnce executorService = new SetOnce<>(); + private Optional peerFinder = Optional.empty(); + + PeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, + FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, + Supplier executorServiceFactory) { + super(settings); + findPeersDelay = CONSENSUS_FIND_PEERS_DELAY_SETTING.get(settings); + resolveTimeout = UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_RESOLVE_TIMEOUT.get(settings); + this.transportService = transportService; + this.hostsProvider = hostsProvider; + this.futureExecutor = futureExecutor; + this.transportAddressConnector = transportAddressConnector; + this.executorServiceFactory = executorServiceFactory; + } + + public Iterable getFoundPeers() { + // getFoundPeersSet() is synchronised so we don't have to + return getFoundPeersSet(); + } + + public boolean foundQuorumFrom(VotingConfiguration votingConfiguration) { + // getFoundPeersSet() is synchronised so we don't have to + return votingConfiguration.hasQuorum(getFoundPeersSet().stream().map(DiscoveryNode::getId).collect(Collectors.toSet())); + } + + Set getFoundPeersSet() { + synchronized (mutex) { + return getActivePeerFinder().foundPeers.keySet(); + } + } + + public void activate() { + if (lifecycle.started() == false) { + logger.debug("ignoring activation, not started"); + return; + } + + // Must not hold lock when calling out to Legislator to avoid a lock cycle + assert holdsLock() == false : "Peerfinder mutex is held in error"; + final DiscoveryNodes lastAcceptedNodes = getLastAcceptedNodes(); + + synchronized (mutex) { + assert peerFinder.isPresent() == false; + peerFinder = Optional.of(new ActivePeerFinder()); + peerFinder.get().start(lastAcceptedNodes); + } + } + + // exposed to subclasses for testing + protected final boolean holdsLock() { + return Thread.holdsLock(mutex); + } + + public void deactivate() { + synchronized (mutex) { + if (peerFinder.isPresent()) { + assert peerFinder.get().running; + peerFinder.get().stop(); + peerFinder = Optional.empty(); + } + } + } + + private ActivePeerFinder getActivePeerFinder() { + assert holdsLock() : "Peerfinder mutex not held"; + final ActivePeerFinder activePeerFinder = this.peerFinder.get(); + assert activePeerFinder.running; + return activePeerFinder; + } + + public List handlePeersRequest(PeersRequest peersRequest) { + synchronized (mutex) { + assert peersRequest.getSourceNode().equals(getLocalNode()) == false; + final ActivePeerFinder activePeerFinder = getActivePeerFinder(); + activePeerFinder.startProbe(peersRequest.getSourceNode().getAddress()); + peersRequest.getDiscoveryNodes().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); + return new ArrayList<>(activePeerFinder.foundPeers.keySet()); + } + } + + public boolean isActive() { + synchronized (mutex) { + if (peerFinder.isPresent()) { + assert peerFinder.get().running; + return true; + } else { + return false; + } + } + } + + private DiscoveryNode getLocalNode() { + // No synchronisation required + final DiscoveryNode localNode = transportService.getLocalNode(); + assert localNode != null; + return localNode; + } + + @Override + protected void doStart() { + // No synchronisation required + executorService.set(executorServiceFactory.get()); + } + + @Override + protected void doStop() { + // No synchronisation required + ThreadPool.terminate(executorService.get(), 10, TimeUnit.SECONDS); + } + + @Override + protected void doClose() { + } + + abstract DiscoveryNodes getLastAcceptedNodes(); + + abstract void onMasterFoundByProbe(DiscoveryNode masterNode, long term); + + public interface TransportAddressConnector { + /** + * Identify the node at the given address and establish a full connection to it. + * + * @return The node to which we connected. + */ + DiscoveryNode connectTo(TransportAddress transportAddress) throws IOException; + } + + private class ActivePeerFinder { + private boolean running; + private final Map foundPeers; + private final Set probedAddresses = new HashSet<>(); + private final AtomicBoolean resolveInProgress = new AtomicBoolean(); + private final AtomicReference> lastConnectedAddresses = new AtomicReference<>(Collections.emptyList()); + private final Map connectedNodes = newConcurrentMap(); + + ActivePeerFinder() { + foundPeers = newConcurrentMap(); + foundPeers.put(getLocalNode(), new FoundPeer(getLocalNode())); + } + + void start(DiscoveryNodes lastAcceptedNodes) { + assert holdsLock() : "PeerFinder mutex not held"; + assert running == false; + running = true; + handleWakeUpUnderLock(lastAcceptedNodes); + } + + void stop() { + assert holdsLock() : "PeerFinder mutex not held"; + assert running; + running = false; + } + + private void handleWakeUp() { + // Must not hold lock when calling out to Legislator to avoid a lock cycle + assert holdsLock() == false : "Peerfinder mutex is held in error"; + final DiscoveryNodes lastAcceptedNodes = getLastAcceptedNodes(); + synchronized (mutex) { + handleWakeUpUnderLock(lastAcceptedNodes); + } + } + + private void handleWakeUpUnderLock(DiscoveryNodes lastAcceptedNodes) { + assert holdsLock() : "PeerFinder mutex not held"; + + if (running == false) { + return; + } + + for (final FoundPeer foundPeer : foundPeers.values()) { + foundPeer.peersRequestedThisRound = false; + } + probedAddresses.clear(); + + logger.trace("ActivePeerFinder#handleWakeUp(): probing found peers {}", foundPeers.keySet()); + foundPeers.keySet().forEach(this::startProbe); + + final ImmutableOpenMap masterNodes = lastAcceptedNodes.getMasterNodes(); + logger.trace("ActivePeerFinder#handleWakeUp(): probing nodes in cluster state {}", masterNodes); + masterNodes.forEach(e -> startProbe(e.value)); + + if (resolveInProgress.compareAndSet(false, true)) { + executorService.get().execute(labelRunnable(() -> { + // No synchronisation required for most of this + List providedAddresses + = new ArrayList<>(hostsProvider.buildDynamicHosts((hosts, limitPortCounts) + -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, + transportService, resolveTimeout))); + + // localNode is excluded from buildDynamicNodes + providedAddresses.add(transportService.getLocalNode().getAddress()); + + lastConnectedAddresses.set(unmodifiableList(providedAddresses)); + + logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); + synchronized (mutex) { + providedAddresses.forEach(ActivePeerFinder.this::startProbe); + } + + resolveInProgress.set(false); + }, "PeerFinder resolving unicast hosts list")); + } + + final List transportAddresses = lastConnectedAddresses.get(); + logger.trace("ActivePeerFinder#handleNextWakeUp(): probing supplied transport addresses {}", transportAddresses); + transportAddresses.forEach(this::startProbe); + + futureExecutor.schedule(labelRunnable(this::handleWakeUp, "ActivePeerFinder::handleWakeUp"), findPeersDelay); + } + + void startProbe(DiscoveryNode discoveryNode) { + startProbe(discoveryNode.getAddress()); + } + + void startProbe(TransportAddress transportAddress) { + assert holdsLock() : "PeerFinder mutex not held"; + if (running == false) { + logger.trace("startProbe({}) not running", transportAddress); + return; + } + + if (transportAddress.equals(getLocalNode().getAddress())) { + logger.trace("startProbe({}) not probing local node", transportAddress); + return; + } + + if (probedAddresses.add(transportAddress) == false) { + logger.trace("startProbe({}) already probed this round", transportAddress); + return; + } + + final DiscoveryNode cachedNode = connectedNodes.get(transportAddress); + if (cachedNode != null) { + assert cachedNode.getAddress().equals(transportAddress); + + if (transportService.nodeConnected(cachedNode)) { + logger.trace("startProbe({}) found connected {}", transportAddress, cachedNode); + onProbeSuccess(cachedNode); + return; + } + + logger.trace("startProbe({}) found disconnected {}, probing again", transportAddress, cachedNode); + connectedNodes.remove(transportAddress, cachedNode); + } else { + logger.trace("startProbe({}) no cached node found, probing", transportAddress); + } + + executorService.get().execute(new AbstractRunnable() { + + @Override + protected void doRun() throws Exception { + // No synchronisation required - transportAddressConnector, connectedNodes, onProbeSuccess all threadsafe + final DiscoveryNode remoteNode = transportAddressConnector.connectTo(transportAddress); + connectedNodes.put(remoteNode.getAddress(), remoteNode); + onProbeSuccess(remoteNode); + } + + @Override + public void onFailure(Exception e) { + logger.debug(() -> new ParameterizedMessage("Probing {} failed", transportAddress), e); + } + + @Override + public String toString() { + return "probe " + transportAddress; + } + }); + } + + private void onProbeSuccess(final DiscoveryNode discoveryNode) { + // Called when we are fully connected to the given DiscoveryNode. Request its peers once per round, and probe them too. + synchronized (mutex) { + if (running == false) { + return; + } + + if (discoveryNode.isMasterNode() == false) { + logger.trace("Ignoring non-master node {}", discoveryNode); + return; + } + + final FoundPeer foundPeer = getFoundPeer(discoveryNode); + + if (foundPeer.peersRequestedThisRound) { + return; + } + foundPeer.peersRequestedThisRound = true; + + if (discoveryNode.equals(getLocalNode())) { + return; + } + + List knownNodes = new ArrayList<>(foundPeers.keySet()); + + transportService.sendRequest(discoveryNode, REQUEST_PEERS_ACTION_NAME, + new PeersRequest(getLocalNode(), new ArrayList<>(knownNodes)), + new TransportResponseHandler() { + + @Override + public PeersResponse read(StreamInput in) throws IOException { + return new PeersResponse(in); + } + + @Override + public void handleResponse(PeersResponse response) { + logger.trace("Received {} from {}", response, discoveryNode); + final boolean foundMasterNode; + synchronized (mutex) { + if (running == false) { + return; + } + + if (response.getMasterNode().isPresent()) { + final DiscoveryNode masterNode = response.getMasterNode().get(); + if (masterNode.equals(discoveryNode)) { + foundMasterNode = true; + } else { + foundMasterNode = false; + startProbe(masterNode); + } + } else { + foundMasterNode = false; + response.getDiscoveryNodes().stream().map(DiscoveryNode::getAddress) + .forEach(ActivePeerFinder.this::startProbe); + } + } + + if (foundMasterNode) { + // Must not hold lock when calling out to Legislator to avoid a lock cycle + assert holdsLock() == false : "Peerfinder mutex is held in error"; + onMasterFoundByProbe(discoveryNode, response.getTerm()); + } + } + + @Override + public void handleException(TransportException exp) { + logger.debug("PeersRequest failed", exp); + } + + @Override + public String executor() { + return Names.GENERIC; + } + }); + } + } + + private FoundPeer getFoundPeer(DiscoveryNode discoveryNode) { + // no synchronisation required: computeIfAbsent is atomic as foundPeers is a newConcurrentMap() + FoundPeer foundPeer = foundPeers.computeIfAbsent(discoveryNode, FoundPeer::new); + assert foundPeer.discoveryNode.equals(discoveryNode); + return foundPeer; + } + + class FoundPeer { + final DiscoveryNode discoveryNode; + boolean peersRequestedThisRound; + + FoundPeer(DiscoveryNode discoveryNode) { + this.discoveryNode = discoveryNode; + } + + @Override + public String toString() { + return "FoundPeer{" + + "discoveryNode=" + discoveryNode + + ", peersRequestedThisRound=" + peersRequestedThisRound + + '}'; + } + } + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java new file mode 100644 index 0000000000000..2946c704beac5 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java @@ -0,0 +1,82 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.coordination; + +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.transport.TransportRequest; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +public class PeersRequest extends TransportRequest { + private final DiscoveryNode sourceNode; + private final List discoveryNodes; + + public PeersRequest(DiscoveryNode sourceNode, List discoveryNodes) { + this.sourceNode = sourceNode; + this.discoveryNodes = discoveryNodes; + } + + public PeersRequest(StreamInput in) throws IOException { + super(in); + sourceNode = new DiscoveryNode(in); + discoveryNodes = in.readList(DiscoveryNode::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + sourceNode.writeTo(out); + out.writeList(discoveryNodes); + } + + public List getDiscoveryNodes() { + return discoveryNodes; + } + + public DiscoveryNode getSourceNode() { + return sourceNode; + } + + @Override + public String toString() { + return "PeersRequest{" + + "sourceNode=" + sourceNode + + ", discoveryNodes=" + discoveryNodes + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PeersRequest that = (PeersRequest) o; + return Objects.equals(sourceNode, that.sourceNode) && + Objects.equals(discoveryNodes, that.discoveryNodes); + } + + @Override + public int hashCode() { + return Objects.hash(sourceNode, discoveryNodes); + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java new file mode 100644 index 0000000000000..d9cbaa348115c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java @@ -0,0 +1,94 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.coordination; + +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.transport.TransportResponse; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +public class PeersResponse extends TransportResponse { + private final Optional masterNode; + private final List discoveryNodes; + private final long term; + + public PeersResponse(Optional masterNode, List discoveryNodes, long term) { + this.masterNode = masterNode; + this.discoveryNodes = discoveryNodes; + this.term = term; + assert masterNode.isPresent() == discoveryNodes.isEmpty(); + } + + public PeersResponse(StreamInput in) throws IOException { + masterNode = Optional.ofNullable(in.readOptionalWriteable(DiscoveryNode::new)); + discoveryNodes = in.readList(DiscoveryNode::new); + term = in.readLong(); + assert masterNode.isPresent() == discoveryNodes.isEmpty(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeOptionalWriteable(masterNode.orElse(null)); + out.writeList(discoveryNodes); + out.writeLong(term); + } + + public Optional getMasterNode() { + return masterNode; + } + + public List getDiscoveryNodes() { + return discoveryNodes; + } + + public long getTerm() { + return term; + } + + @Override + public String toString() { + return "PeersResponse{" + + "masterNode=" + masterNode + + ", discoveryNodes=" + discoveryNodes + + ", term=" + term + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PeersResponse that = (PeersResponse) o; + return term == that.term && + Objects.equals(masterNode, that.masterNode) && + Objects.equals(discoveryNodes, that.discoveryNodes); + } + + @Override + public int hashCode() { + return Objects.hash(masterNode, discoveryNodes, term); + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/RunnableUtils.java b/server/src/main/java/org/elasticsearch/cluster/coordination/RunnableUtils.java new file mode 100644 index 0000000000000..908e0b4e74dfb --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/RunnableUtils.java @@ -0,0 +1,45 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.coordination; + +public class RunnableUtils { + + private RunnableUtils() { + // utility class, it's a mistake to instantiate. + } + + /** + * Label a Runnable, overriding its toString() method. + */ + public static Runnable labelRunnable(final Runnable runnable, final String label) { + return new Runnable() { + @Override + public void run() { + runnable.run(); + } + + @Override + public String toString() { + return label; + } + }; + } +} + diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueue.java b/server/src/test/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueue.java index 6afdf81a8b45f..904dc0f96873a 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueue.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueue.java @@ -24,7 +24,6 @@ import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPoolInfo; import org.elasticsearch.threadpool.ThreadPoolStats; @@ -51,6 +50,29 @@ public DeterministicTaskQueue(Settings settings) { super(settings); } + public void runAllTasks() { + while (true) { + runAllRunnableTasks(); + if (hasDeferredTasks()) { + advanceTime(); + } else { + break; + } + } + } + + public void runAllRunnableTasks() { + while (hasRunnableTasks()) { + runNextTask(); + } + } + + public void runAllRunnableTasks(Random random) { + while (hasRunnableTasks()) { + runRandomTask(random); + } + } + /** * @return whether there are any runnable tasks. */ @@ -318,16 +340,6 @@ public boolean awaitTermination(long timeout, TimeUnit unit) { public ScheduledExecutorService scheduler() { throw new UnsupportedOperationException(); } - - @Override - public void close() { - throw new UnsupportedOperationException(); - } - - @Override - public ThreadContext getThreadContext() { - throw new UnsupportedOperationException(); - } }; } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueueTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueueTests.java index adba2bcc8a0e8..2097075b32def 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueueTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/DeterministicTaskQueueTests.java @@ -281,7 +281,7 @@ public void testFutureExecutorSchedulesTasks() { futureExecutor.schedule(() -> strings.add("also runnable"), TimeValue.MINUS_ONE); - runAllTasks(taskQueue); + taskQueue.runAllTasks(); assertThat(taskQueue.getCurrentTimeMillis(), is(startTime + delayMillis)); assertThat(strings, contains("runnable", "also runnable", "deferred")); @@ -295,22 +295,10 @@ public void testFutureExecutorSchedulesTasks() { assertFalse(taskQueue.hasRunnableTasks()); assertTrue(taskQueue.hasDeferredTasks()); - runAllTasks(taskQueue); + taskQueue.runAllTasks(); assertThat(taskQueue.getCurrentTimeMillis(), is(startTime + delayMillis + delayMillis1)); - assertThat(strings, contains("runnable", "also runnable", "deferred", "not quite so deferred", "further deferred")); - } - private static void runAllTasks(DeterministicTaskQueue taskQueue) { - while (true) { - while (taskQueue.hasRunnableTasks()) { - taskQueue.runNextTask(); - } - if (taskQueue.hasDeferredTasks()) { - taskQueue.advanceTime(); - } else { - break; - } - } + assertThat(strings, contains("runnable", "also runnable", "deferred", "not quite so deferred", "further deferred")); } private static DeterministicTaskQueue newTaskQueue() { diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java index 7fa4a3217348f..979e84f1832e4 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java @@ -25,6 +25,16 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.EqualsHashCodeTestUtils; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static java.util.Collections.emptyList; +import static java.util.Collections.singletonList; + public class MessagesTests extends ESTestCase { private DiscoveryNode createNode(String id) { @@ -145,4 +155,69 @@ public ClusterState randomClusterState() { new ClusterState.VotingConfiguration(Sets.newHashSet(generateRandomStringArray(10, 10, false))), randomLong()); } + + private List modifyDiscoveryNodesList(Collection originalNodes, boolean allowEmpty) { + final List discoveryNodes = new ArrayList<>(originalNodes); + if (discoveryNodes.isEmpty() == false && randomBoolean() && (allowEmpty || discoveryNodes.size() > 1)) { + discoveryNodes.remove(randomIntBetween(0, discoveryNodes.size() - 1)); + } else if (discoveryNodes.isEmpty() == false && randomBoolean()) { + discoveryNodes.set(randomIntBetween(0, discoveryNodes.size() - 1), createNode(randomAlphaOfLength(10))); + } else { + discoveryNodes.add(createNode(randomAlphaOfLength(10))); + } + return discoveryNodes; + } + + public void testPeersRequestEqualsHashCodeSerialization() { + final PeersRequest initialPeersRequest = new PeersRequest(createNode(randomAlphaOfLength(10)), + Arrays.stream(generateRandomStringArray(10, 10, false)).map(this::createNode).collect(Collectors.toList())); + + EqualsHashCodeTestUtils.checkEqualsAndHashCode(initialPeersRequest, + publishRequest -> copyWriteable(publishRequest, writableRegistry(), PeersRequest::new), + in -> { + final List discoveryNodes = new ArrayList<>(in.getDiscoveryNodes()); + if (randomBoolean()) { + return new PeersRequest(createNode(randomAlphaOfLength(10)), discoveryNodes); + } else { + return new PeersRequest(in.getSourceNode(), modifyDiscoveryNodesList(in.getDiscoveryNodes(), true)); + } + }); + } + + public void testPeersResponseEqualsHashCodeSerialization() { + final long initialTerm = randomNonNegativeLong(); + final PeersResponse initialPeersResponse; + + if (randomBoolean()) { + initialPeersResponse = new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), emptyList(), initialTerm); + } else { + initialPeersResponse = new PeersResponse(Optional.empty(), + Arrays.stream(generateRandomStringArray(10, 10, false, false)).map(this::createNode).collect(Collectors.toList()), + initialTerm); + } + + EqualsHashCodeTestUtils.checkEqualsAndHashCode(initialPeersResponse, + publishResponse -> copyWriteable(publishResponse, writableRegistry(), PeersResponse::new), + in -> { + final long term = in.getTerm(); + if (randomBoolean()) { + return new PeersResponse(in.getMasterNode(), in.getDiscoveryNodes(), + randomValueOtherThan(term, ESTestCase::randomNonNegativeLong)); + } else { + if (in.getMasterNode().isPresent()) { + if (randomBoolean()) { + return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), in.getDiscoveryNodes(), term); + } else { + return new PeersResponse(Optional.empty(), singletonList(createNode(randomAlphaOfLength(10))), term); + } + } else { + if (randomBoolean()) { + return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), emptyList(), term); + } else { + return new PeersResponse(in.getMasterNode(), modifyDiscoveryNodesList(in.getDiscoveryNodes(), false), term); + } + } + } + }); + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java new file mode 100644 index 0000000000000..1f17908cf65e7 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -0,0 +1,495 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.coordination; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.coordination.PeerFinder.TransportAddressConnector; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.node.DiscoveryNodes.Builder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.discovery.zen.UnicastHostsProvider; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.transport.CapturingTransport; +import org.elasticsearch.test.transport.CapturingTransport.CapturedRequest; +import org.elasticsearch.transport.TransportService; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static java.util.Collections.emptyList; +import static java.util.Collections.emptyMap; +import static java.util.Collections.emptySet; +import static java.util.Collections.singletonList; +import static org.elasticsearch.cluster.coordination.PeerFinder.REQUEST_PEERS_ACTION_NAME; +import static org.elasticsearch.node.Node.NODE_NAME_SETTING; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.emptyArray; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class PeerFinderTests extends ESTestCase { + + private CapturingTransport capturingTransport; + private DeterministicTaskQueue deterministicTaskQueue; + private DiscoveryNode localNode; + private MockUnicastHostsProvider unicastHostsProvider; + private MockTransportAddressConnector transportAddressConnector; + private TestPeerFinder peerFinder; + + private Set disconnectedNodes = new HashSet<>(); + private Set connectedNodes = new HashSet<>(); + + class MockTransportAddressConnector implements TransportAddressConnector { + final Set reachableNodes = new HashSet<>(); + final Set unreachableAddresses = new HashSet<>(); + + @Override + public DiscoveryNode connectTo(TransportAddress transportAddress) throws IOException { + assert localNode.getAddress().equals(transportAddress) == false : "should not probe local node"; + + if (unreachableAddresses.contains(transportAddress)) { + throw new IOException("cannot connect to " + transportAddress); + } + + for (final DiscoveryNode discoveryNode : reachableNodes) { + if (discoveryNode.getAddress().equals(transportAddress)) { + disconnectedNodes.remove(discoveryNode); + connectedNodes.add(discoveryNode); + return discoveryNode; + } + } + + throw new AssertionError(transportAddress + " unknown"); + } + } + + static class MockUnicastHostsProvider implements UnicastHostsProvider { + final List providedAddresses = new ArrayList<>(); + + @Override + public List buildDynamicHosts(HostsResolver hostsResolver) { + return providedAddresses; + } + } + + static class TestPeerFinder extends PeerFinder { + DiscoveryNodes lastAcceptedNodes; + DiscoveryNode discoveredMasterNode; + + TestPeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, + FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, + Supplier executorServiceFactory) { + super(settings, transportService, hostsProvider, futureExecutor, transportAddressConnector, executorServiceFactory); + } + + @Override + DiscoveryNodes getLastAcceptedNodes() { + assert holdsLock() == false : "PeerFinder lock held in error"; + return lastAcceptedNodes; + } + + @Override + void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { + assert holdsLock() == false : "PeerFinder lock held in error"; + assertThat(discoveredMasterNode, nullValue()); + discoveredMasterNode = masterNode; + } + + void updateLastAcceptedNodes(Consumer onBuilder) { + final Builder builder = DiscoveryNodes.builder(lastAcceptedNodes); + onBuilder.accept(builder); + lastAcceptedNodes = builder.build(); + } + } + + @Before + public void setup() { + capturingTransport = new CapturingTransport() { + @Override + public boolean nodeConnected(DiscoveryNode node) { + final boolean isConnected = connectedNodes.contains(node); + final boolean isDisconnected = disconnectedNodes.contains(node); + assert isConnected != isDisconnected : node + ": isConnected=" + isConnected + ", isDisconnected=" + isDisconnected; + return isConnected; + } + }; + unicastHostsProvider = new MockUnicastHostsProvider(); + transportAddressConnector = new MockTransportAddressConnector(); + + final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), "node").build(); + deterministicTaskQueue = new DeterministicTaskQueue(settings); + + localNode = newDiscoveryNode("local-node"); + final TransportService transportService = new TransportService(settings, capturingTransport, + deterministicTaskQueue.getThreadPool(), TransportService.NOOP_TRANSPORT_INTERCEPTOR, + boundTransportAddress -> localNode, null, emptySet()); + transportService.start(); + transportService.acceptIncomingRequests(); + + peerFinder = new TestPeerFinder(settings, transportService, unicastHostsProvider, + deterministicTaskQueue.getFutureExecutor(), transportAddressConnector, deterministicTaskQueue::getExecutorService); + + peerFinder.lastAcceptedNodes = DiscoveryNodes.builder().localNodeId(localNode.getId()).add(localNode).build(); + peerFinder.start(); + } + + @After + public void deactivateAndRunRemainingTasks() { + peerFinder.deactivate(); + deterministicTaskQueue.runAllTasks(); // termination ensures that everything is properly cleaned up + } + + public void testActivationAndDeactivation() { + peerFinder.activate(); + assertFoundPeers(); + assertTrue(peerFinder.isActive()); + + peerFinder.deactivate(); + assertFalse(peerFinder.isActive()); + } + + public void testAddsReachableNodesFromUnicastHostsList() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + } + + public void testDoesNotAddUnreachableNodesFromUnicastHostsList() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.unreachableAddresses.add(otherNode.getAddress()); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(); + } + + public void testDoesNotAddNonMasterEligibleNodesFromUnicastHostsList() { + final DiscoveryNode nonMasterNode = new DiscoveryNode("node-from-hosts-list", buildNewFakeTransportAddress(), + emptyMap(), emptySet(), Version.CURRENT); + + unicastHostsProvider.providedAddresses.add(nonMasterNode.getAddress()); + transportAddressConnector.reachableNodes.add(nonMasterNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(); + + assertThat(capturingTransport.capturedRequests(), emptyArray()); + } + + public void testChecksUnicastHostsForChanges() { + peerFinder.activate(); + runAllRunnableTasks(); + assertFoundPeers(); + + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + } + + public void testDeactivationClearsPastKnowledge() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + + peerFinder.deactivate(); + + unicastHostsProvider.providedAddresses.clear(); + peerFinder.activate(); + runAllRunnableTasks(); + assertFoundPeers(); + } + + public void testAddsReachableNodesFromClusterState() { + final DiscoveryNode otherNode = newDiscoveryNode("node-in-cluster-state"); + peerFinder.updateLastAcceptedNodes(b -> b.add(otherNode)); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + } + + public void testDoesNotAddUnreachableNodesFromClusterState() { + final DiscoveryNode otherNode = newDiscoveryNode("node-in-cluster-state"); + peerFinder.updateLastAcceptedNodes(b -> b.add(otherNode)); + transportAddressConnector.unreachableAddresses.add(otherNode.getAddress()); + + peerFinder.activate(); + runAllRunnableTasks(); + assertFoundPeers(); + } + + public void testAddsReachableNodesFromIncomingRequests() { + final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); + final DiscoveryNode otherKnownNode = newDiscoveryNode("other-known-node"); + + transportAddressConnector.reachableNodes.add(sourceNode); + transportAddressConnector.reachableNodes.add(otherKnownNode); + + peerFinder.activate(); + peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.singletonList(otherKnownNode))); + runAllRunnableTasks(); + + assertFoundPeers(sourceNode, otherKnownNode); + } + + public void testDoesNotAddUnreachableNodesFromIncomingRequests() { + final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); + final DiscoveryNode otherKnownNode = newDiscoveryNode("other-known-node"); + + transportAddressConnector.reachableNodes.add(sourceNode); + transportAddressConnector.unreachableAddresses.add(otherKnownNode.getAddress()); + + peerFinder.activate(); + peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.singletonList(otherKnownNode))); + runAllRunnableTasks(); + + assertFoundPeers(sourceNode); + } + + public void testDoesNotAddUnreachableSourceNodeFromIncomingRequests() { + final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); + final DiscoveryNode otherKnownNode = newDiscoveryNode("other-known-node"); + + transportAddressConnector.unreachableAddresses.add(sourceNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherKnownNode); + + peerFinder.activate(); + peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.singletonList(otherKnownNode))); + runAllRunnableTasks(); + + assertFoundPeers(otherKnownNode); + } + + public void testRequestsPeersIncludingKnownPeersInRequest() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + + final CapturedRequest[] capturedRequests = capturingTransport.getCapturedRequestsAndClear(); + assertThat(capturedRequests.length, is(1)); + final PeersRequest peersRequest = (PeersRequest) capturedRequests[0].request; + assertThat(peersRequest.getDiscoveryNodes(), containsInAnyOrder(localNode, otherNode)); + } + + public void testAddsReachablePeersFromResponse() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + + final DiscoveryNode discoveredNode = newDiscoveryNode("discovered-node"); + transportAddressConnector.reachableNodes.add(discoveredNode); + respondToRequests(node -> { + assertThat(node, is(otherNode)); + return new PeersResponse(Optional.empty(), singletonList(discoveredNode), randomNonNegativeLong()); + }); + + runAllRunnableTasks(); + assertFoundPeers(otherNode, discoveredNode); + } + + public void testAddsReachableMasterFromResponse() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + final DiscoveryNode discoveredMaster = newDiscoveryNode("discovered-master"); + + respondToRequests(node -> { + assertThat(node, is(otherNode)); + return new PeersResponse(Optional.of(discoveredMaster), emptyList(), randomNonNegativeLong()); + }); + + transportAddressConnector.reachableNodes.add(discoveredMaster); + runAllRunnableTasks(); + assertFoundPeers(otherNode, discoveredMaster); + assertThat(peerFinder.discoveredMasterNode, nullValue()); + } + + public void testHandlesDiscoveryOfMasterFromResponseFromMaster() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + + respondToRequests(node -> { + assertThat(node, is(otherNode)); + return new PeersResponse(Optional.of(otherNode), emptyList(), randomNonNegativeLong()); + }); + + runAllRunnableTasks(); + assertFoundPeers(otherNode); + assertThat(peerFinder.discoveredMasterNode, is(otherNode)); + } + + public void testOnlyRequestsPeersOncePerRoundButDoesRetryNextRound() { + final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); + transportAddressConnector.reachableNodes.add(sourceNode); + + peerFinder.activate(); + peerFinder.handlePeersRequest(new PeersRequest(sourceNode, emptyList())); + runAllRunnableTasks(); + assertFoundPeers(sourceNode); + + respondToRequests(node -> { + assertThat(node, is(sourceNode)); + return new PeersResponse(Optional.empty(), singletonList(sourceNode), randomNonNegativeLong()); + }); + + peerFinder.handlePeersRequest(new PeersRequest(sourceNode, emptyList())); + runAllRunnableTasks(); + respondToRequests(node -> { + throw new AssertionError("there should have been no further requests"); + }); + + final DiscoveryNode otherNode = newDiscoveryNode("otherNode"); + transportAddressConnector.reachableNodes.add(otherNode); + + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + respondToRequests(node -> { + assertThat(node, is(sourceNode)); + return new PeersResponse(Optional.empty(), singletonList(otherNode), randomNonNegativeLong()); + }); + runAllRunnableTasks(); + assertFoundPeers(sourceNode, otherNode); + } + + public void testDoesNotReconnectToNodesOnceConnected() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + + transportAddressConnector.reachableNodes.clear(); + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + } + + public void testReconnectsToDisconnectedNodes() { + final DiscoveryNode otherNode = newDiscoveryNode("original-node"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + + transportAddressConnector.reachableNodes.clear(); + final DiscoveryNode rebootedOtherNode = new DiscoveryNode("rebooted-node", otherNode.getAddress(), Version.CURRENT); + transportAddressConnector.reachableNodes.add(rebootedOtherNode); + + connectedNodes.remove(otherNode); + disconnectedNodes.add(otherNode); + + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + + assertFoundPeers(otherNode, rebootedOtherNode); + } + + private void respondToRequests(Function responseFactory) { + final CapturedRequest[] capturedRequests = capturingTransport.getCapturedRequestsAndClear(); + for (final CapturedRequest capturedRequest : capturedRequests) { + assertThat(capturedRequest.action, is(REQUEST_PEERS_ACTION_NAME)); + assertThat(capturedRequest.request, instanceOf(PeersRequest.class)); + final PeersRequest peersRequest = (PeersRequest) capturedRequest.request; + assertThat(peersRequest.getSourceNode(), is(localNode)); + capturingTransport.handleResponse(capturedRequests[0].requestId, responseFactory.apply(capturedRequest.node)); + } + } + + private void assertFoundPeers(DiscoveryNode... expectedNodes) { + assertThat(peerFinder.getFoundPeersSet(), + equalTo(Stream.concat(Arrays.stream(expectedNodes), Stream.of(localNode)).collect(Collectors.toSet()))); + } + + private DiscoveryNode newDiscoveryNode(String nodeId) { + return new DiscoveryNode(nodeId, buildNewFakeTransportAddress(), Version.CURRENT); + } + + private void runAllRunnableTasks() { + deterministicTaskQueue.runAllRunnableTasks(random()); + } +} + diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/RunnableUtilsTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/RunnableUtilsTests.java new file mode 100644 index 0000000000000..9f933be7d98be --- /dev/null +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/RunnableUtilsTests.java @@ -0,0 +1,46 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.cluster.coordination; + +import org.elasticsearch.test.ESTestCase; + +import java.util.concurrent.atomic.AtomicInteger; + +import static org.elasticsearch.cluster.coordination.RunnableUtils.labelRunnable; +import static org.hamcrest.core.Is.is; + +public class RunnableUtilsTests extends ESTestCase { + public void testLabelRunnable() { + final AtomicInteger counter = new AtomicInteger(); + final String expectedMessage = randomAlphaOfLength(10); + + final Runnable labelledRunnable = labelRunnable(counter::incrementAndGet, expectedMessage); + + assertThat(labelledRunnable.toString(), is(expectedMessage)); + + assertThat(counter.get(), is(0)); + labelledRunnable.run(); + assertThat(counter.get(), is(1)); + labelledRunnable.run(); + assertThat(counter.get(), is(2)); + + assertThat(labelledRunnable.toString(), is(expectedMessage)); + } +} From 321def8c19542eab4996b7905af55db75fecbdd2 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 12:56:37 +0100 Subject: [PATCH 02/76] No zen2 in request peers action --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 21dba055cd38c..24bc32e58678f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -61,7 +61,7 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { - public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/zen2/requestpeers"; + public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/requestpeers"; // the time between attempts to find all peers public static final Setting CONSENSUS_FIND_PEERS_DELAY_SETTING = From 8e9c881023e667b348b332d33fd3bc6c210d2d78 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 12:58:40 +0100 Subject: [PATCH 03/76] Fix up discovery.find_peers_interval setting --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 6 +++--- .../org/elasticsearch/common/settings/ClusterSettings.java | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 24bc32e58678f..d336a58060d71 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -64,8 +64,8 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/requestpeers"; // the time between attempts to find all peers - public static final Setting CONSENSUS_FIND_PEERS_DELAY_SETTING = - Setting.timeSetting("discovery.zen2.find_peers_delay", + public static final Setting DISCOVERY_FIND_PEERS_INTERVAL_SETTING = + Setting.timeSetting("discovery.find_peers_interval", TimeValue.timeValueMillis(1000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope); private final TimeValue findPeersDelay; @@ -85,7 +85,7 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, Supplier executorServiceFactory) { super(settings); - findPeersDelay = CONSENSUS_FIND_PEERS_DELAY_SETTING.get(settings); + findPeersDelay = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); resolveTimeout = UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_RESOLVE_TIMEOUT.get(settings); this.transportService = transportService; this.hostsProvider = hostsProvider; diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 03a0f3b42a10a..24a5750d14aa9 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -30,6 +30,7 @@ import org.elasticsearch.cluster.InternalClusterInfoService; import org.elasticsearch.cluster.NodeConnectionsService; import org.elasticsearch.cluster.action.index.MappingUpdatedAction; +import org.elasticsearch.cluster.coordination.PeerFinder; import org.elasticsearch.cluster.metadata.IndexGraveyard; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.routing.OperationRouting; @@ -423,6 +424,7 @@ public void apply(Settings value, Settings current, Settings previous) { Node.BREAKER_TYPE_KEY, OperationRouting.USE_ADAPTIVE_REPLICA_SELECTION_SETTING, IndexGraveyard.SETTING_MAX_TOMBSTONES, - EnableAssignmentDecider.CLUSTER_TASKS_ALLOCATION_ENABLE_SETTING + EnableAssignmentDecider.CLUSTER_TASKS_ALLOCATION_ENABLE_SETTING, + PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING ))); } From c870c94278b5d3ab0bf156314ff605054cba4edd Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 13:02:42 +0100 Subject: [PATCH 04/76] Remove unused foundQuorumFrom --- .../cluster/coordination/PeerFinder.java | 12 ------------ .../cluster/coordination/PeerFinderTests.java | 2 +- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index d336a58060d71..48ca216bec0bc 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -21,7 +21,6 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.util.SetOnce; -import org.elasticsearch.cluster.ClusterState.VotingConfiguration; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.collect.ImmutableOpenMap; @@ -53,7 +52,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; -import java.util.stream.Collectors; import static java.util.Collections.unmodifiableList; import static org.elasticsearch.cluster.coordination.RunnableUtils.labelRunnable; @@ -95,16 +93,6 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { } public Iterable getFoundPeers() { - // getFoundPeersSet() is synchronised so we don't have to - return getFoundPeersSet(); - } - - public boolean foundQuorumFrom(VotingConfiguration votingConfiguration) { - // getFoundPeersSet() is synchronised so we don't have to - return votingConfiguration.hasQuorum(getFoundPeersSet().stream().map(DiscoveryNode::getId).collect(Collectors.toSet())); - } - - Set getFoundPeersSet() { synchronized (mutex) { return getActivePeerFinder().foundPeers.keySet(); } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 1f17908cf65e7..cc551f1785c3b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -480,7 +480,7 @@ private void respondToRequests(Function responseFa } private void assertFoundPeers(DiscoveryNode... expectedNodes) { - assertThat(peerFinder.getFoundPeersSet(), + assertThat(peerFinder.getFoundPeers(), equalTo(Stream.concat(Arrays.stream(expectedNodes), Stream.of(localNode)).collect(Collectors.toSet()))); } From 686a434ec20f8a75349af03f1f1acf04740faca9 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 13:08:35 +0100 Subject: [PATCH 05/76] protected --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 48ca216bec0bc..4a810088e5888 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -181,9 +181,9 @@ protected void doStop() { protected void doClose() { } - abstract DiscoveryNodes getLastAcceptedNodes(); + protected abstract DiscoveryNodes getLastAcceptedNodes(); - abstract void onMasterFoundByProbe(DiscoveryNode masterNode, long term); + protected abstract void onMasterFoundByProbe(DiscoveryNode masterNode, long term); public interface TransportAddressConnector { /** From 83f7189c3e0178b5fec14c25bd2fd16494bb90dd Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 13:08:42 +0100 Subject: [PATCH 06/76] Inline one-use method --- .../elasticsearch/cluster/coordination/PeerFinder.java | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 4a810088e5888..ab7f2af3e30d0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -348,7 +348,8 @@ private void onProbeSuccess(final DiscoveryNode discoveryNode) { return; } - final FoundPeer foundPeer = getFoundPeer(discoveryNode); + final FoundPeer foundPeer = foundPeers.computeIfAbsent(discoveryNode, FoundPeer::new); + assert foundPeer.discoveryNode.equals(discoveryNode); if (foundPeer.peersRequestedThisRound) { return; @@ -414,13 +415,6 @@ public String executor() { } } - private FoundPeer getFoundPeer(DiscoveryNode discoveryNode) { - // no synchronisation required: computeIfAbsent is atomic as foundPeers is a newConcurrentMap() - FoundPeer foundPeer = foundPeers.computeIfAbsent(discoveryNode, FoundPeer::new); - assert foundPeer.discoveryNode.equals(discoveryNode); - return foundPeer; - } - class FoundPeer { final DiscoveryNode discoveryNode; boolean peersRequestedThisRound; From 1525607029ea66b98d18f8ddb46634cbb3ab04c8 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 13:12:14 +0100 Subject: [PATCH 07/76] We do not probe the local address so it does not need to be provided --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index ab7f2af3e30d0..3bd9a6d8da976 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -256,9 +256,6 @@ private void handleWakeUpUnderLock(DiscoveryNodes lastAcceptedNodes) { -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, transportService, resolveTimeout))); - // localNode is excluded from buildDynamicNodes - providedAddresses.add(transportService.getLocalNode().getAddress()); - lastConnectedAddresses.set(unmodifiableList(providedAddresses)); logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); From 0b4e9d19f7b63ec2d3a243216bcbc0867ca11c7f Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 17:46:56 +0100 Subject: [PATCH 08/76] Fixup protected --- .../cluster/coordination/RunnableUtils.java | 45 ------------------ .../cluster/coordination/PeerFinderTests.java | 4 +- .../coordination/RunnableUtilsTests.java | 46 ------------------- 3 files changed, 2 insertions(+), 93 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/cluster/coordination/RunnableUtils.java delete mode 100644 server/src/test/java/org/elasticsearch/cluster/coordination/RunnableUtilsTests.java diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/RunnableUtils.java b/server/src/main/java/org/elasticsearch/cluster/coordination/RunnableUtils.java deleted file mode 100644 index 908e0b4e74dfb..0000000000000 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/RunnableUtils.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.cluster.coordination; - -public class RunnableUtils { - - private RunnableUtils() { - // utility class, it's a mistake to instantiate. - } - - /** - * Label a Runnable, overriding its toString() method. - */ - public static Runnable labelRunnable(final Runnable runnable, final String label) { - return new Runnable() { - @Override - public void run() { - runnable.run(); - } - - @Override - public String toString() { - return label; - } - }; - } -} - diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index cc551f1785c3b..a38388da49635 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -118,13 +118,13 @@ static class TestPeerFinder extends PeerFinder { } @Override - DiscoveryNodes getLastAcceptedNodes() { + protected DiscoveryNodes getLastAcceptedNodes() { assert holdsLock() == false : "PeerFinder lock held in error"; return lastAcceptedNodes; } @Override - void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { + protected void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { assert holdsLock() == false : "PeerFinder lock held in error"; assertThat(discoveredMasterNode, nullValue()); discoveredMasterNode = masterNode; diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/RunnableUtilsTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/RunnableUtilsTests.java deleted file mode 100644 index 9f933be7d98be..0000000000000 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/RunnableUtilsTests.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.cluster.coordination; - -import org.elasticsearch.test.ESTestCase; - -import java.util.concurrent.atomic.AtomicInteger; - -import static org.elasticsearch.cluster.coordination.RunnableUtils.labelRunnable; -import static org.hamcrest.core.Is.is; - -public class RunnableUtilsTests extends ESTestCase { - public void testLabelRunnable() { - final AtomicInteger counter = new AtomicInteger(); - final String expectedMessage = randomAlphaOfLength(10); - - final Runnable labelledRunnable = labelRunnable(counter::incrementAndGet, expectedMessage); - - assertThat(labelledRunnable.toString(), is(expectedMessage)); - - assertThat(counter.get(), is(0)); - labelledRunnable.run(); - assertThat(counter.get(), is(1)); - labelledRunnable.run(); - assertThat(counter.get(), is(2)); - - assertThat(labelledRunnable.toString(), is(expectedMessage)); - } -} From d524a623f064e4f7186c09211ae02261f36e0077 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 17:47:27 +0100 Subject: [PATCH 09/76] Remove labelRunnable() and use an AbstractRunnable --- .../cluster/coordination/PeerFinder.java | 57 +++++++++++++------ 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 3bd9a6d8da976..7e7101344e7d7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -54,7 +54,6 @@ import java.util.function.Supplier; import static java.util.Collections.unmodifiableList; -import static org.elasticsearch.cluster.coordination.RunnableUtils.labelRunnable; import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; public abstract class PeerFinder extends AbstractLifecycleComponent { @@ -249,29 +248,55 @@ private void handleWakeUpUnderLock(DiscoveryNodes lastAcceptedNodes) { masterNodes.forEach(e -> startProbe(e.value)); if (resolveInProgress.compareAndSet(false, true)) { - executorService.get().execute(labelRunnable(() -> { - // No synchronisation required for most of this - List providedAddresses - = new ArrayList<>(hostsProvider.buildDynamicHosts((hosts, limitPortCounts) - -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, - transportService, resolveTimeout))); - - lastConnectedAddresses.set(unmodifiableList(providedAddresses)); - - logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); - synchronized (mutex) { - providedAddresses.forEach(ActivePeerFinder.this::startProbe); + transportService.getThreadPool().generic().execute(new AbstractRunnable() { + @Override + public void onFailure(Exception e) { } - resolveInProgress.set(false); - }, "PeerFinder resolving unicast hosts list")); + @Override + protected void doRun() { + // No synchronisation required for most of this + List providedAddresses + = new ArrayList<>(hostsProvider.buildDynamicHosts((hosts, limitPortCounts) + -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, + transportService, resolveTimeout))); + + lastConnectedAddresses.set(unmodifiableList(providedAddresses)); + + logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); + synchronized (mutex) { + providedAddresses.forEach(ActivePeerFinder.this::startProbe); + } + } + + @Override + public void onAfter() { + super.onAfter(); + resolveInProgress.set(false); + } + + @Override + public String toString() { + return "PeerFinder resolving unicast hosts list"; + } + }); } final List transportAddresses = lastConnectedAddresses.get(); logger.trace("ActivePeerFinder#handleNextWakeUp(): probing supplied transport addresses {}", transportAddresses); transportAddresses.forEach(this::startProbe); - futureExecutor.schedule(labelRunnable(this::handleWakeUp, "ActivePeerFinder::handleWakeUp"), findPeersDelay); + futureExecutor.schedule(new Runnable() { + @Override + public void run() { + handleWakeUp(); + } + + @Override + public String toString() { + return "ActivePeerFinder::handleWakeUp"; + } + }, findPeersDelay); } void startProbe(DiscoveryNode discoveryNode) { From 93ce7937943623ed39e2606ed78dc59630c9227e Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 17:51:49 +0100 Subject: [PATCH 10/76] Just created this, don't need another copy --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 7e7101344e7d7..e5165bebe3a7a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -385,7 +385,7 @@ private void onProbeSuccess(final DiscoveryNode discoveryNode) { List knownNodes = new ArrayList<>(foundPeers.keySet()); transportService.sendRequest(discoveryNode, REQUEST_PEERS_ACTION_NAME, - new PeersRequest(getLocalNode(), new ArrayList<>(knownNodes)), + new PeersRequest(getLocalNode(), knownNodes), new TransportResponseHandler() { @Override From 38ef5428dedbd9393fe728be5fd9677c9d935401 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 17:52:26 +0100 Subject: [PATCH 11/76] Rename --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index e5165bebe3a7a..e5e917f00f91b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -198,7 +198,7 @@ private class ActivePeerFinder { private final Map foundPeers; private final Set probedAddresses = new HashSet<>(); private final AtomicBoolean resolveInProgress = new AtomicBoolean(); - private final AtomicReference> lastConnectedAddresses = new AtomicReference<>(Collections.emptyList()); + private final AtomicReference> lastResolvedAddresses = new AtomicReference<>(Collections.emptyList()); private final Map connectedNodes = newConcurrentMap(); ActivePeerFinder() { @@ -261,7 +261,7 @@ protected void doRun() { -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, transportService, resolveTimeout))); - lastConnectedAddresses.set(unmodifiableList(providedAddresses)); + lastResolvedAddresses.set(unmodifiableList(providedAddresses)); logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); synchronized (mutex) { @@ -282,7 +282,7 @@ public String toString() { }); } - final List transportAddresses = lastConnectedAddresses.get(); + final List transportAddresses = lastResolvedAddresses.get(); logger.trace("ActivePeerFinder#handleNextWakeUp(): probing supplied transport addresses {}", transportAddresses); transportAddresses.forEach(this::startProbe); From 578a57fa9366e34417c9a86fc1adf19b548eeee2 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 17:53:12 +0100 Subject: [PATCH 12/76] Rename --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index e5e917f00f91b..34473dc22e616 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -196,7 +196,7 @@ public interface TransportAddressConnector { private class ActivePeerFinder { private boolean running; private final Map foundPeers; - private final Set probedAddresses = new HashSet<>(); + private final Set probedAddressesSinceLastWakeUp = new HashSet<>(); private final AtomicBoolean resolveInProgress = new AtomicBoolean(); private final AtomicReference> lastResolvedAddresses = new AtomicReference<>(Collections.emptyList()); private final Map connectedNodes = newConcurrentMap(); @@ -238,7 +238,7 @@ private void handleWakeUpUnderLock(DiscoveryNodes lastAcceptedNodes) { for (final FoundPeer foundPeer : foundPeers.values()) { foundPeer.peersRequestedThisRound = false; } - probedAddresses.clear(); + probedAddressesSinceLastWakeUp.clear(); logger.trace("ActivePeerFinder#handleWakeUp(): probing found peers {}", foundPeers.keySet()); foundPeers.keySet().forEach(this::startProbe); @@ -315,7 +315,7 @@ void startProbe(TransportAddress transportAddress) { return; } - if (probedAddresses.add(transportAddress) == false) { + if (probedAddressesSinceLastWakeUp.add(transportAddress) == false) { logger.trace("startProbe({}) already probed this round", transportAddress); return; } From 6ec315c6bc87f439073e907938f7a19bcf08ab52 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 17:54:33 +0100 Subject: [PATCH 13/76] Not so private --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 34473dc22e616..bc873cbb6e65c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -194,8 +194,8 @@ public interface TransportAddressConnector { } private class ActivePeerFinder { - private boolean running; - private final Map foundPeers; + boolean running; + final Map foundPeers; private final Set probedAddressesSinceLastWakeUp = new HashSet<>(); private final AtomicBoolean resolveInProgress = new AtomicBoolean(); private final AtomicReference> lastResolvedAddresses = new AtomicReference<>(Collections.emptyList()); From a933a7fbecd9a617b3c36d44d8aaff9aedeee156 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 18:01:06 +0100 Subject: [PATCH 14/76] TODOs --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index bc873cbb6e65c..fda1cf9cb7f7b 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -58,6 +58,7 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { + // TODO can this class handle these requests? public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/requestpeers"; // the time between attempts to find all peers @@ -195,7 +196,7 @@ public interface TransportAddressConnector { private class ActivePeerFinder { boolean running; - final Map foundPeers; + final Map foundPeers; // TODO contemplate whether this needs periodic cleaning private final Set probedAddressesSinceLastWakeUp = new HashSet<>(); private final AtomicBoolean resolveInProgress = new AtomicBoolean(); private final AtomicReference> lastResolvedAddresses = new AtomicReference<>(Collections.emptyList()); @@ -221,6 +222,8 @@ void stop() { private void handleWakeUp() { // Must not hold lock when calling out to Legislator to avoid a lock cycle + // TODO can we avoid calling out at all and just pass lastAcceptedNodes in? Need to be able to get a new value + // somehow on a wakeup. assert holdsLock() == false : "Peerfinder mutex is held in error"; final DiscoveryNodes lastAcceptedNodes = getLastAcceptedNodes(); synchronized (mutex) { @@ -332,6 +335,7 @@ void startProbe(TransportAddress transportAddress) { logger.trace("startProbe({}) found disconnected {}, probing again", transportAddress, cachedNode); connectedNodes.remove(transportAddress, cachedNode); + // TODO Should we also clean up foundPeers? } else { logger.trace("startProbe({}) no cached node found, probing", transportAddress); } @@ -380,6 +384,7 @@ private void onProbeSuccess(final DiscoveryNode discoveryNode) { if (discoveryNode.equals(getLocalNode())) { return; + // TODO at the moment this puts the local node into foundPeers. Can we do without? } List knownNodes = new ArrayList<>(foundPeers.keySet()); From dddeecd0699bcac2d169053a16c384d409af11b1 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 31 Jul 2018 18:04:11 +0100 Subject: [PATCH 15/76] Renaming --- .../cluster/coordination/PeerFinder.java | 2 +- .../cluster/coordination/PeersResponse.java | 24 +++++++++---------- .../cluster/coordination/MessagesTests.java | 6 ++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index fda1cf9cb7f7b..d0a12d52b2c95 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -417,7 +417,7 @@ public void handleResponse(PeersResponse response) { } } else { foundMasterNode = false; - response.getDiscoveryNodes().stream().map(DiscoveryNode::getAddress) + response.getCandidateNodes().stream().map(DiscoveryNode::getAddress) .forEach(ActivePeerFinder.this::startProbe); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java index d9cbaa348115c..22bda7b0013f6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java @@ -31,28 +31,28 @@ public class PeersResponse extends TransportResponse { private final Optional masterNode; - private final List discoveryNodes; + private final List candidateNodes; private final long term; - public PeersResponse(Optional masterNode, List discoveryNodes, long term) { + public PeersResponse(Optional masterNode, List candidateNodes, long term) { + assert masterNode.isPresent() == candidateNodes.isEmpty(); this.masterNode = masterNode; - this.discoveryNodes = discoveryNodes; + this.candidateNodes = candidateNodes; this.term = term; - assert masterNode.isPresent() == discoveryNodes.isEmpty(); } public PeersResponse(StreamInput in) throws IOException { masterNode = Optional.ofNullable(in.readOptionalWriteable(DiscoveryNode::new)); - discoveryNodes = in.readList(DiscoveryNode::new); + candidateNodes = in.readList(DiscoveryNode::new); term = in.readLong(); - assert masterNode.isPresent() == discoveryNodes.isEmpty(); + assert masterNode.isPresent() == candidateNodes.isEmpty(); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeOptionalWriteable(masterNode.orElse(null)); - out.writeList(discoveryNodes); + out.writeList(candidateNodes); out.writeLong(term); } @@ -60,8 +60,8 @@ public Optional getMasterNode() { return masterNode; } - public List getDiscoveryNodes() { - return discoveryNodes; + public List getCandidateNodes() { + return candidateNodes; } public long getTerm() { @@ -72,7 +72,7 @@ public long getTerm() { public String toString() { return "PeersResponse{" + "masterNode=" + masterNode + - ", discoveryNodes=" + discoveryNodes + + ", candidateNodes=" + candidateNodes + ", term=" + term + '}'; } @@ -84,11 +84,11 @@ public boolean equals(Object o) { PeersResponse that = (PeersResponse) o; return term == that.term && Objects.equals(masterNode, that.masterNode) && - Objects.equals(discoveryNodes, that.discoveryNodes); + Objects.equals(candidateNodes, that.candidateNodes); } @Override public int hashCode() { - return Objects.hash(masterNode, discoveryNodes, term); + return Objects.hash(masterNode, candidateNodes, term); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java index 979e84f1832e4..e98304a5f29bd 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java @@ -201,12 +201,12 @@ public void testPeersResponseEqualsHashCodeSerialization() { in -> { final long term = in.getTerm(); if (randomBoolean()) { - return new PeersResponse(in.getMasterNode(), in.getDiscoveryNodes(), + return new PeersResponse(in.getMasterNode(), in.getCandidateNodes(), randomValueOtherThan(term, ESTestCase::randomNonNegativeLong)); } else { if (in.getMasterNode().isPresent()) { if (randomBoolean()) { - return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), in.getDiscoveryNodes(), term); + return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), in.getCandidateNodes(), term); } else { return new PeersResponse(Optional.empty(), singletonList(createNode(randomAlphaOfLength(10))), term); } @@ -214,7 +214,7 @@ public void testPeersResponseEqualsHashCodeSerialization() { if (randomBoolean()) { return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), emptyList(), term); } else { - return new PeersResponse(in.getMasterNode(), modifyDiscoveryNodesList(in.getDiscoveryNodes(), false), term); + return new PeersResponse(in.getMasterNode(), modifyDiscoveryNodesList(in.getCandidateNodes(), false), term); } } } From 2177d0a1a2a53afb21da34792802d361939c8d41 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 08:13:11 +0100 Subject: [PATCH 16/76] lastAcceptedNodes cannot change while the peerfinder is active, so just pass it in at activation --- .../cluster/coordination/PeerFinder.java | 29 +++------ .../cluster/coordination/PeerFinderTests.java | 64 +++++++++---------- 2 files changed, 40 insertions(+), 53 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index d0a12d52b2c95..65c6805ffc05f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -98,20 +98,16 @@ public Iterable getFoundPeers() { } } - public void activate() { + public void activate(final DiscoveryNodes lastAcceptedNodes) { if (lifecycle.started() == false) { logger.debug("ignoring activation, not started"); return; } - // Must not hold lock when calling out to Legislator to avoid a lock cycle - assert holdsLock() == false : "Peerfinder mutex is held in error"; - final DiscoveryNodes lastAcceptedNodes = getLastAcceptedNodes(); - synchronized (mutex) { assert peerFinder.isPresent() == false; - peerFinder = Optional.of(new ActivePeerFinder()); - peerFinder.get().start(lastAcceptedNodes); + peerFinder = Optional.of(new ActivePeerFinder(lastAcceptedNodes)); + peerFinder.get().start(); } } @@ -181,8 +177,6 @@ protected void doStop() { protected void doClose() { } - protected abstract DiscoveryNodes getLastAcceptedNodes(); - protected abstract void onMasterFoundByProbe(DiscoveryNode masterNode, long term); public interface TransportAddressConnector { @@ -195,6 +189,7 @@ public interface TransportAddressConnector { } private class ActivePeerFinder { + private final DiscoveryNodes lastAcceptedNodes; boolean running; final Map foundPeers; // TODO contemplate whether this needs periodic cleaning private final Set probedAddressesSinceLastWakeUp = new HashSet<>(); @@ -202,16 +197,17 @@ private class ActivePeerFinder { private final AtomicReference> lastResolvedAddresses = new AtomicReference<>(Collections.emptyList()); private final Map connectedNodes = newConcurrentMap(); - ActivePeerFinder() { + ActivePeerFinder(DiscoveryNodes lastAcceptedNodes) { + this.lastAcceptedNodes = lastAcceptedNodes; foundPeers = newConcurrentMap(); foundPeers.put(getLocalNode(), new FoundPeer(getLocalNode())); } - void start(DiscoveryNodes lastAcceptedNodes) { + void start() { assert holdsLock() : "PeerFinder mutex not held"; assert running == false; running = true; - handleWakeUpUnderLock(lastAcceptedNodes); + handleWakeUpUnderLock(); } void stop() { @@ -221,17 +217,12 @@ void stop() { } private void handleWakeUp() { - // Must not hold lock when calling out to Legislator to avoid a lock cycle - // TODO can we avoid calling out at all and just pass lastAcceptedNodes in? Need to be able to get a new value - // somehow on a wakeup. - assert holdsLock() == false : "Peerfinder mutex is held in error"; - final DiscoveryNodes lastAcceptedNodes = getLastAcceptedNodes(); synchronized (mutex) { - handleWakeUpUnderLock(lastAcceptedNodes); + handleWakeUpUnderLock(); } } - private void handleWakeUpUnderLock(DiscoveryNodes lastAcceptedNodes) { + private void handleWakeUpUnderLock() { assert holdsLock() : "PeerFinder mutex not held"; if (running == false) { diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index a38388da49635..6ec6f3c19045e 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -73,6 +73,8 @@ public class PeerFinderTests extends ESTestCase { private Set disconnectedNodes = new HashSet<>(); private Set connectedNodes = new HashSet<>(); + private DiscoveryNodes lastAcceptedNodes; + class MockTransportAddressConnector implements TransportAddressConnector { final Set reachableNodes = new HashSet<>(); @@ -108,7 +110,6 @@ public List buildDynamicHosts(HostsResolver hostsResolver) { } static class TestPeerFinder extends PeerFinder { - DiscoveryNodes lastAcceptedNodes; DiscoveryNode discoveredMasterNode; TestPeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, @@ -117,24 +118,18 @@ static class TestPeerFinder extends PeerFinder { super(settings, transportService, hostsProvider, futureExecutor, transportAddressConnector, executorServiceFactory); } - @Override - protected DiscoveryNodes getLastAcceptedNodes() { - assert holdsLock() == false : "PeerFinder lock held in error"; - return lastAcceptedNodes; - } - @Override protected void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { assert holdsLock() == false : "PeerFinder lock held in error"; assertThat(discoveredMasterNode, nullValue()); discoveredMasterNode = masterNode; } + } - void updateLastAcceptedNodes(Consumer onBuilder) { - final Builder builder = DiscoveryNodes.builder(lastAcceptedNodes); - onBuilder.accept(builder); - lastAcceptedNodes = builder.build(); - } + void updateLastAcceptedNodes(Consumer onBuilder) { + final Builder builder = DiscoveryNodes.builder(lastAcceptedNodes); + onBuilder.accept(builder); + lastAcceptedNodes = builder.build(); } @Before @@ -161,10 +156,11 @@ public boolean nodeConnected(DiscoveryNode node) { transportService.start(); transportService.acceptIncomingRequests(); + lastAcceptedNodes = DiscoveryNodes.builder().localNodeId(localNode.getId()).add(localNode).build(); + peerFinder = new TestPeerFinder(settings, transportService, unicastHostsProvider, deterministicTaskQueue.getFutureExecutor(), transportAddressConnector, deterministicTaskQueue::getExecutorService); - peerFinder.lastAcceptedNodes = DiscoveryNodes.builder().localNodeId(localNode.getId()).add(localNode).build(); peerFinder.start(); } @@ -175,7 +171,7 @@ public void deactivateAndRunRemainingTasks() { } public void testActivationAndDeactivation() { - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); assertFoundPeers(); assertTrue(peerFinder.isActive()); @@ -188,7 +184,7 @@ public void testAddsReachableNodesFromUnicastHostsList() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); @@ -199,7 +195,7 @@ public void testDoesNotAddUnreachableNodesFromUnicastHostsList() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.unreachableAddresses.add(otherNode.getAddress()); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(); @@ -212,7 +208,7 @@ public void testDoesNotAddNonMasterEligibleNodesFromUnicastHostsList() { unicastHostsProvider.providedAddresses.add(nonMasterNode.getAddress()); transportAddressConnector.reachableNodes.add(nonMasterNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(); @@ -221,7 +217,7 @@ public void testDoesNotAddNonMasterEligibleNodesFromUnicastHostsList() { } public void testChecksUnicastHostsForChanges() { - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(); @@ -240,7 +236,7 @@ public void testDeactivationClearsPastKnowledge() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); @@ -248,17 +244,17 @@ public void testDeactivationClearsPastKnowledge() { peerFinder.deactivate(); unicastHostsProvider.providedAddresses.clear(); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(); } public void testAddsReachableNodesFromClusterState() { final DiscoveryNode otherNode = newDiscoveryNode("node-in-cluster-state"); - peerFinder.updateLastAcceptedNodes(b -> b.add(otherNode)); + updateLastAcceptedNodes(b -> b.add(otherNode)); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); @@ -266,10 +262,10 @@ public void testAddsReachableNodesFromClusterState() { public void testDoesNotAddUnreachableNodesFromClusterState() { final DiscoveryNode otherNode = newDiscoveryNode("node-in-cluster-state"); - peerFinder.updateLastAcceptedNodes(b -> b.add(otherNode)); + updateLastAcceptedNodes(b -> b.add(otherNode)); transportAddressConnector.unreachableAddresses.add(otherNode.getAddress()); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(); } @@ -281,7 +277,7 @@ public void testAddsReachableNodesFromIncomingRequests() { transportAddressConnector.reachableNodes.add(sourceNode); transportAddressConnector.reachableNodes.add(otherKnownNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.singletonList(otherKnownNode))); runAllRunnableTasks(); @@ -295,7 +291,7 @@ public void testDoesNotAddUnreachableNodesFromIncomingRequests() { transportAddressConnector.reachableNodes.add(sourceNode); transportAddressConnector.unreachableAddresses.add(otherKnownNode.getAddress()); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.singletonList(otherKnownNode))); runAllRunnableTasks(); @@ -309,7 +305,7 @@ public void testDoesNotAddUnreachableSourceNodeFromIncomingRequests() { transportAddressConnector.unreachableAddresses.add(sourceNode.getAddress()); transportAddressConnector.reachableNodes.add(otherKnownNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.singletonList(otherKnownNode))); runAllRunnableTasks(); @@ -321,7 +317,7 @@ public void testRequestsPeersIncludingKnownPeersInRequest() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); @@ -337,7 +333,7 @@ public void testAddsReachablePeersFromResponse() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); @@ -358,7 +354,7 @@ public void testAddsReachableMasterFromResponse() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); @@ -380,7 +376,7 @@ public void testHandlesDiscoveryOfMasterFromResponseFromMaster() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); @@ -399,7 +395,7 @@ public void testOnlyRequestsPeersOncePerRoundButDoesRetryNextRound() { final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); transportAddressConnector.reachableNodes.add(sourceNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); peerFinder.handlePeersRequest(new PeersRequest(sourceNode, emptyList())); runAllRunnableTasks(); assertFoundPeers(sourceNode); @@ -433,7 +429,7 @@ public void testDoesNotReconnectToNodesOnceConnected() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); @@ -450,7 +446,7 @@ public void testReconnectsToDisconnectedNodes() { unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); - peerFinder.activate(); + peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(otherNode); From 320ebdd4056bf6eec514088c980769d5d1439500 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 08:21:38 +0100 Subject: [PATCH 17/76] Do not need the local node to be in foundPeers, so remove it --- .../elasticsearch/cluster/coordination/PeerFinder.java | 10 ++++------ .../cluster/coordination/PeerFinderTests.java | 5 +++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 65c6805ffc05f..6bbc67c84c86c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -200,7 +200,6 @@ private class ActivePeerFinder { ActivePeerFinder(DiscoveryNodes lastAcceptedNodes) { this.lastAcceptedNodes = lastAcceptedNodes; foundPeers = newConcurrentMap(); - foundPeers.put(getLocalNode(), new FoundPeer(getLocalNode())); } void start() { @@ -365,6 +364,10 @@ private void onProbeSuccess(final DiscoveryNode discoveryNode) { return; } + if (discoveryNode.equals(getLocalNode())) { + return; + } + final FoundPeer foundPeer = foundPeers.computeIfAbsent(discoveryNode, FoundPeer::new); assert foundPeer.discoveryNode.equals(discoveryNode); @@ -373,11 +376,6 @@ private void onProbeSuccess(final DiscoveryNode discoveryNode) { } foundPeer.peersRequestedThisRound = true; - if (discoveryNode.equals(getLocalNode())) { - return; - // TODO at the moment this puts the local node into foundPeers. Can we do without? - } - List knownNodes = new ArrayList<>(foundPeers.keySet()); transportService.sendRequest(discoveryNode, REQUEST_PEERS_ACTION_NAME, diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 6ec6f3c19045e..a06fa268ab27e 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -56,6 +56,7 @@ import static org.elasticsearch.cluster.coordination.PeerFinder.REQUEST_PEERS_ACTION_NAME; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -325,7 +326,7 @@ public void testRequestsPeersIncludingKnownPeersInRequest() { final CapturedRequest[] capturedRequests = capturingTransport.getCapturedRequestsAndClear(); assertThat(capturedRequests.length, is(1)); final PeersRequest peersRequest = (PeersRequest) capturedRequests[0].request; - assertThat(peersRequest.getDiscoveryNodes(), containsInAnyOrder(localNode, otherNode)); + assertThat(peersRequest.getDiscoveryNodes(), contains(otherNode)); } public void testAddsReachablePeersFromResponse() { @@ -477,7 +478,7 @@ private void respondToRequests(Function responseFa private void assertFoundPeers(DiscoveryNode... expectedNodes) { assertThat(peerFinder.getFoundPeers(), - equalTo(Stream.concat(Arrays.stream(expectedNodes), Stream.of(localNode)).collect(Collectors.toSet()))); + equalTo(Arrays.stream(expectedNodes).collect(Collectors.toSet()))); } private DiscoveryNode newDiscoveryNode(String nodeId) { From 37122085c432e960a09a1e8173762412688a1d1d Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 08:24:21 +0100 Subject: [PATCH 18/76] Rename discoveryNodes -> knownPeers in PeersRequest --- .../cluster/coordination/PeerFinder.java | 2 +- .../cluster/coordination/PeersRequest.java | 21 ++++++++++--------- .../cluster/coordination/PeerFinderTests.java | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 6bbc67c84c86c..3792fc725e222 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -138,7 +138,7 @@ public List handlePeersRequest(PeersRequest peersRequest) { assert peersRequest.getSourceNode().equals(getLocalNode()) == false; final ActivePeerFinder activePeerFinder = getActivePeerFinder(); activePeerFinder.startProbe(peersRequest.getSourceNode().getAddress()); - peersRequest.getDiscoveryNodes().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); + peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); return new ArrayList<>(activePeerFinder.foundPeers.keySet()); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java index 2946c704beac5..3ff5f77bc80ac 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java @@ -30,28 +30,29 @@ public class PeersRequest extends TransportRequest { private final DiscoveryNode sourceNode; - private final List discoveryNodes; + private final List knownPeers; - public PeersRequest(DiscoveryNode sourceNode, List discoveryNodes) { + public PeersRequest(DiscoveryNode sourceNode, List knownPeers) { + assert knownPeers.contains(sourceNode) == false : "local node is not a peer"; this.sourceNode = sourceNode; - this.discoveryNodes = discoveryNodes; + this.knownPeers = knownPeers; } public PeersRequest(StreamInput in) throws IOException { super(in); sourceNode = new DiscoveryNode(in); - discoveryNodes = in.readList(DiscoveryNode::new); + knownPeers = in.readList(DiscoveryNode::new); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); sourceNode.writeTo(out); - out.writeList(discoveryNodes); + out.writeList(knownPeers); } - public List getDiscoveryNodes() { - return discoveryNodes; + public List getKnownPeers() { + return knownPeers; } public DiscoveryNode getSourceNode() { @@ -62,7 +63,7 @@ public DiscoveryNode getSourceNode() { public String toString() { return "PeersRequest{" + "sourceNode=" + sourceNode + - ", discoveryNodes=" + discoveryNodes + + ", knownPeers=" + knownPeers + '}'; } @@ -72,11 +73,11 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; PeersRequest that = (PeersRequest) o; return Objects.equals(sourceNode, that.sourceNode) && - Objects.equals(discoveryNodes, that.discoveryNodes); + Objects.equals(knownPeers, that.knownPeers); } @Override public int hashCode() { - return Objects.hash(sourceNode, discoveryNodes); + return Objects.hash(sourceNode, knownPeers); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index a06fa268ab27e..3bf442491eaf0 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -326,7 +326,7 @@ public void testRequestsPeersIncludingKnownPeersInRequest() { final CapturedRequest[] capturedRequests = capturingTransport.getCapturedRequestsAndClear(); assertThat(capturedRequests.length, is(1)); final PeersRequest peersRequest = (PeersRequest) capturedRequests[0].request; - assertThat(peersRequest.getDiscoveryNodes(), contains(otherNode)); + assertThat(peersRequest.getKnownPeers(), contains(otherNode)); } public void testAddsReachablePeersFromResponse() { From a566ffce2ef85149c0155378e08c16cd3c8ba72b Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 08:26:32 +0100 Subject: [PATCH 19/76] Rename candidateNodes to knownPeers in PeersResponse too --- .../cluster/coordination/PeerFinder.java | 2 +- .../cluster/coordination/PeersResponse.java | 24 +++++++++---------- .../cluster/coordination/MessagesTests.java | 10 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 3792fc725e222..f5bba9e8afe52 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -406,7 +406,7 @@ public void handleResponse(PeersResponse response) { } } else { foundMasterNode = false; - response.getCandidateNodes().stream().map(DiscoveryNode::getAddress) + response.getKnownPeers().stream().map(DiscoveryNode::getAddress) .forEach(ActivePeerFinder.this::startProbe); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java index 22bda7b0013f6..58f18a77605e7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java @@ -31,28 +31,28 @@ public class PeersResponse extends TransportResponse { private final Optional masterNode; - private final List candidateNodes; + private final List knownPeers; private final long term; - public PeersResponse(Optional masterNode, List candidateNodes, long term) { - assert masterNode.isPresent() == candidateNodes.isEmpty(); + public PeersResponse(Optional masterNode, List knownPeers, long term) { + assert masterNode.isPresent() == knownPeers.isEmpty(); this.masterNode = masterNode; - this.candidateNodes = candidateNodes; + this.knownPeers = knownPeers; this.term = term; } public PeersResponse(StreamInput in) throws IOException { masterNode = Optional.ofNullable(in.readOptionalWriteable(DiscoveryNode::new)); - candidateNodes = in.readList(DiscoveryNode::new); + knownPeers = in.readList(DiscoveryNode::new); term = in.readLong(); - assert masterNode.isPresent() == candidateNodes.isEmpty(); + assert masterNode.isPresent() == knownPeers.isEmpty(); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeOptionalWriteable(masterNode.orElse(null)); - out.writeList(candidateNodes); + out.writeList(knownPeers); out.writeLong(term); } @@ -60,8 +60,8 @@ public Optional getMasterNode() { return masterNode; } - public List getCandidateNodes() { - return candidateNodes; + public List getKnownPeers() { + return knownPeers; } public long getTerm() { @@ -72,7 +72,7 @@ public long getTerm() { public String toString() { return "PeersResponse{" + "masterNode=" + masterNode + - ", candidateNodes=" + candidateNodes + + ", knownPeers=" + knownPeers + ", term=" + term + '}'; } @@ -84,11 +84,11 @@ public boolean equals(Object o) { PeersResponse that = (PeersResponse) o; return term == that.term && Objects.equals(masterNode, that.masterNode) && - Objects.equals(candidateNodes, that.candidateNodes); + Objects.equals(knownPeers, that.knownPeers); } @Override public int hashCode() { - return Objects.hash(masterNode, candidateNodes, term); + return Objects.hash(masterNode, knownPeers, term); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java index e98304a5f29bd..c0e9494bed611 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java @@ -175,11 +175,11 @@ public void testPeersRequestEqualsHashCodeSerialization() { EqualsHashCodeTestUtils.checkEqualsAndHashCode(initialPeersRequest, publishRequest -> copyWriteable(publishRequest, writableRegistry(), PeersRequest::new), in -> { - final List discoveryNodes = new ArrayList<>(in.getDiscoveryNodes()); + final List discoveryNodes = new ArrayList<>(in.getKnownPeers()); if (randomBoolean()) { return new PeersRequest(createNode(randomAlphaOfLength(10)), discoveryNodes); } else { - return new PeersRequest(in.getSourceNode(), modifyDiscoveryNodesList(in.getDiscoveryNodes(), true)); + return new PeersRequest(in.getSourceNode(), modifyDiscoveryNodesList(in.getKnownPeers(), true)); } }); } @@ -201,12 +201,12 @@ public void testPeersResponseEqualsHashCodeSerialization() { in -> { final long term = in.getTerm(); if (randomBoolean()) { - return new PeersResponse(in.getMasterNode(), in.getCandidateNodes(), + return new PeersResponse(in.getMasterNode(), in.getKnownPeers(), randomValueOtherThan(term, ESTestCase::randomNonNegativeLong)); } else { if (in.getMasterNode().isPresent()) { if (randomBoolean()) { - return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), in.getCandidateNodes(), term); + return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), in.getKnownPeers(), term); } else { return new PeersResponse(Optional.empty(), singletonList(createNode(randomAlphaOfLength(10))), term); } @@ -214,7 +214,7 @@ public void testPeersResponseEqualsHashCodeSerialization() { if (randomBoolean()) { return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), emptyList(), term); } else { - return new PeersResponse(in.getMasterNode(), modifyDiscoveryNodesList(in.getCandidateNodes(), false), term); + return new PeersResponse(in.getMasterNode(), modifyDiscoveryNodesList(in.getKnownPeers(), false), term); } } } From 5f12c03e25f0c496052ac5de53d2607ea683eadc Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 08:26:47 +0100 Subject: [PATCH 20/76] Imports --- .../org/elasticsearch/cluster/coordination/PeerFinderTests.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 3bf442491eaf0..4c392e1b98432 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -47,7 +47,6 @@ import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; -import java.util.stream.Stream; import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; @@ -55,7 +54,6 @@ import static java.util.Collections.singletonList; import static org.elasticsearch.cluster.coordination.PeerFinder.REQUEST_PEERS_ACTION_NAME; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; -import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.equalTo; From dcfa1cac53a82fadaaff45b93778b0344fc23bc5 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 08:28:55 +0100 Subject: [PATCH 21/76] Private class --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index f5bba9e8afe52..4e6b29877bc0a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -431,7 +431,7 @@ public String executor() { } } - class FoundPeer { + private class FoundPeer { final DiscoveryNode discoveryNode; boolean peersRequestedThisRound; From 53c1dfff19112476ece67068d7dfa5d22b388f6e Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 08:42:47 +0100 Subject: [PATCH 22/76] Private --- .../org/elasticsearch/cluster/coordination/PeerFinderTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 4c392e1b98432..8d9405ef8ad63 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -125,7 +125,7 @@ protected void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { } } - void updateLastAcceptedNodes(Consumer onBuilder) { + private void updateLastAcceptedNodes(Consumer onBuilder) { final Builder builder = DiscoveryNodes.builder(lastAcceptedNodes); onBuilder.accept(builder); lastAcceptedNodes = builder.build(); From 012002917ddc1cee70eb10e68da23f7f0937250c Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 08:46:54 +0100 Subject: [PATCH 23/76] Add assertion of received term --- .../cluster/coordination/PeerFinderTests.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 8d9405ef8ad63..265513e2e590c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -41,6 +41,7 @@ import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.OptionalLong; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.function.Consumer; @@ -110,6 +111,7 @@ public List buildDynamicHosts(HostsResolver hostsResolver) { static class TestPeerFinder extends PeerFinder { DiscoveryNode discoveredMasterNode; + OptionalLong discoveredMasterTerm = OptionalLong.empty(); TestPeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, @@ -121,7 +123,9 @@ static class TestPeerFinder extends PeerFinder { protected void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { assert holdsLock() == false : "PeerFinder lock held in error"; assertThat(discoveredMasterNode, nullValue()); + assertFalse(discoveredMasterTerm.isPresent()); discoveredMasterNode = masterNode; + discoveredMasterTerm = OptionalLong.of(term); } } @@ -368,6 +372,7 @@ public void testAddsReachableMasterFromResponse() { runAllRunnableTasks(); assertFoundPeers(otherNode, discoveredMaster); assertThat(peerFinder.discoveredMasterNode, nullValue()); + assertFalse(peerFinder.discoveredMasterTerm.isPresent()); } public void testHandlesDiscoveryOfMasterFromResponseFromMaster() { @@ -380,14 +385,16 @@ public void testHandlesDiscoveryOfMasterFromResponseFromMaster() { assertFoundPeers(otherNode); + final long term = randomNonNegativeLong(); respondToRequests(node -> { assertThat(node, is(otherNode)); - return new PeersResponse(Optional.of(otherNode), emptyList(), randomNonNegativeLong()); + return new PeersResponse(Optional.of(otherNode), emptyList(), term); }); runAllRunnableTasks(); assertFoundPeers(otherNode); assertThat(peerFinder.discoveredMasterNode, is(otherNode)); + assertThat(peerFinder.discoveredMasterTerm, is(OptionalLong.of(term))); } public void testOnlyRequestsPeersOncePerRoundButDoesRetryNextRound() { From 3fab139f742063feef61abf8c478080886dade7d Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 1 Aug 2018 09:13:11 +0100 Subject: [PATCH 24/76] Start work on having the PeerFinder respond to PeersRequests directly --- .../cluster/coordination/PeerFinder.java | 38 ++++++++++++++++--- .../cluster/coordination/PeersResponse.java | 13 ++++++- .../cluster/coordination/PeerFinderTests.java | 5 +++ 3 files changed, 50 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 4e6b29877bc0a..7bacdd5b365e9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -50,6 +50,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; @@ -78,6 +79,7 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { private final SetOnce executorService = new SetOnce<>(); private Optional peerFinder = Optional.empty(); + private volatile long currentTerm; PeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, @@ -90,6 +92,10 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { this.futureExecutor = futureExecutor; this.transportAddressConnector = transportAddressConnector; this.executorServiceFactory = executorServiceFactory; + + transportService.registerRequestHandler(REQUEST_PEERS_ACTION_NAME, Names.GENERIC, false, false, + PeersRequest::new, + (request, channel, task) -> channel.sendResponse(handlePeersRequest(request))); } public Iterable getFoundPeers() { @@ -133,14 +139,18 @@ private ActivePeerFinder getActivePeerFinder() { return activePeerFinder; } - public List handlePeersRequest(PeersRequest peersRequest) { + PeersResponse handlePeersRequest(PeersRequest peersRequest) { synchronized (mutex) { assert peersRequest.getSourceNode().equals(getLocalNode()) == false; - final ActivePeerFinder activePeerFinder = getActivePeerFinder(); - activePeerFinder.startProbe(peersRequest.getSourceNode().getAddress()); - peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); - return new ArrayList<>(activePeerFinder.foundPeers.keySet()); + if (isActive()) { + final ActivePeerFinder activePeerFinder = getActivePeerFinder(); + activePeerFinder.startProbe(peersRequest.getSourceNode().getAddress()); + peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); + return new PeersResponse(Optional.empty(), new ArrayList<>(activePeerFinder.foundPeers.keySet()), currentTerm); + } } + + return onPeersRequestWhenInactive(peersRequest.getSourceNode()); } public boolean isActive() { @@ -154,6 +164,11 @@ public boolean isActive() { } } + public void setCurrentTerm(long currentTerm) { + // Volatile write, no synchronisation required + this.currentTerm = currentTerm; + } + private DiscoveryNode getLocalNode() { // No synchronisation required final DiscoveryNode localNode = transportService.getLocalNode(); @@ -177,8 +192,21 @@ protected void doStop() { protected void doClose() { } + /** + * Called on receipt of a PeersResponse from a node that believes it's an active leader, which this node should therefore try and join. + */ protected abstract void onMasterFoundByProbe(DiscoveryNode masterNode, long term); + /** + * Called on receipt of a PeersRequest when there is no ActivePeerFinder, which mostly means that this node thinks there's an + * active leader. However, there's no synchronisation around this call so it's possible that it's called if there's no active + * leader, but we have not yet been activated. If so, this should respond indicating that there's no active leader, and no + * known peers. + * + * @param sourceNode The sender of the PeersRequest + */ + protected abstract PeersResponse onPeersRequestWhenInactive(DiscoveryNode sourceNode); + public interface TransportAddressConnector { /** * Identify the node at the given address and establish a full connection to it. diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java index 58f18a77605e7..abd1dce23ab19 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java @@ -35,7 +35,7 @@ public class PeersResponse extends TransportResponse { private final long term; public PeersResponse(Optional masterNode, List knownPeers, long term) { - assert masterNode.isPresent() == knownPeers.isEmpty(); + assert masterNode.isPresent() == knownPeers.isEmpty(); // TODO this should be failing now that the local node is not a known peer?? this.masterNode = masterNode; this.knownPeers = knownPeers; this.term = term; @@ -56,14 +56,25 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(term); } + /** + * @return the node that is currently leading, according to the responding node. + */ public Optional getMasterNode() { return masterNode; } + /** + * @return the collection of known peers of the responding node, or an empty collection if the responding node believes there + * is currently a leader. + */ public List getKnownPeers() { return knownPeers; } + /** + * @return the current term of the responding node. If the responding node is the leader then this is the term in which it is + * currently leading. + */ public long getTerm() { return term; } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 265513e2e590c..c415036cf821b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -127,6 +127,11 @@ protected void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { discoveredMasterNode = masterNode; discoveredMasterTerm = OptionalLong.of(term); } + + @Override + protected PeersResponse onPeersRequestWhenInactive(DiscoveryNode sourceNode) { + throw new AssertionError("TODO"); + } } private void updateLastAcceptedNodes(Consumer onBuilder) { From 4701d351a38ec5f1d1c24b30cb1fd8956cd21f2d Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 14:56:54 +0100 Subject: [PATCH 25/76] Fix assertion --- .../org/elasticsearch/cluster/coordination/PeersResponse.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java index abd1dce23ab19..6e2e36697c6cc 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeersResponse.java @@ -35,7 +35,7 @@ public class PeersResponse extends TransportResponse { private final long term; public PeersResponse(Optional masterNode, List knownPeers, long term) { - assert masterNode.isPresent() == knownPeers.isEmpty(); // TODO this should be failing now that the local node is not a known peer?? + assert masterNode.isPresent() == false || knownPeers.isEmpty(); this.masterNode = masterNode; this.knownPeers = knownPeers; this.term = term; @@ -45,7 +45,7 @@ public PeersResponse(StreamInput in) throws IOException { masterNode = Optional.ofNullable(in.readOptionalWriteable(DiscoveryNode::new)); knownPeers = in.readList(DiscoveryNode::new); term = in.readLong(); - assert masterNode.isPresent() == knownPeers.isEmpty(); + assert masterNode.isPresent() == false || knownPeers.isEmpty(); } @Override From 4b70b2e47b35e51e5b637ab920b73d85e9404198 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 15:04:20 +0100 Subject: [PATCH 26/76] Add test case for values in PeersResponse --- .../cluster/coordination/PeerFinderTests.java | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index c415036cf821b..5faade1125b99 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -56,6 +56,7 @@ import static org.elasticsearch.cluster.coordination.PeerFinder.REQUEST_PEERS_ACTION_NAME; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.emptyArray; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -320,6 +321,29 @@ public void testDoesNotAddUnreachableSourceNodeFromIncomingRequests() { assertFoundPeers(otherKnownNode); } + public void testRespondsToRequestWhenActive() { + final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); + + transportAddressConnector.reachableNodes.add(sourceNode); + + peerFinder.activate(lastAcceptedNodes); + final PeersResponse peersResponse1 = peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.emptyList())); + assertFalse(peersResponse1.getMasterNode().isPresent()); + assertThat(peersResponse1.getKnownPeers(), empty()); // sourceNode is not yet known + assertThat(peersResponse1.getTerm(), is(0L)); + + runAllRunnableTasks(); + + assertFoundPeers(sourceNode); + + final long updatedTerm = randomNonNegativeLong(); + peerFinder.setCurrentTerm(updatedTerm); + final PeersResponse peersResponse2 = peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.emptyList())); + assertFalse(peersResponse2.getMasterNode().isPresent()); + assertThat(peersResponse2.getKnownPeers(), contains(sourceNode)); + assertThat(peersResponse2.getTerm(), is(updatedTerm)); + } + public void testRequestsPeersIncludingKnownPeersInRequest() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); From ffe663797e281fa87f9849dbf32e17852480d569 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 15:10:53 +0100 Subject: [PATCH 27/76] Test that it does delegate to onPeersRequestWhenInactive if inactive --- .../cluster/coordination/PeerFinderTests.java | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 5faade1125b99..f0c16eb9191d5 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -61,6 +61,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; public class PeerFinderTests extends ESTestCase { @@ -113,6 +114,7 @@ public List buildDynamicHosts(HostsResolver hostsResolver) { static class TestPeerFinder extends PeerFinder { DiscoveryNode discoveredMasterNode; OptionalLong discoveredMasterTerm = OptionalLong.empty(); + PeersResponse nextResponseWhenInactive; TestPeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, @@ -131,7 +133,10 @@ protected void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { @Override protected PeersResponse onPeersRequestWhenInactive(DiscoveryNode sourceNode) { - throw new AssertionError("TODO"); + assertThat(nextResponseWhenInactive, not(nullValue())); + final PeersResponse savedResponse = this.nextResponseWhenInactive; + nextResponseWhenInactive = null; // only send this response once. + return savedResponse; } } @@ -344,6 +349,18 @@ public void testRespondsToRequestWhenActive() { assertThat(peersResponse2.getTerm(), is(updatedTerm)); } + public void testDelegatesRequestHandlingWhenInactive() { + final DiscoveryNode masterNode = newDiscoveryNode("master-node"); + final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); + transportAddressConnector.reachableNodes.add(sourceNode); + + final PeersResponse expectedResponse = new PeersResponse(Optional.of(masterNode), Collections.emptyList(), randomNonNegativeLong()); + peerFinder.nextResponseWhenInactive = expectedResponse; + final PeersResponse peersResponse = peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.emptyList())); + assertThat("should have consumed the mock value", peerFinder.nextResponseWhenInactive, nullValue()); + assertThat(peersResponse, equalTo(expectedResponse)); + } + public void testRequestsPeersIncludingKnownPeersInRequest() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); From b2199980e370dd36fe575c4b65cea9d83e7fa75f Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 15:20:31 +0100 Subject: [PATCH 28/76] Also verify that it receives messages from the transport service --- .../cluster/coordination/PeerFinderTests.java | 44 ++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index f0c16eb9191d5..05d8f0e00a5eb 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -30,6 +30,10 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.transport.CapturingTransport; import org.elasticsearch.test.transport.CapturingTransport.CapturedRequest; +import org.elasticsearch.threadpool.ThreadPool.Names; +import org.elasticsearch.transport.TransportException; +import org.elasticsearch.transport.TransportResponse; +import org.elasticsearch.transport.TransportResponseHandler; import org.elasticsearch.transport.TransportService; import org.junit.After; import org.junit.Before; @@ -44,6 +48,7 @@ import java.util.OptionalLong; import java.util.Set; import java.util.concurrent.ExecutorService; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; @@ -76,7 +81,7 @@ public class PeerFinderTests extends ESTestCase { private Set disconnectedNodes = new HashSet<>(); private Set connectedNodes = new HashSet<>(); private DiscoveryNodes lastAcceptedNodes; - + private TransportService transportService; class MockTransportAddressConnector implements TransportAddressConnector { final Set reachableNodes = new HashSet<>(); @@ -164,7 +169,7 @@ public boolean nodeConnected(DiscoveryNode node) { deterministicTaskQueue = new DeterministicTaskQueue(settings); localNode = newDiscoveryNode("local-node"); - final TransportService transportService = new TransportService(settings, capturingTransport, + transportService = new TransportService(settings, capturingTransport, deterministicTaskQueue.getThreadPool(), TransportService.NOOP_TRANSPORT_INTERCEPTOR, boundTransportAddress -> localNode, null, emptySet()); transportService.start(); @@ -361,6 +366,41 @@ public void testDelegatesRequestHandlingWhenInactive() { assertThat(peersResponse, equalTo(expectedResponse)); } + public void testReceivesRequestsFromTransportService() { + final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); + + transportAddressConnector.reachableNodes.add(sourceNode); + + peerFinder.activate(lastAcceptedNodes); + + final AtomicBoolean responseReceived = new AtomicBoolean(); + + transportService.sendRequest(localNode, PeerFinder.REQUEST_PEERS_ACTION_NAME, new PeersRequest(sourceNode, Collections.emptyList()), + new TransportResponseHandler() { + @Override + public void handleResponse(PeersResponse response) { + assertTrue(responseReceived.compareAndSet(false, true)); + assertFalse(response.getMasterNode().isPresent()); + assertThat(response.getKnownPeers(), empty()); // sourceNode is not yet known + assertThat(response.getTerm(), is(0L)); + } + + @Override + public void handleException(TransportException exp) { + throw new AssertionError("unexpected", exp); + } + + @Override + public String executor() { + return Names.SAME; + } + }); + + runAllRunnableTasks(); + assertTrue(responseReceived.get()); + assertFoundPeers(sourceNode); + } + public void testRequestsPeersIncludingKnownPeersInRequest() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); From 52834a1be21aa00e1d62c04f0530d60bba4cbf85 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 15:21:59 +0100 Subject: [PATCH 29/76] TODO resolved --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 7bacdd5b365e9..119d77c9d4969 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -59,7 +59,6 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { - // TODO can this class handle these requests? public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/requestpeers"; // the time between attempts to find all peers From 354c94e0ece7af7e03545a92fb55feefbd229da4 Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 15:22:26 +0100 Subject: [PATCH 30/76] Imports --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 1 - .../org/elasticsearch/cluster/coordination/PeerFinderTests.java | 1 - 2 files changed, 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 119d77c9d4969..6a2126cfe8fb1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -50,7 +50,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 05d8f0e00a5eb..7776e276c22cf 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -32,7 +32,6 @@ import org.elasticsearch.test.transport.CapturingTransport.CapturedRequest; import org.elasticsearch.threadpool.ThreadPool.Names; import org.elasticsearch.transport.TransportException; -import org.elasticsearch.transport.TransportResponse; import org.elasticsearch.transport.TransportResponseHandler; import org.elasticsearch.transport.TransportService; import org.junit.After; From 94cf9a3027525369766359f3a2cedc187c66009a Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 16:21:23 +0100 Subject: [PATCH 31/76] Remove foundPeers and just track the nodes we've sent requests to --- .../cluster/coordination/PeerFinder.java | 33 ++++++++----------- .../cluster/coordination/PeerFinderTests.java | 32 +++++++++++++++--- 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 6a2126cfe8fb1..093c45e605a74 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -98,7 +98,7 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { public Iterable getFoundPeers() { synchronized (mutex) { - return getActivePeerFinder().foundPeers.keySet(); + return getActivePeerFinder().connectedNodes.values(); } } @@ -144,7 +144,7 @@ PeersResponse handlePeersRequest(PeersRequest peersRequest) { final ActivePeerFinder activePeerFinder = getActivePeerFinder(); activePeerFinder.startProbe(peersRequest.getSourceNode().getAddress()); peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); - return new PeersResponse(Optional.empty(), new ArrayList<>(activePeerFinder.foundPeers.keySet()), currentTerm); + return new PeersResponse(Optional.empty(), new ArrayList<>(activePeerFinder.connectedNodes.values()), currentTerm); } } @@ -217,15 +217,14 @@ public interface TransportAddressConnector { private class ActivePeerFinder { private final DiscoveryNodes lastAcceptedNodes; boolean running; - final Map foundPeers; // TODO contemplate whether this needs periodic cleaning - private final Set probedAddressesSinceLastWakeUp = new HashSet<>(); private final AtomicBoolean resolveInProgress = new AtomicBoolean(); private final AtomicReference> lastResolvedAddresses = new AtomicReference<>(Collections.emptyList()); + private final Set probedAddressesSinceLastWakeUp = new HashSet<>(); private final Map connectedNodes = newConcurrentMap(); + final Set peersRequestRecipientsSinceLastWakeUp = new HashSet<>(); ActivePeerFinder(DiscoveryNodes lastAcceptedNodes) { this.lastAcceptedNodes = lastAcceptedNodes; - foundPeers = newConcurrentMap(); } void start() { @@ -254,13 +253,11 @@ private void handleWakeUpUnderLock() { return; } - for (final FoundPeer foundPeer : foundPeers.values()) { - foundPeer.peersRequestedThisRound = false; - } probedAddressesSinceLastWakeUp.clear(); + peersRequestRecipientsSinceLastWakeUp.clear(); - logger.trace("ActivePeerFinder#handleWakeUp(): probing found peers {}", foundPeers.keySet()); - foundPeers.keySet().forEach(this::startProbe); + logger.trace("ActivePeerFinder#handleWakeUp(): probing connected peers {}", connectedNodes.keySet()); + connectedNodes.keySet().forEach(this::startProbe); final ImmutableOpenMap masterNodes = lastAcceptedNodes.getMasterNodes(); logger.trace("ActivePeerFinder#handleWakeUp(): probing nodes in cluster state {}", masterNodes); @@ -351,7 +348,6 @@ void startProbe(TransportAddress transportAddress) { logger.trace("startProbe({}) found disconnected {}, probing again", transportAddress, cachedNode); connectedNodes.remove(transportAddress, cachedNode); - // TODO Should we also clean up foundPeers? } else { logger.trace("startProbe({}) no cached node found, probing", transportAddress); } @@ -362,8 +358,11 @@ void startProbe(TransportAddress transportAddress) { protected void doRun() throws Exception { // No synchronisation required - transportAddressConnector, connectedNodes, onProbeSuccess all threadsafe final DiscoveryNode remoteNode = transportAddressConnector.connectTo(transportAddress); - connectedNodes.put(remoteNode.getAddress(), remoteNode); - onProbeSuccess(remoteNode); + if (remoteNode.isMasterNode()) { + connectedNodes.put(remoteNode.getAddress(), remoteNode); + onProbeSuccess(remoteNode); + } + // TODO prevent concurrent connection attempts to the same address } @Override @@ -394,15 +393,11 @@ private void onProbeSuccess(final DiscoveryNode discoveryNode) { return; } - final FoundPeer foundPeer = foundPeers.computeIfAbsent(discoveryNode, FoundPeer::new); - assert foundPeer.discoveryNode.equals(discoveryNode); - - if (foundPeer.peersRequestedThisRound) { + if (peersRequestRecipientsSinceLastWakeUp.add(discoveryNode) == false) { return; } - foundPeer.peersRequestedThisRound = true; - List knownNodes = new ArrayList<>(foundPeers.keySet()); + List knownNodes = new ArrayList<>(connectedNodes.values()); transportService.sendRequest(discoveryNode, REQUEST_PEERS_ACTION_NAME, new PeersRequest(getLocalNode(), knownNodes), diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 7776e276c22cf..f0b03f8b010a4 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.discovery.zen.UnicastHostsProvider; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.test.transport.CapturingTransport; import org.elasticsearch.test.transport.CapturingTransport.CapturedRequest; import org.elasticsearch.threadpool.ThreadPool.Names; @@ -52,6 +53,7 @@ import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; @@ -532,6 +534,27 @@ public void testDoesNotReconnectToNodesOnceConnected() { assertFoundPeers(otherNode); } + public void testDiscardsDisconnectedNodes() { + final DiscoveryNode otherNode = newDiscoveryNode("original-node"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + + peerFinder.activate(lastAcceptedNodes); + runAllRunnableTasks(); + + assertFoundPeers(otherNode); + + transportAddressConnector.reachableNodes.clear(); + transportAddressConnector.unreachableAddresses.add(otherNode.getAddress()); + connectedNodes.remove(otherNode); + disconnectedNodes.add(otherNode); + + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + + assertFoundPeers(); + } + public void testReconnectsToDisconnectedNodes() { final DiscoveryNode otherNode = newDiscoveryNode("original-node"); unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); @@ -552,7 +575,7 @@ public void testReconnectsToDisconnectedNodes() { deterministicTaskQueue.advanceTime(); runAllRunnableTasks(); - assertFoundPeers(otherNode, rebootedOtherNode); + assertFoundPeers(rebootedOtherNode); } private void respondToRequests(Function responseFactory) { @@ -566,9 +589,10 @@ private void respondToRequests(Function responseFa } } - private void assertFoundPeers(DiscoveryNode... expectedNodes) { - assertThat(peerFinder.getFoundPeers(), - equalTo(Arrays.stream(expectedNodes).collect(Collectors.toSet()))); + private void assertFoundPeers(DiscoveryNode... expectedNodesArray) { + final Set expectedNodes = Arrays.stream(expectedNodesArray).collect(Collectors.toSet()); + final Set actualNodes = StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false).collect(Collectors.toSet()); + assertThat(actualNodes, equalTo(expectedNodes)); } private DiscoveryNode newDiscoveryNode(String nodeId) { From c7bcae9762ca88944136d20868a2c37279dbb58b Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 17:15:42 +0100 Subject: [PATCH 32/76] Prevent multiple concurrent attempts to connect to the same node --- .../cluster/coordination/PeerFinder.java | 68 ++++++++++----- .../cluster/coordination/PeerFinderTests.java | 83 ++++++++++++++++--- 2 files changed, 117 insertions(+), 34 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 093c45e605a74..731c6c51e8052 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -21,6 +21,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.util.SetOnce; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.collect.ImmutableOpenMap; @@ -55,6 +56,7 @@ import static java.util.Collections.unmodifiableList; import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; +import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentSet; public abstract class PeerFinder extends AbstractLifecycleComponent { @@ -208,10 +210,8 @@ protected void doClose() { public interface TransportAddressConnector { /** * Identify the node at the given address and establish a full connection to it. - * - * @return The node to which we connected. */ - DiscoveryNode connectTo(TransportAddress transportAddress) throws IOException; + void connectTo(TransportAddress transportAddress, ActionListener listener); } private class ActivePeerFinder { @@ -220,6 +220,7 @@ private class ActivePeerFinder { private final AtomicBoolean resolveInProgress = new AtomicBoolean(); private final AtomicReference> lastResolvedAddresses = new AtomicReference<>(Collections.emptyList()); private final Set probedAddressesSinceLastWakeUp = new HashSet<>(); + private final Set inFlightProbes = newConcurrentSet(); private final Map connectedNodes = newConcurrentMap(); final Set peersRequestRecipientsSinceLastWakeUp = new HashSet<>(); @@ -250,6 +251,7 @@ private void handleWakeUpUnderLock() { assert holdsLock() : "PeerFinder mutex not held"; if (running == false) { + logger.trace("ActivePeerFinder#handleWakeUp(): not running"); return; } @@ -352,29 +354,51 @@ void startProbe(TransportAddress transportAddress) { logger.trace("startProbe({}) no cached node found, probing", transportAddress); } - executorService.get().execute(new AbstractRunnable() { + if (inFlightProbes.add(transportAddress)) { + logger.trace("startProbe({}) acquired probeLock", transportAddress); + executorService.get().execute(new AbstractRunnable() { + @Override + protected void doRun() { + // No synchronisation required - transportAddressConnector is threadsafe + transportAddressConnector.connectTo(transportAddress, new ActionListener() { + @Override + public void onResponse(DiscoveryNode remoteNode) { + // No synchronisation required - connectedNodes, inFlightProbes, onProbeSuccess all threadsafe + logger.trace("startProbe({}) found {}", transportAddress, remoteNode); + final boolean removed = inFlightProbes.remove(transportAddress); + assert removed; + if (remoteNode.isMasterNode()) { + connectedNodes.put(remoteNode.getAddress(), remoteNode); + onProbeSuccess(remoteNode); + } + } - @Override - protected void doRun() throws Exception { - // No synchronisation required - transportAddressConnector, connectedNodes, onProbeSuccess all threadsafe - final DiscoveryNode remoteNode = transportAddressConnector.connectTo(transportAddress); - if (remoteNode.isMasterNode()) { - connectedNodes.put(remoteNode.getAddress(), remoteNode); - onProbeSuccess(remoteNode); + @Override + public void onFailure(Exception e) { + // No synchronisation required - inFlightProbes is threadsafe + logger.debug(() -> new ParameterizedMessage("startProbe({}) failed", transportAddress), e); + final boolean removed = inFlightProbes.remove(transportAddress); + assert removed; + } + }); } - // TODO prevent concurrent connection attempts to the same address - } - @Override - public void onFailure(Exception e) { - logger.debug(() -> new ParameterizedMessage("Probing {} failed", transportAddress), e); - } + @Override + public void onFailure(Exception e) { + // No synchronisation required - inFlightProbes is threadsafe + logger.debug(() -> new ParameterizedMessage("startProbe({}) failed", transportAddress), e); + final boolean removed = inFlightProbes.remove(transportAddress); + assert removed; + } - @Override - public String toString() { - return "probe " + transportAddress; - } - }); + @Override + public String toString() { + return "probe " + transportAddress; + } + }); + } else { + logger.trace("startProbe({}) probeLock unavailable", transportAddress); + } } private void onProbeSuccess(final DiscoveryNode discoveryNode) { diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index f0b03f8b010a4..7054200642538 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.cluster.coordination; import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.coordination.PeerFinder.TransportAddressConnector; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -28,7 +29,6 @@ import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.discovery.zen.UnicastHostsProvider; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.test.transport.CapturingTransport; import org.elasticsearch.test.transport.CapturingTransport.CapturedRequest; import org.elasticsearch.threadpool.ThreadPool.Names; @@ -84,27 +84,51 @@ public class PeerFinderTests extends ESTestCase { private DiscoveryNodes lastAcceptedNodes; private TransportService transportService; + private static long CONNECTION_TIMEOUT_MILLIS = 30000; + class MockTransportAddressConnector implements TransportAddressConnector { final Set reachableNodes = new HashSet<>(); final Set unreachableAddresses = new HashSet<>(); + final Set slowAddresses = new HashSet<>(); + final Set inFlightConnectionAttempts = new HashSet<>(); @Override - public DiscoveryNode connectTo(TransportAddress transportAddress) throws IOException { + public void connectTo(TransportAddress transportAddress, ActionListener listener) { assert localNode.getAddress().equals(transportAddress) == false : "should not probe local node"; - if (unreachableAddresses.contains(transportAddress)) { - throw new IOException("cannot connect to " + transportAddress); - } + final boolean isNotInFlight = inFlightConnectionAttempts.add(transportAddress); + assertTrue(isNotInFlight); - for (final DiscoveryNode discoveryNode : reachableNodes) { - if (discoveryNode.getAddress().equals(transportAddress)) { - disconnectedNodes.remove(discoveryNode); - connectedNodes.add(discoveryNode); - return discoveryNode; + final long connectResultTime = deterministicTaskQueue.getCurrentTimeMillis() + + (slowAddresses.contains(transportAddress) ? CONNECTION_TIMEOUT_MILLIS : 0); + + deterministicTaskQueue.scheduleAt(connectResultTime, new Runnable() { + @Override + public void run() { + if (unreachableAddresses.contains(transportAddress)) { + assertTrue(inFlightConnectionAttempts.remove(transportAddress)); + listener.onFailure(new IOException("cannot connect to " + transportAddress)); + return; + } + + for (final DiscoveryNode discoveryNode : reachableNodes) { + if (discoveryNode.getAddress().equals(transportAddress)) { + disconnectedNodes.remove(discoveryNode); + connectedNodes.add(discoveryNode); + assertTrue(inFlightConnectionAttempts.remove(transportAddress)); + listener.onResponse(discoveryNode); + return; + } + } + + throw new AssertionError(transportAddress + " unknown"); } - } - throw new AssertionError(transportAddress + " unknown"); + @Override + public String toString() { + return "connection attempt to " + transportAddress; + } + }); } } @@ -555,6 +579,41 @@ public void testDiscardsDisconnectedNodes() { assertFoundPeers(); } + public void testDoesNotMakeMultipleConcurrentConnectionAttemptsToOneAddress() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.unreachableAddresses.add(otherNode.getAddress()); + transportAddressConnector.slowAddresses.add(otherNode.getAddress()); + + peerFinder.activate(lastAcceptedNodes); + runAllRunnableTasks(); + assertFoundPeers(); + + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); // MockTransportAddressConnector verifies no multiple connection attempts + assertFoundPeers(); + + transportAddressConnector.slowAddresses.clear(); + transportAddressConnector.unreachableAddresses.clear(); + transportAddressConnector.reachableNodes.add(otherNode); + + while (deterministicTaskQueue.getCurrentTimeMillis() < CONNECTION_TIMEOUT_MILLIS) { + assertFoundPeers(); + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + } + + // need to wait for the connection to timeout, then for another wakeup, before discovering the peer + final long expectedTime = CONNECTION_TIMEOUT_MILLIS + PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(Settings.EMPTY).millis(); + + while (deterministicTaskQueue.getCurrentTimeMillis() < expectedTime) { + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + } + + assertFoundPeers(otherNode); + } + public void testReconnectsToDisconnectedNodes() { final DiscoveryNode otherNode = newDiscoveryNode("original-node"); unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); From 9e156df300fc4cf81bf2a4e8f7ade1712b985b9f Mon Sep 17 00:00:00 2001 From: David Turner Date: Thu, 2 Aug 2018 17:21:33 +0100 Subject: [PATCH 33/76] Line length --- .../cluster/coordination/PeerFinderTests.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 7054200642538..bff4fbf059874 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -53,6 +53,7 @@ import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; +import java.util.stream.Stream; import java.util.stream.StreamSupport; import static java.util.Collections.emptyList; @@ -649,9 +650,9 @@ private void respondToRequests(Function responseFa } private void assertFoundPeers(DiscoveryNode... expectedNodesArray) { - final Set expectedNodes = Arrays.stream(expectedNodesArray).collect(Collectors.toSet()); - final Set actualNodes = StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false).collect(Collectors.toSet()); - assertThat(actualNodes, equalTo(expectedNodes)); + final Stream expectedNodes = Arrays.stream(expectedNodesArray); + final Stream actualNodes = StreamSupport.stream(peerFinder.getFoundPeers().spliterator(), false); + assertThat(actualNodes.collect(Collectors.toSet()), equalTo(expectedNodes.collect(Collectors.toSet()))); } private DiscoveryNode newDiscoveryNode(String nodeId) { From c805addebf10852441679f0565a54bd6aac9bb8e Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 3 Aug 2018 07:54:33 +0100 Subject: [PATCH 34/76] Remove unused class --- .../cluster/coordination/PeerFinder.java | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 731c6c51e8052..072d8e2ff09ff 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -475,22 +475,5 @@ public String executor() { }); } } - - private class FoundPeer { - final DiscoveryNode discoveryNode; - boolean peersRequestedThisRound; - - FoundPeer(DiscoveryNode discoveryNode) { - this.discoveryNode = discoveryNode; - } - - @Override - public String toString() { - return "FoundPeer{" + - "discoveryNode=" + discoveryNode + - ", peersRequestedThisRound=" + peersRequestedThisRound + - '}'; - } - } } } From 7aec4646d4026e26292fa8ff54ea7b99ad9a8259 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 3 Aug 2018 07:55:39 +0100 Subject: [PATCH 35/76] Remove refs to probeLock --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 072d8e2ff09ff..6774b1da63de3 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -355,7 +355,7 @@ void startProbe(TransportAddress transportAddress) { } if (inFlightProbes.add(transportAddress)) { - logger.trace("startProbe({}) acquired probeLock", transportAddress); + logger.trace("startProbe({}) starting new probe", transportAddress); executorService.get().execute(new AbstractRunnable() { @Override protected void doRun() { @@ -397,7 +397,7 @@ public String toString() { } }); } else { - logger.trace("startProbe({}) probeLock unavailable", transportAddress); + logger.trace("startProbe({}) already probing", transportAddress); } } From 33b17ce49696dd569c8e5240b15d5f17a313f712 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 3 Aug 2018 07:57:21 +0100 Subject: [PATCH 36/76] connectTo is already async, no need to schedule it with the executorService --- .../cluster/coordination/PeerFinder.java | 39 +++++-------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 6774b1da63de3..774871632ead5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -356,31 +356,17 @@ void startProbe(TransportAddress transportAddress) { if (inFlightProbes.add(transportAddress)) { logger.trace("startProbe({}) starting new probe", transportAddress); - executorService.get().execute(new AbstractRunnable() { + transportAddressConnector.connectTo(transportAddress, new ActionListener() { @Override - protected void doRun() { - // No synchronisation required - transportAddressConnector is threadsafe - transportAddressConnector.connectTo(transportAddress, new ActionListener() { - @Override - public void onResponse(DiscoveryNode remoteNode) { - // No synchronisation required - connectedNodes, inFlightProbes, onProbeSuccess all threadsafe - logger.trace("startProbe({}) found {}", transportAddress, remoteNode); - final boolean removed = inFlightProbes.remove(transportAddress); - assert removed; - if (remoteNode.isMasterNode()) { - connectedNodes.put(remoteNode.getAddress(), remoteNode); - onProbeSuccess(remoteNode); - } - } - - @Override - public void onFailure(Exception e) { - // No synchronisation required - inFlightProbes is threadsafe - logger.debug(() -> new ParameterizedMessage("startProbe({}) failed", transportAddress), e); - final boolean removed = inFlightProbes.remove(transportAddress); - assert removed; - } - }); + public void onResponse(DiscoveryNode remoteNode) { + // No synchronisation required - connectedNodes, inFlightProbes, onProbeSuccess all threadsafe + logger.trace("startProbe({}) found {}", transportAddress, remoteNode); + final boolean removed = inFlightProbes.remove(transportAddress); + assert removed; + if (remoteNode.isMasterNode()) { + connectedNodes.put(remoteNode.getAddress(), remoteNode); + onProbeSuccess(remoteNode); + } } @Override @@ -390,11 +376,6 @@ public void onFailure(Exception e) { final boolean removed = inFlightProbes.remove(transportAddress); assert removed; } - - @Override - public String toString() { - return "probe " + transportAddress; - } }); } else { logger.trace("startProbe({}) already probing", transportAddress); From b00795c65d4d8238082bd1d76c667d6c80a6b0e0 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 3 Aug 2018 09:21:44 +0100 Subject: [PATCH 37/76] Consolidate per-address logic into Peer class --- .../cluster/coordination/PeerFinder.java | 187 ++++++++++-------- .../cluster/coordination/PeerFinderTests.java | 19 +- 2 files changed, 120 insertions(+), 86 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 774871632ead5..06a8f686d99c7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -19,12 +19,12 @@ package org.elasticsearch.cluster.coordination; +import com.carrotsearch.hppc.cursors.ObjectCursor; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.util.SetOnce; import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Setting; @@ -42,21 +42,15 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; -import static java.util.Collections.unmodifiableList; import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; -import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentSet; public abstract class PeerFinder extends AbstractLifecycleComponent { @@ -100,7 +94,7 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { public Iterable getFoundPeers() { synchronized (mutex) { - return getActivePeerFinder().connectedNodes.values(); + return getActivePeerFinder().getKnownPeers(); } } @@ -110,6 +104,8 @@ public void activate(final DiscoveryNodes lastAcceptedNodes) { return; } + logger.trace("activating PeerFinder {}", lastAcceptedNodes); + synchronized (mutex) { assert peerFinder.isPresent() == false; peerFinder = Optional.of(new ActivePeerFinder(lastAcceptedNodes)); @@ -125,6 +121,7 @@ protected final boolean holdsLock() { public void deactivate() { synchronized (mutex) { if (peerFinder.isPresent()) { + logger.trace("deactivating PeerFinder"); assert peerFinder.get().running; peerFinder.get().stop(); peerFinder = Optional.empty(); @@ -146,7 +143,7 @@ PeersResponse handlePeersRequest(PeersRequest peersRequest) { final ActivePeerFinder activePeerFinder = getActivePeerFinder(); activePeerFinder.startProbe(peersRequest.getSourceNode().getAddress()); peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); - return new PeersResponse(Optional.empty(), new ArrayList<>(activePeerFinder.connectedNodes.values()), currentTerm); + return new PeersResponse(Optional.empty(), activePeerFinder.getKnownPeers(), currentTerm); } } @@ -209,20 +206,16 @@ protected void doClose() { public interface TransportAddressConnector { /** - * Identify the node at the given address and establish a full connection to it. + * Identify the node at the given address and, if it is a master node and not the local node then establish a full connection to it. */ - void connectTo(TransportAddress transportAddress, ActionListener listener); + void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener listener); } private class ActivePeerFinder { private final DiscoveryNodes lastAcceptedNodes; boolean running; private final AtomicBoolean resolveInProgress = new AtomicBoolean(); - private final AtomicReference> lastResolvedAddresses = new AtomicReference<>(Collections.emptyList()); - private final Set probedAddressesSinceLastWakeUp = new HashSet<>(); - private final Set inFlightProbes = newConcurrentSet(); - private final Map connectedNodes = newConcurrentMap(); - final Set peersRequestRecipientsSinceLastWakeUp = new HashSet<>(); + private final Map peersByAddress = newConcurrentMap(); ActivePeerFinder(DiscoveryNodes lastAcceptedNodes) { this.lastAcceptedNodes = lastAcceptedNodes; @@ -247,6 +240,24 @@ private void handleWakeUp() { } } + List getKnownPeers() { + assert holdsLock() : "PeerFinder mutex not held"; + List knownPeers = new ArrayList<>(peersByAddress.size()); + for(final Peer peer : peersByAddress.values()) { + DiscoveryNode peerNode = peer.getDiscoveryNode(); + if (peerNode != null) { + knownPeers.add(peerNode); + } + } + return knownPeers; + } + + private Peer createConnectingPeer(TransportAddress transportAddress) { + Peer peer = new Peer(transportAddress); + peer.establishConnection(); + return peer; + } + private void handleWakeUpUnderLock() { assert holdsLock() : "PeerFinder mutex not held"; @@ -255,15 +266,13 @@ private void handleWakeUpUnderLock() { return; } - probedAddressesSinceLastWakeUp.clear(); - peersRequestRecipientsSinceLastWakeUp.clear(); - - logger.trace("ActivePeerFinder#handleWakeUp(): probing connected peers {}", connectedNodes.keySet()); - connectedNodes.keySet().forEach(this::startProbe); + for (final Peer peer : peersByAddress.values()) { + peer.handleWakeUp(); + } - final ImmutableOpenMap masterNodes = lastAcceptedNodes.getMasterNodes(); - logger.trace("ActivePeerFinder#handleWakeUp(): probing nodes in cluster state {}", masterNodes); - masterNodes.forEach(e -> startProbe(e.value)); + for (ObjectCursor discoveryNodeObjectCursor : lastAcceptedNodes.getMasterNodes().values()) { + startProbe(discoveryNodeObjectCursor.value.getAddress()); + } if (resolveInProgress.compareAndSet(false, true)) { transportService.getThreadPool().generic().execute(new AbstractRunnable() { @@ -279,8 +288,6 @@ protected void doRun() { -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, transportService, resolveTimeout))); - lastResolvedAddresses.set(unmodifiableList(providedAddresses)); - logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); synchronized (mutex) { providedAddresses.forEach(ActivePeerFinder.this::startProbe); @@ -300,10 +307,6 @@ public String toString() { }); } - final List transportAddresses = lastResolvedAddresses.get(); - logger.trace("ActivePeerFinder#handleNextWakeUp(): probing supplied transport addresses {}", transportAddresses); - transportAddresses.forEach(this::startProbe); - futureExecutor.schedule(new Runnable() { @Override public void run() { @@ -333,76 +336,88 @@ void startProbe(TransportAddress transportAddress) { return; } - if (probedAddressesSinceLastWakeUp.add(transportAddress) == false) { - logger.trace("startProbe({}) already probed this round", transportAddress); - return; + peersByAddress.computeIfAbsent(transportAddress, this::createConnectingPeer); + } + + private class Peer { + private final TransportAddress transportAddress; + private volatile PeerState state = PeerState.CONNECTING; // volatile just for toString() + private SetOnce discoveryNode = new SetOnce<>(); + + Peer(TransportAddress transportAddress) { + this.transportAddress = transportAddress; + } + + DiscoveryNode getDiscoveryNode() { + return discoveryNode.get(); } - final DiscoveryNode cachedNode = connectedNodes.get(transportAddress); - if (cachedNode != null) { - assert cachedNode.getAddress().equals(transportAddress); + void handleWakeUp() { + assert holdsLock() : "PeerFinder mutex not held"; - if (transportService.nodeConnected(cachedNode)) { - logger.trace("startProbe({}) found connected {}", transportAddress, cachedNode); - onProbeSuccess(cachedNode); + if (running == false) { return; } - logger.trace("startProbe({}) found disconnected {}, probing again", transportAddress, cachedNode); - connectedNodes.remove(transportAddress, cachedNode); - } else { - logger.trace("startProbe({}) no cached node found, probing", transportAddress); + final DiscoveryNode discoveryNode = this.discoveryNode.get(); + if (discoveryNode != null && transportService.nodeConnected(discoveryNode) == false) { + logger.trace("{} no longer connected to {}", this, discoveryNode); + removePeer(); + return; + } + + if (state == PeerState.PEERS_KNOWN) { + requestPeers(); + } } - if (inFlightProbes.add(transportAddress)) { - logger.trace("startProbe({}) starting new probe", transportAddress); - transportAddressConnector.connectTo(transportAddress, new ActionListener() { + void establishConnection() { + assert holdsLock() : "PeerFinder mutex not held"; + assert state == PeerState.CONNECTING : state; + assert running; + + logger.trace("{} attempting connection", this); + transportAddressConnector.connectToRemoteMasterNode(transportAddress, new ActionListener() { @Override public void onResponse(DiscoveryNode remoteNode) { - // No synchronisation required - connectedNodes, inFlightProbes, onProbeSuccess all threadsafe - logger.trace("startProbe({}) found {}", transportAddress, remoteNode); - final boolean removed = inFlightProbes.remove(transportAddress); - assert removed; - if (remoteNode.isMasterNode()) { - connectedNodes.put(remoteNode.getAddress(), remoteNode); - onProbeSuccess(remoteNode); + // No synchronisation required for discoveryNode.set() + assert remoteNode.isMasterNode() : remoteNode; + discoveryNode.set(remoteNode); + synchronized (mutex) { + if (running) { + requestPeers(); + } } } @Override public void onFailure(Exception e) { - // No synchronisation required - inFlightProbes is threadsafe - logger.debug(() -> new ParameterizedMessage("startProbe({}) failed", transportAddress), e); - final boolean removed = inFlightProbes.remove(transportAddress); - assert removed; + // No synchronisation required - peersByAddress is threadsafe + logger.debug(() -> new ParameterizedMessage("{} connection failed", Peer.this), e); + removePeer(); } }); - } else { - logger.trace("startProbe({}) already probing", transportAddress); } - } - private void onProbeSuccess(final DiscoveryNode discoveryNode) { - // Called when we are fully connected to the given DiscoveryNode. Request its peers once per round, and probe them too. - synchronized (mutex) { - if (running == false) { - return; - } + private void removePeer() { + // No synchronisation required - peersByAddress is threadsafe + final Peer removed = peersByAddress.remove(transportAddress); + assert removed == Peer.this; + } - if (discoveryNode.isMasterNode() == false) { - logger.trace("Ignoring non-master node {}", discoveryNode); - return; - } + private void requestPeers() { + assert holdsLock() : "PeerFinder mutex not held"; + assert state != PeerState.PEERS_REQUESTED : "PeersRequest already in flight"; + assert running; - if (discoveryNode.equals(getLocalNode())) { - return; - } + state = PeerState.PEERS_REQUESTED; - if (peersRequestRecipientsSinceLastWakeUp.add(discoveryNode) == false) { - return; - } + final DiscoveryNode discoveryNode = this.discoveryNode.get(); + assert discoveryNode != null : "discoveryNode should be set"; + + logger.trace("{} requesting peers from {}", this, discoveryNode); - List knownNodes = new ArrayList<>(connectedNodes.values()); + List knownNodes = getKnownPeers(); transportService.sendRequest(discoveryNode, REQUEST_PEERS_ACTION_NAME, new PeersRequest(getLocalNode(), knownNodes), @@ -415,13 +430,15 @@ public PeersResponse read(StreamInput in) throws IOException { @Override public void handleResponse(PeersResponse response) { - logger.trace("Received {} from {}", response, discoveryNode); + logger.trace("{} received {} from {}", this, response, discoveryNode); final boolean foundMasterNode; synchronized (mutex) { if (running == false) { return; } + state = PeerState.PEERS_KNOWN; + if (response.getMasterNode().isPresent()) { final DiscoveryNode masterNode = response.getMasterNode().get(); if (masterNode.equals(discoveryNode)) { @@ -438,7 +455,7 @@ public void handleResponse(PeersResponse response) { } if (foundMasterNode) { - // Must not hold lock when calling out to Legislator to avoid a lock cycle + // Must not hold lock here to avoid deadlock assert holdsLock() == false : "Peerfinder mutex is held in error"; onMasterFoundByProbe(discoveryNode, response.getTerm()); } @@ -446,6 +463,7 @@ public void handleResponse(PeersResponse response) { @Override public void handleException(TransportException exp) { + state = PeerState.PEERS_KNOWN; logger.debug("PeersRequest failed", exp); } @@ -455,6 +473,17 @@ public String executor() { } }); } + + @Override + public String toString() { + return "Peer{" + transportAddress + " [" + state + "]}"; + } } } + + private enum PeerState { + CONNECTING, // Address is known but no connection established yet + PEERS_REQUESTED, // Connection established, PeersRequest sent + PEERS_KNOWN // Connection established and PeersResponse received + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index bff4fbf059874..30351601bed5c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.cluster.coordination; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.coordination.PeerFinder.TransportAddressConnector; @@ -94,7 +95,7 @@ class MockTransportAddressConnector implements TransportAddressConnector { final Set inFlightConnectionAttempts = new HashSet<>(); @Override - public void connectTo(TransportAddress transportAddress, ActionListener listener) { + public void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener listener) { assert localNode.getAddress().equals(transportAddress) == false : "should not probe local node"; final boolean isNotInFlight = inFlightConnectionAttempts.add(transportAddress); @@ -114,11 +115,16 @@ public void run() { for (final DiscoveryNode discoveryNode : reachableNodes) { if (discoveryNode.getAddress().equals(transportAddress)) { - disconnectedNodes.remove(discoveryNode); - connectedNodes.add(discoveryNode); - assertTrue(inFlightConnectionAttempts.remove(transportAddress)); - listener.onResponse(discoveryNode); - return; + if (discoveryNode.isMasterNode()) { + disconnectedNodes.remove(discoveryNode); + connectedNodes.add(discoveryNode); + assertTrue(inFlightConnectionAttempts.remove(transportAddress)); + listener.onResponse(discoveryNode); + return; + } else { + listener.onFailure(new ElasticsearchException("non-master node " + discoveryNode)); // TODO better exception + return; + } } } @@ -576,7 +582,6 @@ public void testDiscardsDisconnectedNodes() { deterministicTaskQueue.advanceTime(); runAllRunnableTasks(); - assertFoundPeers(); } From 0e9850efe5e081679d80be8d7510e28245b66f3b Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 3 Aug 2018 09:31:32 +0100 Subject: [PATCH 38/76] No need to track separate state, just need a boolean --- .../cluster/coordination/PeerFinder.java | 53 ++++++++----------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 06a8f686d99c7..71e3a73540f3d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -341,8 +341,8 @@ void startProbe(TransportAddress transportAddress) { private class Peer { private final TransportAddress transportAddress; - private volatile PeerState state = PeerState.CONNECTING; // volatile just for toString() private SetOnce discoveryNode = new SetOnce<>(); + private volatile boolean peersRequestInFlight; Peer(TransportAddress transportAddress) { this.transportAddress = transportAddress; @@ -360,39 +360,41 @@ void handleWakeUp() { } final DiscoveryNode discoveryNode = this.discoveryNode.get(); - if (discoveryNode != null && transportService.nodeConnected(discoveryNode) == false) { - logger.trace("{} no longer connected to {}", this, discoveryNode); - removePeer(); - return; - } + // may be null if connection not yet established - if (state == PeerState.PEERS_KNOWN) { - requestPeers(); + if (discoveryNode != null) { + if (transportService.nodeConnected(discoveryNode)) { + if (peersRequestInFlight == false) { + requestPeers(discoveryNode); + } + } else { + logger.trace("{} no longer connected to {}", this, discoveryNode); + removePeer(); + } } } void establishConnection() { assert holdsLock() : "PeerFinder mutex not held"; - assert state == PeerState.CONNECTING : state; + assert getDiscoveryNode() == null : "unexpectedly connected to " + getDiscoveryNode(); assert running; logger.trace("{} attempting connection", this); transportAddressConnector.connectToRemoteMasterNode(transportAddress, new ActionListener() { @Override public void onResponse(DiscoveryNode remoteNode) { - // No synchronisation required for discoveryNode.set() assert remoteNode.isMasterNode() : remoteNode; - discoveryNode.set(remoteNode); synchronized (mutex) { if (running) { - requestPeers(); + discoveryNode.set(remoteNode); + requestPeers(remoteNode); } } } @Override public void onFailure(Exception e) { - // No synchronisation required - peersByAddress is threadsafe + // No synchronisation required - removePeer is threadsafe logger.debug(() -> new ParameterizedMessage("{} connection failed", Peer.this), e); removePeer(); } @@ -405,17 +407,14 @@ private void removePeer() { assert removed == Peer.this; } - private void requestPeers() { + private void requestPeers(final DiscoveryNode discoveryNode) { assert holdsLock() : "PeerFinder mutex not held"; - assert state != PeerState.PEERS_REQUESTED : "PeersRequest already in flight"; + assert peersRequestInFlight == false : "PeersRequest already in flight"; assert running; - - state = PeerState.PEERS_REQUESTED; - - final DiscoveryNode discoveryNode = this.discoveryNode.get(); - assert discoveryNode != null : "discoveryNode should be set"; + assert discoveryNode.equals(this.discoveryNode.get()); logger.trace("{} requesting peers from {}", this, discoveryNode); + peersRequestInFlight = true; List knownNodes = getKnownPeers(); @@ -437,7 +436,7 @@ public void handleResponse(PeersResponse response) { return; } - state = PeerState.PEERS_KNOWN; + peersRequestInFlight = false; if (response.getMasterNode().isPresent()) { final DiscoveryNode masterNode = response.getMasterNode().get(); @@ -456,14 +455,14 @@ public void handleResponse(PeersResponse response) { if (foundMasterNode) { // Must not hold lock here to avoid deadlock - assert holdsLock() == false : "Peerfinder mutex is held in error"; + assert holdsLock() == false : "PeerFinder mutex is held in error"; onMasterFoundByProbe(discoveryNode, response.getTerm()); } } @Override public void handleException(TransportException exp) { - state = PeerState.PEERS_KNOWN; + peersRequestInFlight = false; // volatile write needs no further synchronisation logger.debug("PeersRequest failed", exp); } @@ -476,14 +475,8 @@ public String executor() { @Override public String toString() { - return "Peer{" + transportAddress + " [" + state + "]}"; + return "Peer{" + transportAddress + " peersRequestInFlight=" + peersRequestInFlight + "}"; } } } - - private enum PeerState { - CONNECTING, // Address is known but no connection established yet - PEERS_REQUESTED, // Connection established, PeersRequest sent - PEERS_KNOWN // Connection established and PeersResponse received - } } From f466b44f565a4d9933f18540a1c7eae50a436953 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 3 Aug 2018 09:36:19 +0100 Subject: [PATCH 39/76] Tweaks --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 71e3a73540f3d..2617360a12cf6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -349,6 +349,7 @@ private class Peer { } DiscoveryNode getDiscoveryNode() { + // No synchronisation required return discoveryNode.get(); } @@ -383,7 +384,8 @@ void establishConnection() { transportAddressConnector.connectToRemoteMasterNode(transportAddress, new ActionListener() { @Override public void onResponse(DiscoveryNode remoteNode) { - assert remoteNode.isMasterNode() : remoteNode; + assert remoteNode.isMasterNode() : remoteNode + " is not master-eligible"; + assert remoteNode.equals(getLocalNode()) : remoteNode + " is the local node"; synchronized (mutex) { if (running) { discoveryNode.set(remoteNode); From 3c219dec59c458fba9fb7f82fce1321d129d1dca Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 3 Aug 2018 10:10:42 +0100 Subject: [PATCH 40/76] Need moar coffee --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 2617360a12cf6..78776da2b0d26 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -385,7 +385,7 @@ void establishConnection() { @Override public void onResponse(DiscoveryNode remoteNode) { assert remoteNode.isMasterNode() : remoteNode + " is not master-eligible"; - assert remoteNode.equals(getLocalNode()) : remoteNode + " is the local node"; + assert remoteNode.equals(getLocalNode()) == false : remoteNode + " is the local node"; synchronized (mutex) { if (running) { discoveryNode.set(remoteNode); From 9356a26980a300c5fee4ebce4b6609b7bd2b2f57 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:09:00 +0100 Subject: [PATCH 41/76] Pass in leader when deactivating --- .../cluster/coordination/PeerFinder.java | 22 ++++++---------- .../cluster/coordination/PeerFinderTests.java | 26 +++++++------------ 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 78776da2b0d26..3ef4acc0ce0a0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -42,6 +42,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Optional; @@ -74,6 +75,7 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { private final SetOnce executorService = new SetOnce<>(); private Optional peerFinder = Optional.empty(); private volatile long currentTerm; + private Optional leader = Optional.empty(); PeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, @@ -110,6 +112,7 @@ public void activate(final DiscoveryNodes lastAcceptedNodes) { assert peerFinder.isPresent() == false; peerFinder = Optional.of(new ActivePeerFinder(lastAcceptedNodes)); peerFinder.get().start(); + leader = Optional.empty(); } } @@ -118,7 +121,7 @@ protected final boolean holdsLock() { return Thread.holdsLock(mutex); } - public void deactivate() { + public void deactivate(DiscoveryNode leader) { synchronized (mutex) { if (peerFinder.isPresent()) { logger.trace("deactivating PeerFinder"); @@ -126,6 +129,7 @@ public void deactivate() { peerFinder.get().stop(); peerFinder = Optional.empty(); } + this.leader = Optional.of(leader); } } @@ -144,10 +148,10 @@ PeersResponse handlePeersRequest(PeersRequest peersRequest) { activePeerFinder.startProbe(peersRequest.getSourceNode().getAddress()); peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); return new PeersResponse(Optional.empty(), activePeerFinder.getKnownPeers(), currentTerm); + } else { + return new PeersResponse(leader, Collections.emptyList(), currentTerm); } } - - return onPeersRequestWhenInactive(peersRequest.getSourceNode()); } public boolean isActive() { @@ -194,16 +198,6 @@ protected void doClose() { */ protected abstract void onMasterFoundByProbe(DiscoveryNode masterNode, long term); - /** - * Called on receipt of a PeersRequest when there is no ActivePeerFinder, which mostly means that this node thinks there's an - * active leader. However, there's no synchronisation around this call so it's possible that it's called if there's no active - * leader, but we have not yet been activated. If so, this should respond indicating that there's no active leader, and no - * known peers. - * - * @param sourceNode The sender of the PeersRequest - */ - protected abstract PeersResponse onPeersRequestWhenInactive(DiscoveryNode sourceNode); - public interface TransportAddressConnector { /** * Identify the node at the given address and, if it is a master node and not the local node then establish a full connection to it. @@ -243,7 +237,7 @@ private void handleWakeUp() { List getKnownPeers() { assert holdsLock() : "PeerFinder mutex not held"; List knownPeers = new ArrayList<>(peersByAddress.size()); - for(final Peer peer : peersByAddress.values()) { + for (final Peer peer : peersByAddress.values()) { DiscoveryNode peerNode = peer.getDiscoveryNode(); if (peerNode != null) { knownPeers.add(peerNode); diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 30351601bed5c..e05e6ae95b14f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -69,7 +69,6 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; -import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; public class PeerFinderTests extends ESTestCase { @@ -151,7 +150,6 @@ public List buildDynamicHosts(HostsResolver hostsResolver) { static class TestPeerFinder extends PeerFinder { DiscoveryNode discoveredMasterNode; OptionalLong discoveredMasterTerm = OptionalLong.empty(); - PeersResponse nextResponseWhenInactive; TestPeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, @@ -167,14 +165,6 @@ protected void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { discoveredMasterNode = masterNode; discoveredMasterTerm = OptionalLong.of(term); } - - @Override - protected PeersResponse onPeersRequestWhenInactive(DiscoveryNode sourceNode) { - assertThat(nextResponseWhenInactive, not(nullValue())); - final PeersResponse savedResponse = this.nextResponseWhenInactive; - nextResponseWhenInactive = null; // only send this response once. - return savedResponse; - } } private void updateLastAcceptedNodes(Consumer onBuilder) { @@ -217,7 +207,7 @@ public boolean nodeConnected(DiscoveryNode node) { @After public void deactivateAndRunRemainingTasks() { - peerFinder.deactivate(); + peerFinder.deactivate(localNode); deterministicTaskQueue.runAllTasks(); // termination ensures that everything is properly cleaned up } @@ -226,7 +216,7 @@ public void testActivationAndDeactivation() { assertFoundPeers(); assertTrue(peerFinder.isActive()); - peerFinder.deactivate(); + peerFinder.deactivate(localNode); assertFalse(peerFinder.isActive()); } @@ -292,7 +282,7 @@ public void testDeactivationClearsPastKnowledge() { assertFoundPeers(otherNode); - peerFinder.deactivate(); + peerFinder.deactivate(localNode); unicastHostsProvider.providedAddresses.clear(); peerFinder.activate(lastAcceptedNodes); @@ -391,10 +381,14 @@ public void testDelegatesRequestHandlingWhenInactive() { final DiscoveryNode sourceNode = newDiscoveryNode("request-source"); transportAddressConnector.reachableNodes.add(sourceNode); - final PeersResponse expectedResponse = new PeersResponse(Optional.of(masterNode), Collections.emptyList(), randomNonNegativeLong()); - peerFinder.nextResponseWhenInactive = expectedResponse; + peerFinder.activate(DiscoveryNodes.EMPTY_NODES); + + final long term = randomNonNegativeLong(); + peerFinder.setCurrentTerm(term); + peerFinder.deactivate(masterNode); + + final PeersResponse expectedResponse = new PeersResponse(Optional.of(masterNode), Collections.emptyList(), term); final PeersResponse peersResponse = peerFinder.handlePeersRequest(new PeersRequest(sourceNode, Collections.emptyList())); - assertThat("should have consumed the mock value", peerFinder.nextResponseWhenInactive, nullValue()); assertThat(peersResponse, equalTo(expectedResponse)); } From 7afe75c3804041eebf9158686a6afd68a97a306f Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:10:42 +0100 Subject: [PATCH 42/76] Remove comments re. synchronisation --- .../elasticsearch/cluster/coordination/PeerFinder.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 3ef4acc0ce0a0..12fa472009daf 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -166,12 +166,10 @@ public boolean isActive() { } public void setCurrentTerm(long currentTerm) { - // Volatile write, no synchronisation required this.currentTerm = currentTerm; } private DiscoveryNode getLocalNode() { - // No synchronisation required final DiscoveryNode localNode = transportService.getLocalNode(); assert localNode != null; return localNode; @@ -179,13 +177,11 @@ private DiscoveryNode getLocalNode() { @Override protected void doStart() { - // No synchronisation required executorService.set(executorServiceFactory.get()); } @Override protected void doStop() { - // No synchronisation required ThreadPool.terminate(executorService.get(), 10, TimeUnit.SECONDS); } @@ -276,7 +272,6 @@ public void onFailure(Exception e) { @Override protected void doRun() { - // No synchronisation required for most of this List providedAddresses = new ArrayList<>(hostsProvider.buildDynamicHosts((hosts, limitPortCounts) -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, @@ -343,7 +338,6 @@ private class Peer { } DiscoveryNode getDiscoveryNode() { - // No synchronisation required return discoveryNode.get(); } @@ -390,7 +384,6 @@ public void onResponse(DiscoveryNode remoteNode) { @Override public void onFailure(Exception e) { - // No synchronisation required - removePeer is threadsafe logger.debug(() -> new ParameterizedMessage("{} connection failed", Peer.this), e); removePeer(); } @@ -398,7 +391,6 @@ public void onFailure(Exception e) { } private void removePeer() { - // No synchronisation required - peersByAddress is threadsafe final Peer removed = peersByAddress.remove(transportAddress); assert removed == Peer.this; } @@ -458,7 +450,7 @@ public void handleResponse(PeersResponse response) { @Override public void handleException(TransportException exp) { - peersRequestInFlight = false; // volatile write needs no further synchronisation + peersRequestInFlight = false; logger.debug("PeersRequest failed", exp); } From 27e95cf1675d9b7dfe9516bc0ec480ea02ab83fe Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:11:46 +0100 Subject: [PATCH 43/76] Rename callback --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 4 ++-- .../elasticsearch/cluster/coordination/PeerFinderTests.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 12fa472009daf..1d4efe3fa2783 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -192,7 +192,7 @@ protected void doClose() { /** * Called on receipt of a PeersResponse from a node that believes it's an active leader, which this node should therefore try and join. */ - protected abstract void onMasterFoundByProbe(DiscoveryNode masterNode, long term); + protected abstract void onActiveMasterFound(DiscoveryNode masterNode, long term); public interface TransportAddressConnector { /** @@ -444,7 +444,7 @@ public void handleResponse(PeersResponse response) { if (foundMasterNode) { // Must not hold lock here to avoid deadlock assert holdsLock() == false : "PeerFinder mutex is held in error"; - onMasterFoundByProbe(discoveryNode, response.getTerm()); + onActiveMasterFound(discoveryNode, response.getTerm()); } } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index e05e6ae95b14f..f446bff6e08a5 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -158,7 +158,7 @@ static class TestPeerFinder extends PeerFinder { } @Override - protected void onMasterFoundByProbe(DiscoveryNode masterNode, long term) { + protected void onActiveMasterFound(DiscoveryNode masterNode, long term) { assert holdsLock() == false : "PeerFinder lock held in error"; assertThat(discoveredMasterNode, nullValue()); assertFalse(discoveredMasterTerm.isPresent()); From 5374f2b542a6db800daf09990ae8af1879469f82 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:15:28 +0100 Subject: [PATCH 44/76] Remove foundMasterNode boolean --- .../elasticsearch/cluster/coordination/PeerFinder.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 1d4efe3fa2783..2adf47df7862e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -418,7 +418,6 @@ public PeersResponse read(StreamInput in) throws IOException { @Override public void handleResponse(PeersResponse response) { logger.trace("{} received {} from {}", this, response, discoveryNode); - final boolean foundMasterNode; synchronized (mutex) { if (running == false) { return; @@ -428,20 +427,16 @@ public void handleResponse(PeersResponse response) { if (response.getMasterNode().isPresent()) { final DiscoveryNode masterNode = response.getMasterNode().get(); - if (masterNode.equals(discoveryNode)) { - foundMasterNode = true; - } else { - foundMasterNode = false; + if (masterNode.equals(discoveryNode) == false) { startProbe(masterNode); } } else { - foundMasterNode = false; response.getKnownPeers().stream().map(DiscoveryNode::getAddress) .forEach(ActivePeerFinder.this::startProbe); } } - if (foundMasterNode) { + if (response.getMasterNode().equals(Optional.of(discoveryNode))) { // Must not hold lock here to avoid deadlock assert holdsLock() == false : "PeerFinder mutex is held in error"; onActiveMasterFound(discoveryNode, response.getTerm()); From 1308444fa15e6b5a0f3663fc507f853571ca0e99 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:16:51 +0100 Subject: [PATCH 45/76] Assert discoveryNode not set --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 2adf47df7862e..404ab1d5aa4de 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -348,7 +348,7 @@ void handleWakeUp() { return; } - final DiscoveryNode discoveryNode = this.discoveryNode.get(); + final DiscoveryNode discoveryNode = getDiscoveryNode(); // may be null if connection not yet established if (discoveryNode != null) { @@ -376,6 +376,7 @@ public void onResponse(DiscoveryNode remoteNode) { assert remoteNode.equals(getLocalNode()) == false : remoteNode + " is the local node"; synchronized (mutex) { if (running) { + assert discoveryNode.get() == null : "discoveryNode unexpectedly already set to " + discoveryNode.get(); discoveryNode.set(remoteNode); requestPeers(remoteNode); } From b7c598b441c24632f6d4fea0832b12fded351165 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:18:19 +0100 Subject: [PATCH 46/76] Get the remote node again rather than passing it in --- .../elasticsearch/cluster/coordination/PeerFinder.java | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 404ab1d5aa4de..29df0868c6d65 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -354,7 +354,7 @@ void handleWakeUp() { if (discoveryNode != null) { if (transportService.nodeConnected(discoveryNode)) { if (peersRequestInFlight == false) { - requestPeers(discoveryNode); + requestPeers(); } } else { logger.trace("{} no longer connected to {}", this, discoveryNode); @@ -378,7 +378,7 @@ public void onResponse(DiscoveryNode remoteNode) { if (running) { assert discoveryNode.get() == null : "discoveryNode unexpectedly already set to " + discoveryNode.get(); discoveryNode.set(remoteNode); - requestPeers(remoteNode); + requestPeers(); } } } @@ -396,11 +396,13 @@ private void removePeer() { assert removed == Peer.this; } - private void requestPeers(final DiscoveryNode discoveryNode) { + private void requestPeers() { assert holdsLock() : "PeerFinder mutex not held"; assert peersRequestInFlight == false : "PeersRequest already in flight"; assert running; - assert discoveryNode.equals(this.discoveryNode.get()); + + final DiscoveryNode discoveryNode = getDiscoveryNode(); + assert discoveryNode != null : "cannot request peers without first connecting"; logger.trace("{} requesting peers from {}", this, discoveryNode); peersRequestInFlight = true; From 417e6e26abee53f1dce82e1d263c0aa095ef02a2 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:20:19 +0100 Subject: [PATCH 47/76] Unwrap provided addresses --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 29df0868c6d65..e91dcb465723c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -273,9 +273,9 @@ public void onFailure(Exception e) { @Override protected void doRun() { List providedAddresses - = new ArrayList<>(hostsProvider.buildDynamicHosts((hosts, limitPortCounts) + = hostsProvider.buildDynamicHosts((hosts, limitPortCounts) -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, - transportService, resolveTimeout))); + transportService, resolveTimeout)); logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); synchronized (mutex) { From 4ca2e62f922bde15e43a6fc004346ee10c363196 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:39:17 +0100 Subject: [PATCH 48/76] Extract ConfiguredHostsResolver class --- .../cluster/coordination/PeerFinder.java | 58 ++-------- .../discovery/ConfiguredHostsResolver.java | 107 ++++++++++++++++++ 2 files changed, 119 insertions(+), 46 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index e91dcb465723c..8def3af273054 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -31,10 +31,8 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.AbstractRunnable; +import org.elasticsearch.discovery.ConfiguredHostsResolver; import org.elasticsearch.discovery.zen.UnicastHostsProvider; -import org.elasticsearch.discovery.zen.UnicastZenPing; -import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPool.Names; import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportResponseHandler; @@ -47,8 +45,6 @@ import java.util.Map; import java.util.Optional; import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Supplier; import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; @@ -63,16 +59,13 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { TimeValue.timeValueMillis(1000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope); private final TimeValue findPeersDelay; - private final TimeValue resolveTimeout; private final Object mutex = new Object(); private final TransportService transportService; - private final UnicastHostsProvider hostsProvider; private final FutureExecutor futureExecutor; private final TransportAddressConnector transportAddressConnector; - private final Supplier executorServiceFactory; + private final ConfiguredHostsResolver configuredHostsResolver; - private final SetOnce executorService = new SetOnce<>(); private Optional peerFinder = Optional.empty(); private volatile long currentTerm; private Optional leader = Optional.empty(); @@ -82,12 +75,10 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { Supplier executorServiceFactory) { super(settings); findPeersDelay = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); - resolveTimeout = UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_RESOLVE_TIMEOUT.get(settings); this.transportService = transportService; - this.hostsProvider = hostsProvider; this.futureExecutor = futureExecutor; this.transportAddressConnector = transportAddressConnector; - this.executorServiceFactory = executorServiceFactory; + configuredHostsResolver = new ConfiguredHostsResolver(settings, transportService, hostsProvider, executorServiceFactory); transportService.registerRequestHandler(REQUEST_PEERS_ACTION_NAME, Names.GENERIC, false, false, PeersRequest::new, @@ -177,16 +168,17 @@ private DiscoveryNode getLocalNode() { @Override protected void doStart() { - executorService.set(executorServiceFactory.get()); + configuredHostsResolver.start(); } @Override protected void doStop() { - ThreadPool.terminate(executorService.get(), 10, TimeUnit.SECONDS); + configuredHostsResolver.stop(); } @Override protected void doClose() { + configuredHostsResolver.close(); } /** @@ -204,7 +196,6 @@ public interface TransportAddressConnector { private class ActivePeerFinder { private final DiscoveryNodes lastAcceptedNodes; boolean running; - private final AtomicBoolean resolveInProgress = new AtomicBoolean(); private final Map peersByAddress = newConcurrentMap(); ActivePeerFinder(DiscoveryNodes lastAcceptedNodes) { @@ -264,37 +255,12 @@ private void handleWakeUpUnderLock() { startProbe(discoveryNodeObjectCursor.value.getAddress()); } - if (resolveInProgress.compareAndSet(false, true)) { - transportService.getThreadPool().generic().execute(new AbstractRunnable() { - @Override - public void onFailure(Exception e) { - } - - @Override - protected void doRun() { - List providedAddresses - = hostsProvider.buildDynamicHosts((hosts, limitPortCounts) - -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, - transportService, resolveTimeout)); - - logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); - synchronized (mutex) { - providedAddresses.forEach(ActivePeerFinder.this::startProbe); - } - } - - @Override - public void onAfter() { - super.onAfter(); - resolveInProgress.set(false); - } - - @Override - public String toString() { - return "PeerFinder resolving unicast hosts list"; - } - }); - } + configuredHostsResolver.resolveConfiguredHosts(providedAddresses -> { + synchronized (mutex) { + logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); + providedAddresses.forEach(ActivePeerFinder.this::startProbe); + } + }); futureExecutor.schedule(new Runnable() { @Override diff --git a/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java b/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java new file mode 100644 index 0000000000000..4317bdae10161 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java @@ -0,0 +1,107 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.discovery; + +import org.apache.lucene.util.SetOnce; +import org.elasticsearch.common.component.AbstractLifecycleComponent; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.AbstractRunnable; +import org.elasticsearch.discovery.zen.UnicastHostsProvider; +import org.elasticsearch.discovery.zen.UnicastZenPing; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; +import java.util.function.Supplier; + +public class ConfiguredHostsResolver extends AbstractLifecycleComponent { + + private final AtomicBoolean resolveInProgress = new AtomicBoolean(); + private final TransportService transportService; + private final UnicastHostsProvider hostsProvider; + private final SetOnce executorService = new SetOnce<>(); + private final TimeValue resolveTimeout; + private final Supplier executorServiceFactory; + + public ConfiguredHostsResolver(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, + Supplier executorServiceFactory) { + super(settings); + this.transportService = transportService; + this.hostsProvider = hostsProvider; + resolveTimeout = UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_RESOLVE_TIMEOUT.get(settings); + this.executorServiceFactory = executorServiceFactory; + } + + @Override + protected void doStart() { + executorService.set(executorServiceFactory.get()); + } + + @Override + protected void doStop() { + ThreadPool.terminate(executorService.get(), 10, TimeUnit.SECONDS); + } + + @Override + protected void doClose() { + } + + /** + * Attempt to resolve the configured unicast hosts list to a list of transport addresses. + * @param consumer Consumer for the resolved list. May not be called if an error occurs or if another resolution attempt is in + * progress. + */ + public void resolveConfiguredHosts(Consumer> consumer) { + if (resolveInProgress.compareAndSet(false, true)) { + transportService.getThreadPool().generic().execute(new AbstractRunnable() { + @Override + public void onFailure(Exception e) { + } + + @Override + protected void doRun() { + List providedAddresses + = hostsProvider.buildDynamicHosts((hosts, limitPortCounts) + -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, + transportService, resolveTimeout)); + + consumer.accept(providedAddresses); + } + + @Override + public void onAfter() { + super.onAfter(); + resolveInProgress.set(false); + } + + @Override + public String toString() { + return "ConfiguredHostsResolver resolving unicast hosts list"; + } + }); + } + } +} From f034f18dd0493554a4901da8ba9fde600c4a2fe1 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:56:35 +0100 Subject: [PATCH 49/76] Combine ActivePeerFinder and PeerFinder --- .../cluster/coordination/PeerFinder.java | 390 ++++++++---------- 1 file changed, 179 insertions(+), 211 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 8def3af273054..ab7159c7dd0cb 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -66,8 +66,10 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { private final TransportAddressConnector transportAddressConnector; private final ConfiguredHostsResolver configuredHostsResolver; - private Optional peerFinder = Optional.empty(); private volatile long currentTerm; + private boolean active; + private DiscoveryNodes lastAcceptedNodes; + private final Map peersByAddress = newConcurrentMap(); private Optional leader = Optional.empty(); PeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, @@ -87,7 +89,7 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { public Iterable getFoundPeers() { synchronized (mutex) { - return getActivePeerFinder().getKnownPeers(); + return getKnownPeers(); } } @@ -100,45 +102,37 @@ public void activate(final DiscoveryNodes lastAcceptedNodes) { logger.trace("activating PeerFinder {}", lastAcceptedNodes); synchronized (mutex) { - assert peerFinder.isPresent() == false; - peerFinder = Optional.of(new ActivePeerFinder(lastAcceptedNodes)); - peerFinder.get().start(); + assert active == false; + active = true; + this.lastAcceptedNodes = lastAcceptedNodes; leader = Optional.empty(); + handleWakeUpUnderLock(); } } - // exposed to subclasses for testing - protected final boolean holdsLock() { - return Thread.holdsLock(mutex); - } - public void deactivate(DiscoveryNode leader) { synchronized (mutex) { - if (peerFinder.isPresent()) { - logger.trace("deactivating PeerFinder"); - assert peerFinder.get().running; - peerFinder.get().stop(); - peerFinder = Optional.empty(); + logger.trace("deactivating PeerFinder and setting leader to {}", leader); + if (active) { + active = false; + handleWakeUpUnderLock(); } this.leader = Optional.of(leader); } } - private ActivePeerFinder getActivePeerFinder() { - assert holdsLock() : "Peerfinder mutex not held"; - final ActivePeerFinder activePeerFinder = this.peerFinder.get(); - assert activePeerFinder.running; - return activePeerFinder; + // exposed to subclasses for testing + protected final boolean holdsLock() { + return Thread.holdsLock(mutex); } PeersResponse handlePeersRequest(PeersRequest peersRequest) { synchronized (mutex) { assert peersRequest.getSourceNode().equals(getLocalNode()) == false; - if (isActive()) { - final ActivePeerFinder activePeerFinder = getActivePeerFinder(); - activePeerFinder.startProbe(peersRequest.getSourceNode().getAddress()); - peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(activePeerFinder::startProbe); - return new PeersResponse(Optional.empty(), activePeerFinder.getKnownPeers(), currentTerm); + if (active) { + startProbe(peersRequest.getSourceNode().getAddress()); + peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(this::startProbe); + return new PeersResponse(Optional.empty(), getKnownPeers(), currentTerm); } else { return new PeersResponse(leader, Collections.emptyList(), currentTerm); } @@ -147,12 +141,7 @@ PeersResponse handlePeersRequest(PeersRequest peersRequest) { public boolean isActive() { synchronized (mutex) { - if (peerFinder.isPresent()) { - assert peerFinder.get().running; - return true; - } else { - return false; - } + return active; } } @@ -193,242 +182,221 @@ public interface TransportAddressConnector { void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener listener); } - private class ActivePeerFinder { - private final DiscoveryNodes lastAcceptedNodes; - boolean running; - private final Map peersByAddress = newConcurrentMap(); - - ActivePeerFinder(DiscoveryNodes lastAcceptedNodes) { - this.lastAcceptedNodes = lastAcceptedNodes; - } - - void start() { - assert holdsLock() : "PeerFinder mutex not held"; - assert running == false; - running = true; + private void handleWakeUp() { + synchronized (mutex) { handleWakeUpUnderLock(); } + } - void stop() { - assert holdsLock() : "PeerFinder mutex not held"; - assert running; - running = false; - } - - private void handleWakeUp() { - synchronized (mutex) { - handleWakeUpUnderLock(); + private List getKnownPeers() { + assert active; + assert holdsLock() : "PeerFinder mutex not held"; + List knownPeers = new ArrayList<>(peersByAddress.size()); + for (final Peer peer : peersByAddress.values()) { + DiscoveryNode peerNode = peer.getDiscoveryNode(); + if (peerNode != null) { + knownPeers.add(peerNode); } } + return knownPeers; + } - List getKnownPeers() { - assert holdsLock() : "PeerFinder mutex not held"; - List knownPeers = new ArrayList<>(peersByAddress.size()); - for (final Peer peer : peersByAddress.values()) { - DiscoveryNode peerNode = peer.getDiscoveryNode(); - if (peerNode != null) { - knownPeers.add(peerNode); - } - } - return knownPeers; + private Peer createConnectingPeer(TransportAddress transportAddress) { + Peer peer = new Peer(transportAddress); + peer.establishConnection(); + return peer; + } + + private void handleWakeUpUnderLock() { + assert holdsLock() : "PeerFinder mutex not held"; + + for (final Peer peer : peersByAddress.values()) { + peer.handleWakeUp(); } - private Peer createConnectingPeer(TransportAddress transportAddress) { - Peer peer = new Peer(transportAddress); - peer.establishConnection(); - return peer; + if (active == false) { + logger.trace("ActivePeerFinder#handleWakeUp(): not running"); + return; } - private void handleWakeUpUnderLock() { - assert holdsLock() : "PeerFinder mutex not held"; + for (ObjectCursor discoveryNodeObjectCursor : lastAcceptedNodes.getMasterNodes().values()) { + startProbe(discoveryNodeObjectCursor.value.getAddress()); + } - if (running == false) { - logger.trace("ActivePeerFinder#handleWakeUp(): not running"); - return; + configuredHostsResolver.resolveConfiguredHosts(providedAddresses -> { + synchronized (mutex) { + logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); + providedAddresses.forEach(this::startProbe); } + }); - for (final Peer peer : peersByAddress.values()) { - peer.handleWakeUp(); + futureExecutor.schedule(new Runnable() { + @Override + public void run() { + handleWakeUp(); } - for (ObjectCursor discoveryNodeObjectCursor : lastAcceptedNodes.getMasterNodes().values()) { - startProbe(discoveryNodeObjectCursor.value.getAddress()); + @Override + public String toString() { + return "PeerFinder::handleWakeUp"; } + }, findPeersDelay); + } - configuredHostsResolver.resolveConfiguredHosts(providedAddresses -> { - synchronized (mutex) { - logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); - providedAddresses.forEach(ActivePeerFinder.this::startProbe); - } - }); - - futureExecutor.schedule(new Runnable() { - @Override - public void run() { - handleWakeUp(); - } + void startProbe(DiscoveryNode discoveryNode) { + startProbe(discoveryNode.getAddress()); + } - @Override - public String toString() { - return "ActivePeerFinder::handleWakeUp"; - } - }, findPeersDelay); + void startProbe(TransportAddress transportAddress) { + assert holdsLock() : "PeerFinder mutex not held"; + if (active == false) { + logger.trace("startProbe({}) not running", transportAddress); + return; } - void startProbe(DiscoveryNode discoveryNode) { - startProbe(discoveryNode.getAddress()); + if (transportAddress.equals(getLocalNode().getAddress())) { + logger.trace("startProbe({}) not probing local node", transportAddress); + return; } - void startProbe(TransportAddress transportAddress) { - assert holdsLock() : "PeerFinder mutex not held"; - if (running == false) { - logger.trace("startProbe({}) not running", transportAddress); - return; - } + peersByAddress.computeIfAbsent(transportAddress, this::createConnectingPeer); + } - if (transportAddress.equals(getLocalNode().getAddress())) { - logger.trace("startProbe({}) not probing local node", transportAddress); - return; - } + private class Peer { + private final TransportAddress transportAddress; + private SetOnce discoveryNode = new SetOnce<>(); + private volatile boolean peersRequestInFlight; - peersByAddress.computeIfAbsent(transportAddress, this::createConnectingPeer); + Peer(TransportAddress transportAddress) { + this.transportAddress = transportAddress; } - private class Peer { - private final TransportAddress transportAddress; - private SetOnce discoveryNode = new SetOnce<>(); - private volatile boolean peersRequestInFlight; + DiscoveryNode getDiscoveryNode() { + return discoveryNode.get(); + } - Peer(TransportAddress transportAddress) { - this.transportAddress = transportAddress; - } + void handleWakeUp() { + assert holdsLock() : "PeerFinder mutex not held"; - DiscoveryNode getDiscoveryNode() { - return discoveryNode.get(); + if (active == false) { + removePeer(); + return; } - void handleWakeUp() { - assert holdsLock() : "PeerFinder mutex not held"; - - if (running == false) { - return; - } - - final DiscoveryNode discoveryNode = getDiscoveryNode(); - // may be null if connection not yet established + final DiscoveryNode discoveryNode = getDiscoveryNode(); + // may be null if connection not yet established - if (discoveryNode != null) { - if (transportService.nodeConnected(discoveryNode)) { - if (peersRequestInFlight == false) { - requestPeers(); - } - } else { - logger.trace("{} no longer connected to {}", this, discoveryNode); - removePeer(); + if (discoveryNode != null) { + if (transportService.nodeConnected(discoveryNode)) { + if (peersRequestInFlight == false) { + requestPeers(); } + } else { + logger.trace("{} no longer connected to {}", this, discoveryNode); + removePeer(); } } + } - void establishConnection() { - assert holdsLock() : "PeerFinder mutex not held"; - assert getDiscoveryNode() == null : "unexpectedly connected to " + getDiscoveryNode(); - assert running; + void establishConnection() { + assert holdsLock() : "PeerFinder mutex not held"; + assert getDiscoveryNode() == null : "unexpectedly connected to " + getDiscoveryNode(); + assert active; - logger.trace("{} attempting connection", this); - transportAddressConnector.connectToRemoteMasterNode(transportAddress, new ActionListener() { - @Override - public void onResponse(DiscoveryNode remoteNode) { - assert remoteNode.isMasterNode() : remoteNode + " is not master-eligible"; - assert remoteNode.equals(getLocalNode()) == false : remoteNode + " is the local node"; - synchronized (mutex) { - if (running) { - assert discoveryNode.get() == null : "discoveryNode unexpectedly already set to " + discoveryNode.get(); - discoveryNode.set(remoteNode); - requestPeers(); - } + logger.trace("{} attempting connection", this); + transportAddressConnector.connectToRemoteMasterNode(transportAddress, new ActionListener() { + @Override + public void onResponse(DiscoveryNode remoteNode) { + assert remoteNode.isMasterNode() : remoteNode + " is not master-eligible"; + assert remoteNode.equals(getLocalNode()) == false : remoteNode + " is the local node"; + synchronized (mutex) { + if (active) { + assert discoveryNode.get() == null : "discoveryNode unexpectedly already set to " + discoveryNode.get(); + discoveryNode.set(remoteNode); + requestPeers(); } } + } - @Override - public void onFailure(Exception e) { - logger.debug(() -> new ParameterizedMessage("{} connection failed", Peer.this), e); - removePeer(); - } - }); - } + @Override + public void onFailure(Exception e) { + logger.debug(() -> new ParameterizedMessage("{} connection failed", Peer.this), e); + removePeer(); + } + }); + } - private void removePeer() { - final Peer removed = peersByAddress.remove(transportAddress); - assert removed == Peer.this; - } + private void removePeer() { + final Peer removed = peersByAddress.remove(transportAddress); + assert removed == Peer.this; + } - private void requestPeers() { - assert holdsLock() : "PeerFinder mutex not held"; - assert peersRequestInFlight == false : "PeersRequest already in flight"; - assert running; + private void requestPeers() { + assert holdsLock() : "PeerFinder mutex not held"; + assert peersRequestInFlight == false : "PeersRequest already in flight"; + assert active; - final DiscoveryNode discoveryNode = getDiscoveryNode(); - assert discoveryNode != null : "cannot request peers without first connecting"; + final DiscoveryNode discoveryNode = getDiscoveryNode(); + assert discoveryNode != null : "cannot request peers without first connecting"; - logger.trace("{} requesting peers from {}", this, discoveryNode); - peersRequestInFlight = true; + logger.trace("{} requesting peers from {}", this, discoveryNode); + peersRequestInFlight = true; - List knownNodes = getKnownPeers(); + List knownNodes = getKnownPeers(); - transportService.sendRequest(discoveryNode, REQUEST_PEERS_ACTION_NAME, - new PeersRequest(getLocalNode(), knownNodes), - new TransportResponseHandler() { + transportService.sendRequest(discoveryNode, REQUEST_PEERS_ACTION_NAME, + new PeersRequest(getLocalNode(), knownNodes), + new TransportResponseHandler() { - @Override - public PeersResponse read(StreamInput in) throws IOException { - return new PeersResponse(in); - } + @Override + public PeersResponse read(StreamInput in) throws IOException { + return new PeersResponse(in); + } - @Override - public void handleResponse(PeersResponse response) { - logger.trace("{} received {} from {}", this, response, discoveryNode); - synchronized (mutex) { - if (running == false) { - return; - } + @Override + public void handleResponse(PeersResponse response) { + logger.trace("{} received {} from {}", this, response, discoveryNode); + synchronized (mutex) { + if (active == false) { + return; + } - peersRequestInFlight = false; + peersRequestInFlight = false; - if (response.getMasterNode().isPresent()) { - final DiscoveryNode masterNode = response.getMasterNode().get(); - if (masterNode.equals(discoveryNode) == false) { - startProbe(masterNode); - } - } else { - response.getKnownPeers().stream().map(DiscoveryNode::getAddress) - .forEach(ActivePeerFinder.this::startProbe); + if (response.getMasterNode().isPresent()) { + final DiscoveryNode masterNode = response.getMasterNode().get(); + if (masterNode.equals(discoveryNode) == false) { + startProbe(masterNode); } - } - - if (response.getMasterNode().equals(Optional.of(discoveryNode))) { - // Must not hold lock here to avoid deadlock - assert holdsLock() == false : "PeerFinder mutex is held in error"; - onActiveMasterFound(discoveryNode, response.getTerm()); + } else { + response.getKnownPeers().stream().map(DiscoveryNode::getAddress) + .forEach(PeerFinder.this::startProbe); } } - @Override - public void handleException(TransportException exp) { - peersRequestInFlight = false; - logger.debug("PeersRequest failed", exp); + if (response.getMasterNode().equals(Optional.of(discoveryNode))) { + // Must not hold lock here to avoid deadlock + assert holdsLock() == false : "PeerFinder mutex is held in error"; + onActiveMasterFound(discoveryNode, response.getTerm()); } + } - @Override - public String executor() { - return Names.GENERIC; - } - }); - } + @Override + public void handleException(TransportException exp) { + peersRequestInFlight = false; + logger.debug("PeersRequest failed", exp); + } - @Override - public String toString() { - return "Peer{" + transportAddress + " peersRequestInFlight=" + peersRequestInFlight + "}"; - } + @Override + public String executor() { + return Names.GENERIC; + } + }); + } + + @Override + public String toString() { + return "Peer{" + transportAddress + " peersRequestInFlight=" + peersRequestInFlight + "}"; } } } From 380d9308551011daf6ede289c12e2aebe76d4bb4 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:57:46 +0100 Subject: [PATCH 50/76] No need to expose isActive - this test is not helpful --- .../elasticsearch/cluster/coordination/PeerFinder.java | 6 ------ .../cluster/coordination/PeerFinderTests.java | 9 --------- 2 files changed, 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index ab7159c7dd0cb..5ca3e20e6bd49 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -139,12 +139,6 @@ PeersResponse handlePeersRequest(PeersRequest peersRequest) { } } - public boolean isActive() { - synchronized (mutex) { - return active; - } - } - public void setCurrentTerm(long currentTerm) { this.currentTerm = currentTerm; } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index f446bff6e08a5..82848b2c1faa2 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -211,15 +211,6 @@ public void deactivateAndRunRemainingTasks() { deterministicTaskQueue.runAllTasks(); // termination ensures that everything is properly cleaned up } - public void testActivationAndDeactivation() { - peerFinder.activate(lastAcceptedNodes); - assertFoundPeers(); - assertTrue(peerFinder.isActive()); - - peerFinder.deactivate(localNode); - assertFalse(peerFinder.isActive()); - } - public void testAddsReachableNodesFromUnicastHostsList() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); From 34b6fd476a829b62f96834f36f9e060bf2463aa3 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 11:58:35 +0100 Subject: [PATCH 51/76] Assert lifecycle is started --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 5ca3e20e6bd49..1d190866b9a97 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -94,10 +94,7 @@ public Iterable getFoundPeers() { } public void activate(final DiscoveryNodes lastAcceptedNodes) { - if (lifecycle.started() == false) { - logger.debug("ignoring activation, not started"); - return; - } + assert lifecycle.started() : lifecycle; logger.trace("activating PeerFinder {}", lastAcceptedNodes); From 98cd79a22a98b11dc64306833bb9f1dd0105baab Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 12:27:03 +0100 Subject: [PATCH 52/76] No need for PeerFinder to be responsible for lifecycle of ConfiguredHostsResolver --- .../cluster/coordination/PeerFinder.java | 39 +++---- .../discovery/ConfiguredHostsResolver.java | 79 +------------- .../UnicastConfiguredHostsResolver.java | 102 ++++++++++++++++++ .../cluster/coordination/PeerFinderTests.java | 99 ++++++++++------- 4 files changed, 180 insertions(+), 139 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/discovery/UnicastConfiguredHostsResolver.java diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 1d190866b9a97..2f4a2a33a9b24 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -25,14 +25,13 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; -import org.elasticsearch.common.component.AbstractLifecycleComponent; +import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.discovery.ConfiguredHostsResolver; -import org.elasticsearch.discovery.zen.UnicastHostsProvider; import org.elasticsearch.threadpool.ThreadPool.Names; import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportResponseHandler; @@ -44,12 +43,10 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.concurrent.ExecutorService; -import java.util.function.Supplier; import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; -public abstract class PeerFinder extends AbstractLifecycleComponent { +public abstract class PeerFinder extends AbstractComponent { public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/requestpeers"; @@ -72,15 +69,14 @@ public abstract class PeerFinder extends AbstractLifecycleComponent { private final Map peersByAddress = newConcurrentMap(); private Optional leader = Optional.empty(); - PeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, - FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, - Supplier executorServiceFactory) { + PeerFinder(Settings settings, TransportService transportService, FutureExecutor futureExecutor, + TransportAddressConnector transportAddressConnector, ConfiguredHostsResolver configuredHostsResolver) { super(settings); findPeersDelay = DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(settings); this.transportService = transportService; this.futureExecutor = futureExecutor; this.transportAddressConnector = transportAddressConnector; - configuredHostsResolver = new ConfiguredHostsResolver(settings, transportService, hostsProvider, executorServiceFactory); + this.configuredHostsResolver = configuredHostsResolver; transportService.registerRequestHandler(REQUEST_PEERS_ACTION_NAME, Names.GENERIC, false, false, PeersRequest::new, @@ -94,8 +90,6 @@ public Iterable getFoundPeers() { } public void activate(final DiscoveryNodes lastAcceptedNodes) { - assert lifecycle.started() : lifecycle; - logger.trace("activating PeerFinder {}", lastAcceptedNodes); synchronized (mutex) { @@ -123,6 +117,12 @@ protected final boolean holdsLock() { return Thread.holdsLock(mutex); } + boolean assertInactiveWithNoKnownPeers() { + assert active == false; + assert peersByAddress.isEmpty(); + return true; + } + PeersResponse handlePeersRequest(PeersRequest peersRequest) { synchronized (mutex) { assert peersRequest.getSourceNode().equals(getLocalNode()) == false; @@ -146,21 +146,6 @@ private DiscoveryNode getLocalNode() { return localNode; } - @Override - protected void doStart() { - configuredHostsResolver.start(); - } - - @Override - protected void doStop() { - configuredHostsResolver.stop(); - } - - @Override - protected void doClose() { - configuredHostsResolver.close(); - } - /** * Called on receipt of a PeersResponse from a node that believes it's an active leader, which this node should therefore try and join. */ @@ -216,7 +201,7 @@ private void handleWakeUpUnderLock() { configuredHostsResolver.resolveConfiguredHosts(providedAddresses -> { synchronized (mutex) { - logger.trace("ActivePeerFinder#handleNextWakeUp(): probing resolved transport addresses {}", providedAddresses); + logger.trace("PeerFinder: probing resolved transport addresses {}", providedAddresses); providedAddresses.forEach(this::startProbe); } }); diff --git a/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java b/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java index 4317bdae10161..8122db66e6c9f 100644 --- a/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java +++ b/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java @@ -19,89 +19,18 @@ package org.elasticsearch.discovery; -import org.apache.lucene.util.SetOnce; -import org.elasticsearch.common.component.AbstractLifecycleComponent; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.AbstractRunnable; -import org.elasticsearch.discovery.zen.UnicastHostsProvider; -import org.elasticsearch.discovery.zen.UnicastZenPing; -import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.transport.TransportService; import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; -import java.util.function.Supplier; - -public class ConfiguredHostsResolver extends AbstractLifecycleComponent { - - private final AtomicBoolean resolveInProgress = new AtomicBoolean(); - private final TransportService transportService; - private final UnicastHostsProvider hostsProvider; - private final SetOnce executorService = new SetOnce<>(); - private final TimeValue resolveTimeout; - private final Supplier executorServiceFactory; - - public ConfiguredHostsResolver(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, - Supplier executorServiceFactory) { - super(settings); - this.transportService = transportService; - this.hostsProvider = hostsProvider; - resolveTimeout = UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_RESOLVE_TIMEOUT.get(settings); - this.executorServiceFactory = executorServiceFactory; - } - - @Override - protected void doStart() { - executorService.set(executorServiceFactory.get()); - } - - @Override - protected void doStop() { - ThreadPool.terminate(executorService.get(), 10, TimeUnit.SECONDS); - } - - @Override - protected void doClose() { - } +public interface ConfiguredHostsResolver { /** * Attempt to resolve the configured unicast hosts list to a list of transport addresses. + * * @param consumer Consumer for the resolved list. May not be called if an error occurs or if another resolution attempt is in * progress. */ - public void resolveConfiguredHosts(Consumer> consumer) { - if (resolveInProgress.compareAndSet(false, true)) { - transportService.getThreadPool().generic().execute(new AbstractRunnable() { - @Override - public void onFailure(Exception e) { - } - - @Override - protected void doRun() { - List providedAddresses - = hostsProvider.buildDynamicHosts((hosts, limitPortCounts) - -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, - transportService, resolveTimeout)); - - consumer.accept(providedAddresses); - } - - @Override - public void onAfter() { - super.onAfter(); - resolveInProgress.set(false); - } - - @Override - public String toString() { - return "ConfiguredHostsResolver resolving unicast hosts list"; - } - }); - } - } + void resolveConfiguredHosts(Consumer> consumer); } + diff --git a/server/src/main/java/org/elasticsearch/discovery/UnicastConfiguredHostsResolver.java b/server/src/main/java/org/elasticsearch/discovery/UnicastConfiguredHostsResolver.java new file mode 100644 index 0000000000000..f2027a64d23c1 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/discovery/UnicastConfiguredHostsResolver.java @@ -0,0 +1,102 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.discovery; + +import org.apache.lucene.util.SetOnce; +import org.elasticsearch.common.component.AbstractLifecycleComponent; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.AbstractRunnable; +import org.elasticsearch.discovery.zen.UnicastHostsProvider; +import org.elasticsearch.discovery.zen.UnicastZenPing; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Consumer; +import java.util.function.Supplier; + +public class UnicastConfiguredHostsResolver extends AbstractLifecycleComponent implements ConfiguredHostsResolver { + + private final AtomicBoolean resolveInProgress = new AtomicBoolean(); + private final TransportService transportService; + private final UnicastHostsProvider hostsProvider; + private final SetOnce executorService = new SetOnce<>(); + private final TimeValue resolveTimeout; + private final Supplier executorServiceFactory; + + public UnicastConfiguredHostsResolver(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, + Supplier executorServiceFactory) { + super(settings); + this.transportService = transportService; + this.hostsProvider = hostsProvider; + resolveTimeout = UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_RESOLVE_TIMEOUT.get(settings); + this.executorServiceFactory = executorServiceFactory; + } + + @Override + protected void doStart() { + executorService.set(executorServiceFactory.get()); + } + + @Override + protected void doStop() { + ThreadPool.terminate(executorService.get(), 10, TimeUnit.SECONDS); + } + + @Override + protected void doClose() { + } + + public void resolveConfiguredHosts(Consumer> consumer) { + if (resolveInProgress.compareAndSet(false, true)) { + transportService.getThreadPool().generic().execute(new AbstractRunnable() { + @Override + public void onFailure(Exception e) { + } + + @Override + protected void doRun() { + List providedAddresses + = hostsProvider.buildDynamicHosts((hosts, limitPortCounts) + -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, + transportService, resolveTimeout)); + + consumer.accept(providedAddresses); + } + + @Override + public void onAfter() { + super.onAfter(); + resolveInProgress.set(false); + } + + @Override + public String toString() { + return "ConfiguredHostsResolver resolving unicast hosts list"; + } + }); + } + } +} diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 82848b2c1faa2..846b54b67ad3c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -28,7 +28,6 @@ import org.elasticsearch.cluster.node.DiscoveryNodes.Builder; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; -import org.elasticsearch.discovery.zen.UnicastHostsProvider; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.transport.CapturingTransport; import org.elasticsearch.test.transport.CapturingTransport.CapturedRequest; @@ -48,11 +47,9 @@ import java.util.Optional; import java.util.OptionalLong; import java.util.Set; -import java.util.concurrent.ExecutorService; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import java.util.function.Function; -import java.util.function.Supplier; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -76,9 +73,10 @@ public class PeerFinderTests extends ESTestCase { private CapturingTransport capturingTransport; private DeterministicTaskQueue deterministicTaskQueue; private DiscoveryNode localNode; - private MockUnicastHostsProvider unicastHostsProvider; private MockTransportAddressConnector transportAddressConnector; private TestPeerFinder peerFinder; + private List providedAddresses; + private long addressResolveDelay; // -1 means address resolution fails private Set disconnectedNodes = new HashSet<>(); private Set connectedNodes = new HashSet<>(); @@ -138,23 +136,13 @@ public String toString() { } } - static class MockUnicastHostsProvider implements UnicastHostsProvider { - final List providedAddresses = new ArrayList<>(); - - @Override - public List buildDynamicHosts(HostsResolver hostsResolver) { - return providedAddresses; - } - } - - static class TestPeerFinder extends PeerFinder { + class TestPeerFinder extends PeerFinder { DiscoveryNode discoveredMasterNode; OptionalLong discoveredMasterTerm = OptionalLong.empty(); - TestPeerFinder(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, - FutureExecutor futureExecutor, TransportAddressConnector transportAddressConnector, - Supplier executorServiceFactory) { - super(settings, transportService, hostsProvider, futureExecutor, transportAddressConnector, executorServiceFactory); + TestPeerFinder(Settings settings, TransportService transportService, FutureExecutor futureExecutor, + TransportAddressConnector transportAddressConnector) { + super(settings, transportService, futureExecutor, transportAddressConnector, PeerFinderTests.this::resolveConfiguredHosts); } @Override @@ -167,6 +155,24 @@ protected void onActiveMasterFound(DiscoveryNode masterNode, long term) { } } + private void resolveConfiguredHosts(Consumer> onResult) { + if (addressResolveDelay >= 0) { + deterministicTaskQueue.scheduleAt(deterministicTaskQueue.getCurrentTimeMillis() + addressResolveDelay, new Runnable() { + @Override + public void run() { + onResult.accept(providedAddresses); + } + + @Override + public String toString() { + return "PeerFinderTests#resolveConfiguredHosts"; + } + }); + } else { + assertThat(addressResolveDelay, is(-1L)); + } + } + private void updateLastAcceptedNodes(Consumer onBuilder) { final Builder builder = DiscoveryNodes.builder(lastAcceptedNodes); onBuilder.accept(builder); @@ -184,8 +190,9 @@ public boolean nodeConnected(DiscoveryNode node) { return isConnected; } }; - unicastHostsProvider = new MockUnicastHostsProvider(); transportAddressConnector = new MockTransportAddressConnector(); + providedAddresses = new ArrayList<>(); + addressResolveDelay = 0L; final Settings settings = Settings.builder().put(NODE_NAME_SETTING.getKey(), "node").build(); deterministicTaskQueue = new DeterministicTaskQueue(settings); @@ -199,21 +206,20 @@ public boolean nodeConnected(DiscoveryNode node) { lastAcceptedNodes = DiscoveryNodes.builder().localNodeId(localNode.getId()).add(localNode).build(); - peerFinder = new TestPeerFinder(settings, transportService, unicastHostsProvider, - deterministicTaskQueue.getFutureExecutor(), transportAddressConnector, deterministicTaskQueue::getExecutorService); - - peerFinder.start(); + peerFinder = new TestPeerFinder(settings, transportService, + deterministicTaskQueue.getFutureExecutor(), transportAddressConnector); } @After public void deactivateAndRunRemainingTasks() { peerFinder.deactivate(localNode); deterministicTaskQueue.runAllTasks(); // termination ensures that everything is properly cleaned up + peerFinder.assertInactiveWithNoKnownPeers(); // should eventually have no nodes when deactivated } public void testAddsReachableNodesFromUnicastHostsList() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); @@ -222,9 +228,28 @@ public void testAddsReachableNodesFromUnicastHostsList() { assertFoundPeers(otherNode); } + public void testAddsReachableNodesFromUnicastHostsListProvidedLater() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + addressResolveDelay = 10000; + + peerFinder.activate(lastAcceptedNodes); + runAllRunnableTasks(); + assertFoundPeers(); + + final long successTime = addressResolveDelay + PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(Settings.EMPTY).millis(); + while (deterministicTaskQueue.getCurrentTimeMillis() < successTime) { + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + } + + assertFoundPeers(otherNode); + } + public void testDoesNotAddUnreachableNodesFromUnicastHostsList() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.unreachableAddresses.add(otherNode.getAddress()); peerFinder.activate(lastAcceptedNodes); @@ -237,7 +262,7 @@ public void testDoesNotAddNonMasterEligibleNodesFromUnicastHostsList() { final DiscoveryNode nonMasterNode = new DiscoveryNode("node-from-hosts-list", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); - unicastHostsProvider.providedAddresses.add(nonMasterNode.getAddress()); + providedAddresses.add(nonMasterNode.getAddress()); transportAddressConnector.reachableNodes.add(nonMasterNode); peerFinder.activate(lastAcceptedNodes); @@ -254,7 +279,7 @@ public void testChecksUnicastHostsForChanges() { assertFoundPeers(); final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); deterministicTaskQueue.advanceTime(); @@ -265,7 +290,7 @@ public void testChecksUnicastHostsForChanges() { public void testDeactivationClearsPastKnowledge() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); @@ -275,7 +300,7 @@ public void testDeactivationClearsPastKnowledge() { peerFinder.deactivate(localNode); - unicastHostsProvider.providedAddresses.clear(); + providedAddresses.clear(); peerFinder.activate(lastAcceptedNodes); runAllRunnableTasks(); assertFoundPeers(); @@ -420,7 +445,7 @@ public String executor() { public void testRequestsPeersIncludingKnownPeersInRequest() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); @@ -436,7 +461,7 @@ public void testRequestsPeersIncludingKnownPeersInRequest() { public void testAddsReachablePeersFromResponse() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); @@ -457,7 +482,7 @@ public void testAddsReachablePeersFromResponse() { public void testAddsReachableMasterFromResponse() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); @@ -480,7 +505,7 @@ public void testAddsReachableMasterFromResponse() { public void testHandlesDiscoveryOfMasterFromResponseFromMaster() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); @@ -535,7 +560,7 @@ public void testOnlyRequestsPeersOncePerRoundButDoesRetryNextRound() { public void testDoesNotReconnectToNodesOnceConnected() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); @@ -552,7 +577,7 @@ public void testDoesNotReconnectToNodesOnceConnected() { public void testDiscardsDisconnectedNodes() { final DiscoveryNode otherNode = newDiscoveryNode("original-node"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); @@ -572,7 +597,7 @@ public void testDiscardsDisconnectedNodes() { public void testDoesNotMakeMultipleConcurrentConnectionAttemptsToOneAddress() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.unreachableAddresses.add(otherNode.getAddress()); transportAddressConnector.slowAddresses.add(otherNode.getAddress()); @@ -607,7 +632,7 @@ public void testDoesNotMakeMultipleConcurrentConnectionAttemptsToOneAddress() { public void testReconnectsToDisconnectedNodes() { final DiscoveryNode otherNode = newDiscoveryNode("original-node"); - unicastHostsProvider.providedAddresses.add(otherNode.getAddress()); + providedAddresses.add(otherNode.getAddress()); transportAddressConnector.reachableNodes.add(otherNode); peerFinder.activate(lastAcceptedNodes); From e3f96380120ef6798e5a529cd320e44a1b7cccfe Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 12:28:46 +0100 Subject: [PATCH 53/76] Add another test with failing address resolution --- .../cluster/coordination/PeerFinderTests.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 846b54b67ad3c..0b31f36ff4d86 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -247,6 +247,25 @@ public void testAddsReachableNodesFromUnicastHostsListProvidedLater() { assertFoundPeers(otherNode); } + public void testDoesNotRequireAddressResolutionToSucceed() { + final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); + providedAddresses.add(otherNode.getAddress()); + transportAddressConnector.reachableNodes.add(otherNode); + addressResolveDelay = -1; + + peerFinder.activate(lastAcceptedNodes); + runAllRunnableTasks(); + assertFoundPeers(); + + final long successTime = 10000 + PeerFinder.DISCOVERY_FIND_PEERS_INTERVAL_SETTING.get(Settings.EMPTY).millis(); + while (deterministicTaskQueue.getCurrentTimeMillis() < successTime) { + deterministicTaskQueue.advanceTime(); + runAllRunnableTasks(); + } + + assertFoundPeers(); + } + public void testDoesNotAddUnreachableNodesFromUnicastHostsList() { final DiscoveryNode otherNode = newDiscoveryNode("node-from-hosts-list"); providedAddresses.add(otherNode.getAddress()); From 253e2eadc1a1375d947fe0580cd136c8c99d76f2 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 12:31:43 +0100 Subject: [PATCH 54/76] Delete unused class --- .../UnicastConfiguredHostsResolver.java | 102 ------------------ 1 file changed, 102 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/discovery/UnicastConfiguredHostsResolver.java diff --git a/server/src/main/java/org/elasticsearch/discovery/UnicastConfiguredHostsResolver.java b/server/src/main/java/org/elasticsearch/discovery/UnicastConfiguredHostsResolver.java deleted file mode 100644 index f2027a64d23c1..0000000000000 --- a/server/src/main/java/org/elasticsearch/discovery/UnicastConfiguredHostsResolver.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.discovery; - -import org.apache.lucene.util.SetOnce; -import org.elasticsearch.common.component.AbstractLifecycleComponent; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.transport.TransportAddress; -import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.util.concurrent.AbstractRunnable; -import org.elasticsearch.discovery.zen.UnicastHostsProvider; -import org.elasticsearch.discovery.zen.UnicastZenPing; -import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.transport.TransportService; - -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.function.Consumer; -import java.util.function.Supplier; - -public class UnicastConfiguredHostsResolver extends AbstractLifecycleComponent implements ConfiguredHostsResolver { - - private final AtomicBoolean resolveInProgress = new AtomicBoolean(); - private final TransportService transportService; - private final UnicastHostsProvider hostsProvider; - private final SetOnce executorService = new SetOnce<>(); - private final TimeValue resolveTimeout; - private final Supplier executorServiceFactory; - - public UnicastConfiguredHostsResolver(Settings settings, TransportService transportService, UnicastHostsProvider hostsProvider, - Supplier executorServiceFactory) { - super(settings); - this.transportService = transportService; - this.hostsProvider = hostsProvider; - resolveTimeout = UnicastZenPing.DISCOVERY_ZEN_PING_UNICAST_HOSTS_RESOLVE_TIMEOUT.get(settings); - this.executorServiceFactory = executorServiceFactory; - } - - @Override - protected void doStart() { - executorService.set(executorServiceFactory.get()); - } - - @Override - protected void doStop() { - ThreadPool.terminate(executorService.get(), 10, TimeUnit.SECONDS); - } - - @Override - protected void doClose() { - } - - public void resolveConfiguredHosts(Consumer> consumer) { - if (resolveInProgress.compareAndSet(false, true)) { - transportService.getThreadPool().generic().execute(new AbstractRunnable() { - @Override - public void onFailure(Exception e) { - } - - @Override - protected void doRun() { - List providedAddresses - = hostsProvider.buildDynamicHosts((hosts, limitPortCounts) - -> UnicastZenPing.resolveHostsLists(executorService.get(), logger, hosts, limitPortCounts, - transportService, resolveTimeout)); - - consumer.accept(providedAddresses); - } - - @Override - public void onAfter() { - super.onAfter(); - resolveInProgress.set(false); - } - - @Override - public String toString() { - return "ConfiguredHostsResolver resolving unicast hosts list"; - } - }); - } - } -} From 5d97de79459e84049878ce1531f10814e85f07d8 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 12:54:16 +0100 Subject: [PATCH 55/76] Fix log message --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 2f4a2a33a9b24..e44edd9b6b4a1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -331,7 +331,7 @@ public PeersResponse read(StreamInput in) throws IOException { @Override public void handleResponse(PeersResponse response) { - logger.trace("{} received {} from {}", this, response, discoveryNode); + logger.trace("{} received {} from {}", Peer.this, response, discoveryNode); synchronized (mutex) { if (active == false) { return; From fb171e7fd499720b35ead8b46969c28cefc9886c Mon Sep 17 00:00:00 2001 From: David Turner Date: Sat, 4 Aug 2018 12:55:27 +0100 Subject: [PATCH 56/76] Assert no known peers as soon as deactivated --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index e44edd9b6b4a1..b06110f5e07f3 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -109,6 +109,7 @@ public void deactivate(DiscoveryNode leader) { handleWakeUpUnderLock(); } this.leader = Optional.of(leader); + assert assertInactiveWithNoKnownPeers(); } } From 8126e8685330a6c9de7bfce69a9d6950f5bcef4b Mon Sep 17 00:00:00 2001 From: David Turner Date: Sun, 5 Aug 2018 11:07:21 +0100 Subject: [PATCH 57/76] Private --- .../elasticsearch/cluster/coordination/PeerFinder.java | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index b06110f5e07f3..3fb1ed28fd65e 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -220,11 +220,7 @@ public String toString() { }, findPeersDelay); } - void startProbe(DiscoveryNode discoveryNode) { - startProbe(discoveryNode.getAddress()); - } - - void startProbe(TransportAddress transportAddress) { + private void startProbe(TransportAddress transportAddress) { assert holdsLock() : "PeerFinder mutex not held"; if (active == false) { logger.trace("startProbe({}) not running", transportAddress); @@ -343,7 +339,7 @@ public void handleResponse(PeersResponse response) { if (response.getMasterNode().isPresent()) { final DiscoveryNode masterNode = response.getMasterNode().get(); if (masterNode.equals(discoveryNode) == false) { - startProbe(masterNode); + startProbe(masterNode.getAddress()); } } else { response.getKnownPeers().stream().map(DiscoveryNode::getAddress) From 01e230f88ace50b288d7ff2629b3bff98475e89c Mon Sep 17 00:00:00 2001 From: David Turner Date: Sun, 5 Aug 2018 11:08:56 +0100 Subject: [PATCH 58/76] Inline and rename --- .../cluster/coordination/PeerFinder.java | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 3fb1ed28fd65e..470b2863d49b0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -97,7 +97,7 @@ public void activate(final DiscoveryNodes lastAcceptedNodes) { active = true; this.lastAcceptedNodes = lastAcceptedNodes; leader = Optional.empty(); - handleWakeUpUnderLock(); + handleWakeUp(); } } @@ -106,7 +106,7 @@ public void deactivate(DiscoveryNode leader) { logger.trace("deactivating PeerFinder and setting leader to {}", leader); if (active) { active = false; - handleWakeUpUnderLock(); + handleWakeUp(); } this.leader = Optional.of(leader); assert assertInactiveWithNoKnownPeers(); @@ -159,12 +159,6 @@ public interface TransportAddressConnector { void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener listener); } - private void handleWakeUp() { - synchronized (mutex) { - handleWakeUpUnderLock(); - } - } - private List getKnownPeers() { assert active; assert holdsLock() : "PeerFinder mutex not held"; @@ -184,7 +178,7 @@ private Peer createConnectingPeer(TransportAddress transportAddress) { return peer; } - private void handleWakeUpUnderLock() { + private void handleWakeUp() { assert holdsLock() : "PeerFinder mutex not held"; for (final Peer peer : peersByAddress.values()) { @@ -210,7 +204,9 @@ private void handleWakeUpUnderLock() { futureExecutor.schedule(new Runnable() { @Override public void run() { - handleWakeUp(); + synchronized (mutex) { + handleWakeUp(); + } } @Override From 8e3b1750b9010e6b3d5c3c6fb3f925b7180e4a58 Mon Sep 17 00:00:00 2001 From: David Turner Date: Sun, 5 Aug 2018 11:12:58 +0100 Subject: [PATCH 59/76] Use AbstractRunnable and force execution --- .../cluster/coordination/PeerFinder.java | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 470b2863d49b0..cc1634034379c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -31,6 +31,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.discovery.ConfiguredHostsResolver; import org.elasticsearch.threadpool.ThreadPool.Names; import org.elasticsearch.transport.TransportException; @@ -201,9 +202,20 @@ private void handleWakeUp() { } }); - futureExecutor.schedule(new Runnable() { + futureExecutor.schedule(new AbstractRunnable() { @Override - public void run() { + public boolean isForceExecution() { + return true; + } + + @Override + public void onFailure(Exception e) { + assert false : e; + logger.debug("unexpected exception in PeerFinder wakeup", e); + } + + @Override + protected void doRun() { synchronized (mutex) { handleWakeUp(); } From 4c52e503b8588d7913462e86a27d466ee2803c15 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:22:40 +0100 Subject: [PATCH 60/76] Nullable --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index cc1634034379c..5364776d25b30 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -25,6 +25,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Setting; @@ -252,6 +253,7 @@ private class Peer { this.transportAddress = transportAddress; } + @Nullable DiscoveryNode getDiscoveryNode() { return discoveryNode.get(); } From 121aad305986d6ce2fc01a67d72c17d0248aedf8 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:23:30 +0100 Subject: [PATCH 61/76] Rename and streamify --- .../cluster/coordination/PeerFinder.java | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 5364776d25b30..1f95d7bffd416 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -44,7 +44,9 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; +import java.util.stream.Collectors; import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; @@ -132,7 +134,7 @@ PeersResponse handlePeersRequest(PeersRequest peersRequest) { if (active) { startProbe(peersRequest.getSourceNode().getAddress()); peersRequest.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(this::startProbe); - return new PeersResponse(Optional.empty(), getKnownPeers(), currentTerm); + return new PeersResponse(Optional.empty(), getFoundPeersUnderLock(), currentTerm); } else { return new PeersResponse(leader, Collections.emptyList(), currentTerm); } @@ -161,17 +163,10 @@ public interface TransportAddressConnector { void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener listener); } - private List getKnownPeers() { + private List getFoundPeersUnderLock() { assert active; assert holdsLock() : "PeerFinder mutex not held"; - List knownPeers = new ArrayList<>(peersByAddress.size()); - for (final Peer peer : peersByAddress.values()) { - DiscoveryNode peerNode = peer.getDiscoveryNode(); - if (peerNode != null) { - knownPeers.add(peerNode); - } - } - return knownPeers; + return peersByAddress.values().stream().map(Peer::getDiscoveryNode).filter(Objects::nonNull).collect(Collectors.toList()); } private Peer createConnectingPeer(TransportAddress transportAddress) { @@ -325,7 +320,7 @@ private void requestPeers() { logger.trace("{} requesting peers from {}", this, discoveryNode); peersRequestInFlight = true; - List knownNodes = getKnownPeers(); + List knownNodes = getFoundPeersUnderLock(); transportService.sendRequest(discoveryNode, REQUEST_PEERS_ACTION_NAME, new PeersRequest(getLocalNode(), knownNodes), From c07c7d4f21e0a1972302388d111a274f787facbd Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:24:31 +0100 Subject: [PATCH 62/76] Reorder method --- .../cluster/coordination/PeerFinder.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 1f95d7bffd416..d0ffd3aa444b8 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -87,12 +87,6 @@ public abstract class PeerFinder extends AbstractComponent { (request, channel, task) -> channel.sendResponse(handlePeersRequest(request))); } - public Iterable getFoundPeers() { - synchronized (mutex) { - return getKnownPeers(); - } - } - public void activate(final DiscoveryNodes lastAcceptedNodes) { logger.trace("activating PeerFinder {}", lastAcceptedNodes); @@ -163,6 +157,12 @@ public interface TransportAddressConnector { void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener listener); } + public Iterable getFoundPeers() { + synchronized (mutex) { + return getFoundPeersUnderLock(); + } + } + private List getFoundPeersUnderLock() { assert active; assert holdsLock() : "PeerFinder mutex not held"; From 1d4adccacbb835372efa564216f3a3946686a753 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:24:59 +0100 Subject: [PATCH 63/76] Rename request peers action --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index d0ffd3aa444b8..dce709f75dd00 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -52,7 +52,7 @@ public abstract class PeerFinder extends AbstractComponent { - public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/requestpeers"; + public static final String REQUEST_PEERS_ACTION_NAME = "internal:discovery/request_peers"; // the time between attempts to find all peers public static final Setting DISCOVERY_FIND_PEERS_INTERVAL_SETTING = From f0e7155dec79f83ec76c71c44010c07b9d928b83 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:25:28 +0100 Subject: [PATCH 64/76] Safe to wake up peers even if already deactivated --- .../org/elasticsearch/cluster/coordination/PeerFinder.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index dce709f75dd00..8f70d94a0a613 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -102,10 +102,8 @@ public void activate(final DiscoveryNodes lastAcceptedNodes) { public void deactivate(DiscoveryNode leader) { synchronized (mutex) { logger.trace("deactivating PeerFinder and setting leader to {}", leader); - if (active) { - active = false; - handleWakeUp(); - } + active = false; + handleWakeUp(); this.leader = Optional.of(leader); assert assertInactiveWithNoKnownPeers(); } From c19b3dfd94719b4bd7ce3c3766b061249f5ec260 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:25:51 +0100 Subject: [PATCH 65/76] Imports --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 8f70d94a0a613..713395ef6d02d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -40,7 +40,6 @@ import org.elasticsearch.transport.TransportService; import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; From 2051db6363d1b5105ab578b3ce390e4c655d44ee Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:26:59 +0100 Subject: [PATCH 66/76] Fix log message --- .../java/org/elasticsearch/cluster/coordination/PeerFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 713395ef6d02d..e0125ddf560f1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -180,7 +180,7 @@ private void handleWakeUp() { } if (active == false) { - logger.trace("ActivePeerFinder#handleWakeUp(): not running"); + logger.trace("PeerFinder: not running"); return; } From 221253a3f656a1e47d28ff30c551e33032be5a61 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:28:59 +0100 Subject: [PATCH 67/76] Oneliners --- .../cluster/coordination/PeerFinder.java | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index e0125ddf560f1..3c72ca672d8c6 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -338,15 +338,8 @@ public void handleResponse(PeersResponse response) { peersRequestInFlight = false; - if (response.getMasterNode().isPresent()) { - final DiscoveryNode masterNode = response.getMasterNode().get(); - if (masterNode.equals(discoveryNode) == false) { - startProbe(masterNode.getAddress()); - } - } else { - response.getKnownPeers().stream().map(DiscoveryNode::getAddress) - .forEach(PeerFinder.this::startProbe); - } + response.getMasterNode().map(DiscoveryNode::getAddress).ifPresent(PeerFinder.this::startProbe); + response.getKnownPeers().stream().map(DiscoveryNode::getAddress).forEach(PeerFinder.this::startProbe); } if (response.getMasterNode().equals(Optional.of(discoveryNode))) { From e5414a170acd19535823c02537f6e68dd702b7a1 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:35:36 +0100 Subject: [PATCH 68/76] Add discoveryNode to PeersFinder.Peer.toString() and remove from log messages --- .../cluster/coordination/PeerFinder.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java index 3c72ca672d8c6..371f3bc2e080a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java @@ -267,7 +267,7 @@ void handleWakeUp() { requestPeers(); } } else { - logger.trace("{} no longer connected to {}", this, discoveryNode); + logger.trace("{} no longer connected to {}", this); removePeer(); } } @@ -314,7 +314,7 @@ private void requestPeers() { final DiscoveryNode discoveryNode = getDiscoveryNode(); assert discoveryNode != null : "cannot request peers without first connecting"; - logger.trace("{} requesting peers from {}", this, discoveryNode); + logger.trace("{} requesting peers", this); peersRequestInFlight = true; List knownNodes = getFoundPeersUnderLock(); @@ -330,7 +330,7 @@ public PeersResponse read(StreamInput in) throws IOException { @Override public void handleResponse(PeersResponse response) { - logger.trace("{} received {} from {}", Peer.this, response, discoveryNode); + logger.trace("{} received {}", Peer.this, response); synchronized (mutex) { if (active == false) { return; @@ -352,7 +352,7 @@ public void handleResponse(PeersResponse response) { @Override public void handleException(TransportException exp) { peersRequestInFlight = false; - logger.debug("PeersRequest failed", exp); + logger.debug(new ParameterizedMessage("{} peers request failed", this), exp); } @Override @@ -364,7 +364,11 @@ public String executor() { @Override public String toString() { - return "Peer{" + transportAddress + " peersRequestInFlight=" + peersRequestInFlight + "}"; + return "Peer{" + + "transportAddress=" + transportAddress + + ", discoveryNode=" + discoveryNode.get() + + ", peersRequestInFlight=" + peersRequestInFlight + + '}'; } } } From 3608505db0709ba85e84bbeb5a81c7d640e0b8dd Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:35:43 +0100 Subject: [PATCH 69/76] Remove TODO --- .../org/elasticsearch/cluster/coordination/PeerFinderTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java index 0b31f36ff4d86..4dca886b0e786 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java @@ -119,7 +119,7 @@ public void run() { listener.onResponse(discoveryNode); return; } else { - listener.onFailure(new ElasticsearchException("non-master node " + discoveryNode)); // TODO better exception + listener.onFailure(new ElasticsearchException("non-master node " + discoveryNode)); return; } } From 2013f10d14b6764a23f1c8c5a2a635a9cab3bb5c Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:43:37 +0100 Subject: [PATCH 70/76] Move PeerFinder machinery to discovery package --- .../common/settings/ClusterSettings.java | 2 +- .../discovery/ConfiguredHostsResolver.java | 1 - .../PeerFinder.java | 5 +- .../PeersRequest.java | 2 +- .../cluster/coordination/MessagesTests.java | 76 +----------- .../discovery/PeerFinderMessagesTests.java | 108 ++++++++++++++++++ .../PeerFinderTests.java | 11 +- 7 files changed, 121 insertions(+), 84 deletions(-) rename server/src/main/java/org/elasticsearch/{cluster/coordination => discovery}/PeerFinder.java (98%) rename server/src/main/java/org/elasticsearch/{cluster/coordination => discovery}/PeersRequest.java (98%) create mode 100644 server/src/test/java/org/elasticsearch/discovery/PeerFinderMessagesTests.java rename server/src/test/java/org/elasticsearch/{cluster/coordination => discovery}/PeerFinderTests.java (98%) diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index 24a5750d14aa9..5e3945ba58ead 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -30,7 +30,6 @@ import org.elasticsearch.cluster.InternalClusterInfoService; import org.elasticsearch.cluster.NodeConnectionsService; import org.elasticsearch.cluster.action.index.MappingUpdatedAction; -import org.elasticsearch.cluster.coordination.PeerFinder; import org.elasticsearch.cluster.metadata.IndexGraveyard; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.routing.OperationRouting; @@ -55,6 +54,7 @@ import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.discovery.DiscoveryModule; import org.elasticsearch.discovery.DiscoverySettings; +import org.elasticsearch.discovery.PeerFinder; import org.elasticsearch.discovery.zen.ElectMasterService; import org.elasticsearch.discovery.zen.FaultDetection; import org.elasticsearch.discovery.zen.SettingsBasedHostsProvider; diff --git a/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java b/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java index 8122db66e6c9f..fbec5ef98399c 100644 --- a/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java +++ b/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java @@ -33,4 +33,3 @@ public interface ConfiguredHostsResolver { */ void resolveConfiguredHosts(Consumer> consumer); } - diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java similarity index 98% rename from server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java rename to server/src/main/java/org/elasticsearch/discovery/PeerFinder.java index 371f3bc2e080a..93050683d2965 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java @@ -17,12 +17,14 @@ * under the License. */ -package org.elasticsearch.cluster.coordination; +package org.elasticsearch.discovery; import com.carrotsearch.hppc.cursors.ObjectCursor; import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.util.SetOnce; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.coordination.FutureExecutor; +import org.elasticsearch.cluster.coordination.PeersResponse; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.Nullable; @@ -33,7 +35,6 @@ import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.AbstractRunnable; -import org.elasticsearch.discovery.ConfiguredHostsResolver; import org.elasticsearch.threadpool.ThreadPool.Names; import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportResponseHandler; diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java b/server/src/main/java/org/elasticsearch/discovery/PeersRequest.java similarity index 98% rename from server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java rename to server/src/main/java/org/elasticsearch/discovery/PeersRequest.java index 3ff5f77bc80ac..346cc098f756f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/PeersRequest.java +++ b/server/src/main/java/org/elasticsearch/discovery/PeersRequest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.cluster.coordination; +package org.elasticsearch.discovery; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.common.io.stream.StreamInput; diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java index c0e9494bed611..9b885b4a80036 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java @@ -25,16 +25,6 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.EqualsHashCodeTestUtils; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; - -import static java.util.Collections.emptyList; -import static java.util.Collections.singletonList; - public class MessagesTests extends ESTestCase { private DiscoveryNode createNode(String id) { @@ -155,69 +145,5 @@ public ClusterState randomClusterState() { new ClusterState.VotingConfiguration(Sets.newHashSet(generateRandomStringArray(10, 10, false))), randomLong()); } - - private List modifyDiscoveryNodesList(Collection originalNodes, boolean allowEmpty) { - final List discoveryNodes = new ArrayList<>(originalNodes); - if (discoveryNodes.isEmpty() == false && randomBoolean() && (allowEmpty || discoveryNodes.size() > 1)) { - discoveryNodes.remove(randomIntBetween(0, discoveryNodes.size() - 1)); - } else if (discoveryNodes.isEmpty() == false && randomBoolean()) { - discoveryNodes.set(randomIntBetween(0, discoveryNodes.size() - 1), createNode(randomAlphaOfLength(10))); - } else { - discoveryNodes.add(createNode(randomAlphaOfLength(10))); - } - return discoveryNodes; - } - - public void testPeersRequestEqualsHashCodeSerialization() { - final PeersRequest initialPeersRequest = new PeersRequest(createNode(randomAlphaOfLength(10)), - Arrays.stream(generateRandomStringArray(10, 10, false)).map(this::createNode).collect(Collectors.toList())); - - EqualsHashCodeTestUtils.checkEqualsAndHashCode(initialPeersRequest, - publishRequest -> copyWriteable(publishRequest, writableRegistry(), PeersRequest::new), - in -> { - final List discoveryNodes = new ArrayList<>(in.getKnownPeers()); - if (randomBoolean()) { - return new PeersRequest(createNode(randomAlphaOfLength(10)), discoveryNodes); - } else { - return new PeersRequest(in.getSourceNode(), modifyDiscoveryNodesList(in.getKnownPeers(), true)); - } - }); - } - - public void testPeersResponseEqualsHashCodeSerialization() { - final long initialTerm = randomNonNegativeLong(); - final PeersResponse initialPeersResponse; - - if (randomBoolean()) { - initialPeersResponse = new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), emptyList(), initialTerm); - } else { - initialPeersResponse = new PeersResponse(Optional.empty(), - Arrays.stream(generateRandomStringArray(10, 10, false, false)).map(this::createNode).collect(Collectors.toList()), - initialTerm); - } - - EqualsHashCodeTestUtils.checkEqualsAndHashCode(initialPeersResponse, - publishResponse -> copyWriteable(publishResponse, writableRegistry(), PeersResponse::new), - in -> { - final long term = in.getTerm(); - if (randomBoolean()) { - return new PeersResponse(in.getMasterNode(), in.getKnownPeers(), - randomValueOtherThan(term, ESTestCase::randomNonNegativeLong)); - } else { - if (in.getMasterNode().isPresent()) { - if (randomBoolean()) { - return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), in.getKnownPeers(), term); - } else { - return new PeersResponse(Optional.empty(), singletonList(createNode(randomAlphaOfLength(10))), term); - } - } else { - if (randomBoolean()) { - return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), emptyList(), term); - } else { - return new PeersResponse(in.getMasterNode(), modifyDiscoveryNodesList(in.getKnownPeers(), false), term); - } - } - } - }); - } } + diff --git a/server/src/test/java/org/elasticsearch/discovery/PeerFinderMessagesTests.java b/server/src/test/java/org/elasticsearch/discovery/PeerFinderMessagesTests.java new file mode 100644 index 0000000000000..aabb77b67eef9 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/discovery/PeerFinderMessagesTests.java @@ -0,0 +1,108 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.discovery; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.coordination.PeersResponse; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.EqualsHashCodeTestUtils; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import static java.util.Collections.emptyList; +import static java.util.Collections.singletonList; + +public class PeerFinderMessagesTests extends ESTestCase { + private DiscoveryNode createNode(String id) { + return new DiscoveryNode(id, buildNewFakeTransportAddress(), Version.CURRENT); + } + + public void testPeersRequestEqualsHashCodeSerialization() { + final PeersRequest initialPeersRequest = new PeersRequest(createNode(randomAlphaOfLength(10)), + Arrays.stream(generateRandomStringArray(10, 10, false)).map(this::createNode).collect(Collectors.toList())); + + EqualsHashCodeTestUtils.checkEqualsAndHashCode(initialPeersRequest, + publishRequest -> copyWriteable(publishRequest, writableRegistry(), PeersRequest::new), + in -> { + final List discoveryNodes = new ArrayList<>(in.getKnownPeers()); + if (randomBoolean()) { + return new PeersRequest(createNode(randomAlphaOfLength(10)), discoveryNodes); + } else { + return new PeersRequest(in.getSourceNode(), modifyDiscoveryNodesList(in.getKnownPeers(), true)); + } + }); + } + + public void testPeersResponseEqualsHashCodeSerialization() { + final long initialTerm = randomNonNegativeLong(); + final PeersResponse initialPeersResponse; + + if (randomBoolean()) { + initialPeersResponse = new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), emptyList(), initialTerm); + } else { + initialPeersResponse = new PeersResponse(Optional.empty(), + Arrays.stream(generateRandomStringArray(10, 10, false, false)).map(this::createNode).collect(Collectors.toList()), + initialTerm); + } + + EqualsHashCodeTestUtils.checkEqualsAndHashCode(initialPeersResponse, + publishResponse -> copyWriteable(publishResponse, writableRegistry(), PeersResponse::new), + in -> { + final long term = in.getTerm(); + if (randomBoolean()) { + return new PeersResponse(in.getMasterNode(), in.getKnownPeers(), + randomValueOtherThan(term, ESTestCase::randomNonNegativeLong)); + } else { + if (in.getMasterNode().isPresent()) { + if (randomBoolean()) { + return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), in.getKnownPeers(), term); + } else { + return new PeersResponse(Optional.empty(), singletonList(createNode(randomAlphaOfLength(10))), term); + } + } else { + if (randomBoolean()) { + return new PeersResponse(Optional.of(createNode(randomAlphaOfLength(10))), emptyList(), term); + } else { + return new PeersResponse(in.getMasterNode(), modifyDiscoveryNodesList(in.getKnownPeers(), false), term); + } + } + } + }); + } + + + private List modifyDiscoveryNodesList(Collection originalNodes, boolean allowEmpty) { + final List discoveryNodes = new ArrayList<>(originalNodes); + if (discoveryNodes.isEmpty() == false && randomBoolean() && (allowEmpty || discoveryNodes.size() > 1)) { + discoveryNodes.remove(randomIntBetween(0, discoveryNodes.size() - 1)); + } else if (discoveryNodes.isEmpty() == false && randomBoolean()) { + discoveryNodes.set(randomIntBetween(0, discoveryNodes.size() - 1), createNode(randomAlphaOfLength(10))); + } else { + discoveryNodes.add(createNode(randomAlphaOfLength(10))); + } + return discoveryNodes; + } +} diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java b/server/src/test/java/org/elasticsearch/discovery/PeerFinderTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java rename to server/src/test/java/org/elasticsearch/discovery/PeerFinderTests.java index 4dca886b0e786..c3f5c87d9d9c0 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/PeerFinderTests.java +++ b/server/src/test/java/org/elasticsearch/discovery/PeerFinderTests.java @@ -17,17 +17,20 @@ * under the License. */ -package org.elasticsearch.cluster.coordination; +package org.elasticsearch.discovery; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.cluster.coordination.PeerFinder.TransportAddressConnector; +import org.elasticsearch.cluster.coordination.DeterministicTaskQueue; +import org.elasticsearch.cluster.coordination.FutureExecutor; +import org.elasticsearch.cluster.coordination.PeersResponse; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.node.DiscoveryNodes.Builder; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.discovery.PeerFinder.TransportAddressConnector; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.transport.CapturingTransport; import org.elasticsearch.test.transport.CapturingTransport.CapturedRequest; @@ -58,7 +61,7 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; import static java.util.Collections.singletonList; -import static org.elasticsearch.cluster.coordination.PeerFinder.REQUEST_PEERS_ACTION_NAME; +import static org.elasticsearch.discovery.PeerFinder.REQUEST_PEERS_ACTION_NAME; import static org.elasticsearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.empty; @@ -436,7 +439,7 @@ public void testReceivesRequestsFromTransportService() { final AtomicBoolean responseReceived = new AtomicBoolean(); - transportService.sendRequest(localNode, PeerFinder.REQUEST_PEERS_ACTION_NAME, new PeersRequest(sourceNode, Collections.emptyList()), + transportService.sendRequest(localNode, REQUEST_PEERS_ACTION_NAME, new PeersRequest(sourceNode, Collections.emptyList()), new TransportResponseHandler() { @Override public void handleResponse(PeersResponse response) { From 8b605ada5cfcd6a76704fceb2fc38aceda86cd2a Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:45:06 +0100 Subject: [PATCH 71/76] Move ConfiguredHostsResolver interface into PeerFinder --- .../discovery/ConfiguredHostsResolver.java | 35 ------------------- .../elasticsearch/discovery/PeerFinder.java | 11 ++++++ 2 files changed, 11 insertions(+), 35 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java diff --git a/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java b/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java deleted file mode 100644 index fbec5ef98399c..0000000000000 --- a/server/src/main/java/org/elasticsearch/discovery/ConfiguredHostsResolver.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.discovery; - -import org.elasticsearch.common.transport.TransportAddress; - -import java.util.List; -import java.util.function.Consumer; - -public interface ConfiguredHostsResolver { - /** - * Attempt to resolve the configured unicast hosts list to a list of transport addresses. - * - * @param consumer Consumer for the resolved list. May not be called if an error occurs or if another resolution attempt is in - * progress. - */ - void resolveConfiguredHosts(Consumer> consumer); -} diff --git a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java index 93050683d2965..6d7659259bb94 100644 --- a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java @@ -46,6 +46,7 @@ import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.function.Consumer; import java.util.stream.Collectors; import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; @@ -155,6 +156,16 @@ public interface TransportAddressConnector { void connectToRemoteMasterNode(TransportAddress transportAddress, ActionListener listener); } + public interface ConfiguredHostsResolver { + /** + * Attempt to resolve the configured unicast hosts list to a list of transport addresses. + * + * @param consumer Consumer for the resolved list. May not be called if an error occurs or if another resolution attempt is in + * progress. + */ + void resolveConfiguredHosts(Consumer> consumer); + } + public Iterable getFoundPeers() { synchronized (mutex) { return getFoundPeersUnderLock(); From 9aa9003c779dbee71c5b7432dad499c8f5ac3ed9 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 09:56:19 +0100 Subject: [PATCH 72/76] Logger usage --- .../src/main/java/org/elasticsearch/discovery/PeerFinder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java index 6d7659259bb94..87bd8c834136e 100644 --- a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java @@ -279,7 +279,7 @@ void handleWakeUp() { requestPeers(); } } else { - logger.trace("{} no longer connected to {}", this); + logger.trace("{} no longer connected", this); removePeer(); } } From b517ce91b072594e3373ddb3b79fc5523ac251d7 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 10:01:41 +0100 Subject: [PATCH 73/76] Whitespace --- .../org/elasticsearch/cluster/coordination/MessagesTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java index 9b885b4a80036..7fa4a3217348f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/MessagesTests.java @@ -146,4 +146,3 @@ public ClusterState randomClusterState() { randomLong()); } } - From 253a994f95ddb4ea32a5a83d75b6b581abd64193 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 12:28:14 +0100 Subject: [PATCH 74/76] No need to refer to class name in log messages --- .../java/org/elasticsearch/discovery/PeerFinder.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java index 87bd8c834136e..8c230dd93a6ad 100644 --- a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java @@ -89,7 +89,7 @@ public abstract class PeerFinder extends AbstractComponent { } public void activate(final DiscoveryNodes lastAcceptedNodes) { - logger.trace("activating PeerFinder {}", lastAcceptedNodes); + logger.trace("activating with {}", lastAcceptedNodes); synchronized (mutex) { assert active == false; @@ -102,7 +102,7 @@ public void activate(final DiscoveryNodes lastAcceptedNodes) { public void deactivate(DiscoveryNode leader) { synchronized (mutex) { - logger.trace("deactivating PeerFinder and setting leader to {}", leader); + logger.trace("deactivating and setting leader to {}", leader); active = false; handleWakeUp(); this.leader = Optional.of(leader); @@ -192,17 +192,18 @@ private void handleWakeUp() { } if (active == false) { - logger.trace("PeerFinder: not running"); + logger.trace("not active"); return; } + logger.trace("probing master nodes from cluster state: {}", lastAcceptedNodes); for (ObjectCursor discoveryNodeObjectCursor : lastAcceptedNodes.getMasterNodes().values()) { startProbe(discoveryNodeObjectCursor.value.getAddress()); } configuredHostsResolver.resolveConfiguredHosts(providedAddresses -> { synchronized (mutex) { - logger.trace("PeerFinder: probing resolved transport addresses {}", providedAddresses); + logger.trace("probing resolved transport addresses {}", providedAddresses); providedAddresses.forEach(this::startProbe); } }); @@ -216,7 +217,7 @@ public boolean isForceExecution() { @Override public void onFailure(Exception e) { assert false : e; - logger.debug("unexpected exception in PeerFinder wakeup", e); + logger.debug("unexpected exception in wakeup", e); } @Override From 7948268c09af8bc024a1d29ee2710bd22dec6462 Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 12:35:18 +0100 Subject: [PATCH 75/76] Can only deactivate an active PeerFinder --- server/src/main/java/org/elasticsearch/discovery/PeerFinder.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java index 8c230dd93a6ad..10d4115352da7 100644 --- a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java @@ -102,6 +102,7 @@ public void activate(final DiscoveryNodes lastAcceptedNodes) { public void deactivate(DiscoveryNode leader) { synchronized (mutex) { + assert active; logger.trace("deactivating and setting leader to {}", leader); active = false; handleWakeUp(); From b4df5f38f14d9c0ce73f809058f5622bb86e13ee Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 6 Aug 2018 12:39:11 +0100 Subject: [PATCH 76/76] Revert "Can only deactivate an active PeerFinder" This reverts commit 7948268c09af8bc024a1d29ee2710bd22dec6462. --- server/src/main/java/org/elasticsearch/discovery/PeerFinder.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java index 10d4115352da7..8c230dd93a6ad 100644 --- a/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java +++ b/server/src/main/java/org/elasticsearch/discovery/PeerFinder.java @@ -102,7 +102,6 @@ public void activate(final DiscoveryNodes lastAcceptedNodes) { public void deactivate(DiscoveryNode leader) { synchronized (mutex) { - assert active; logger.trace("deactivating and setting leader to {}", leader); active = false; handleWakeUp();