Skip to content

Commit edee015

Browse files
committed
Warn on not enough masters during election (#20063)
This changes the trace-level log message to warn level, and adds the required number of master nodes to the message as well. My fear is that it may get noisy, but this is an issue that you want to be noisy.
1 parent ff4065c commit edee015

File tree

4 files changed

+31
-11
lines changed

4 files changed

+31
-11
lines changed

core/src/main/java/org/elasticsearch/discovery/zen/ElectMasterService.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,14 @@ public int minimumMasterNodes() {
114114
return minimumMasterNodes;
115115
}
116116

117-
public boolean hasEnoughMasterNodes(Iterable<DiscoveryNode> nodes) {
117+
public int countMasterNodes(Iterable<DiscoveryNode> nodes) {
118118
int count = 0;
119119
for (DiscoveryNode node : nodes) {
120120
if (node.isMasterNode()) {
121121
count++;
122122
}
123123
}
124-
return count > 0 && (minimumMasterNodes < 0 || count >= minimumMasterNodes);
124+
return count;
125125
}
126126

127127
public boolean hasEnoughCandidates(Collection<MasterCandidate> candidates) {
@@ -152,13 +152,12 @@ public DiscoveryNode tieBreakActiveMasters(Collection<DiscoveryNode> activeMaste
152152
return activeMasters.stream().min(ElectMasterService::compareNodes).get();
153153
}
154154

155+
public boolean hasEnoughMasterNodes(Iterable<DiscoveryNode> nodes) {
156+
return minimumMasterNodes < 1 || countMasterNodes(nodes) >= minimumMasterNodes;
157+
}
158+
155159
public boolean hasTooManyMasterNodes(Iterable<DiscoveryNode> nodes) {
156-
int count = 0;
157-
for (DiscoveryNode node : nodes) {
158-
if (node.isMasterNode()) {
159-
count++;
160-
}
161-
}
160+
final int count = countMasterNodes(nodes);
162161
return count > 1 && minimumMasterNodes <= count / 2;
163162
}
164163

core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.elasticsearch.common.io.stream.StreamInput;
4848
import org.elasticsearch.common.io.stream.StreamOutput;
4949
import org.elasticsearch.common.lease.Releasables;
50+
import org.elasticsearch.common.logging.LoggerMessageFormat;
5051
import org.elasticsearch.common.settings.Setting;
5152
import org.elasticsearch.common.settings.Setting.Property;
5253
import org.elasticsearch.common.settings.Settings;
@@ -581,8 +582,10 @@ public ClusterTasksResult<Task> execute(final ClusterState currentState, final L
581582
final ClusterState remainingNodesClusterState = remainingNodesClusterState(currentState, remainingNodesBuilder);
582583

583584
final ClusterTasksResult.Builder<Task> resultBuilder = ClusterTasksResult.<Task>builder().successes(tasks);
584-
if (!electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes())) {
585-
rejoin.accept("not enough master nodes");
585+
if (electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes()) == false) {
586+
final int masterNodes = electMasterService.countMasterNodes(remainingNodesClusterState.nodes());
587+
rejoin.accept(LoggerMessageFormat.format("not enough master nodes (has [{}], but needed [{}])",
588+
masterNodes, electMasterService.minimumMasterNodes()));
586589
return resultBuilder.build(currentState);
587590
} else {
588591
return resultBuilder.build(allocationService.deassociateDeadNodes(remainingNodesClusterState, true, describeTasks(tasks)));
@@ -924,7 +927,8 @@ private DiscoveryNode findMaster() {
924927
return winner.getNode();
925928
} else {
926929
// if we don't have enough master nodes, we bail, because there are not enough master to elect from
927-
logger.trace("not enough master nodes [{}]", masterCandidates);
930+
logger.warn("not enough master nodes discovered during pinging (found [{}], but needed [{}]), pinging again",
931+
masterCandidates, electMaster.minimumMasterNodes());
928932
return null;
929933
}
930934
} else {

core/src/test/java/org/elasticsearch/discovery/zen/ElectMasterServiceTests.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,19 @@ public void testElectMaster() {
139139
}
140140
}
141141
}
142+
143+
public void testCountMasterNodes() {
144+
List<DiscoveryNode> nodes = generateRandomNodes();
145+
ElectMasterService service = electMasterService();
146+
147+
int masterNodes = 0;
148+
149+
for (DiscoveryNode node : nodes) {
150+
if (node.isMasterNode()) {
151+
masterNodes++;
152+
}
153+
}
154+
155+
assertEquals(masterNodes, service.countMasterNodes(nodes));
156+
}
142157
}

core/src/test/java/org/elasticsearch/discovery/zen/NodeRemovalClusterStateTaskExecutorTests.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNode
109109
final ClusterStateTaskExecutor.ClusterTasksResult<ZenDiscovery.NodeRemovalClusterStateTaskExecutor.Task> result =
110110
executor.execute(clusterState, tasks);
111111
verify(electMasterService).hasEnoughMasterNodes(eq(remainingNodesClusterState.get().nodes()));
112+
verify(electMasterService).countMasterNodes(eq(remainingNodesClusterState.get().nodes()));
113+
verify(electMasterService).minimumMasterNodes();
112114
verifyNoMoreInteractions(electMasterService);
113115

114116
// ensure that we did not reroute

0 commit comments

Comments
 (0)