Skip to content

Commit 12c8423

Browse files
authored
Warn on not enough masters during election (#20063)
This changes the trace-level logging to warn level and adds the required minimum number of master nodes to the message as well. My fear is that it may get noisy, but this is an issue that you want to be noisy.
1 parent dd3bbfb commit 12c8423

File tree

4 files changed

+31
-11
lines changed

4 files changed

+31
-11
lines changed

core/src/main/java/org/elasticsearch/discovery/zen/ElectMasterService.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,14 @@ public int minimumMasterNodes() {
111111
return minimumMasterNodes;
112112
}
113113

114-
public boolean hasEnoughMasterNodes(Iterable<DiscoveryNode> nodes) {
114+
public int countMasterNodes(Iterable<DiscoveryNode> nodes) {
115115
int count = 0;
116116
for (DiscoveryNode node : nodes) {
117117
if (node.isMasterNode()) {
118118
count++;
119119
}
120120
}
121-
return count > 0 && (minimumMasterNodes < 0 || count >= minimumMasterNodes);
121+
return count;
122122
}
123123

124124
public boolean hasEnoughCandidates(Collection<MasterCandidate> candidates) {
@@ -149,13 +149,12 @@ public DiscoveryNode tieBreakActiveMasters(Collection<DiscoveryNode> activeMaste
149149
return activeMasters.stream().min(ElectMasterService::compareNodes).get();
150150
}
151151

152+
public boolean hasEnoughMasterNodes(Iterable<DiscoveryNode> nodes) {
153+
return minimumMasterNodes < 1 || countMasterNodes(nodes) >= minimumMasterNodes;
154+
}
155+
152156
public boolean hasTooManyMasterNodes(Iterable<DiscoveryNode> nodes) {
153-
int count = 0;
154-
for (DiscoveryNode node : nodes) {
155-
if (node.isMasterNode()) {
156-
count++;
157-
}
158-
}
157+
final int count = countMasterNodes(nodes);
159158
return count > 1 && minimumMasterNodes <= count / 2;
160159
}
161160

core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
import org.elasticsearch.common.io.stream.StreamInput;
4848
import org.elasticsearch.common.io.stream.StreamOutput;
4949
import org.elasticsearch.common.lease.Releasables;
50+
import org.elasticsearch.common.logging.LoggerMessageFormat;
5051
import org.elasticsearch.common.settings.Setting;
5152
import org.elasticsearch.common.settings.Setting.Property;
5253
import org.elasticsearch.common.settings.Settings;
@@ -580,8 +581,10 @@ public ClusterTasksResult<Task> execute(final ClusterState currentState, final L
580581
final ClusterState remainingNodesClusterState = remainingNodesClusterState(currentState, remainingNodesBuilder);
581582

582583
final ClusterTasksResult.Builder<Task> resultBuilder = ClusterTasksResult.<Task>builder().successes(tasks);
583-
if (!electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes())) {
584-
rejoin.accept("not enough master nodes");
584+
if (electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes()) == false) {
585+
final int masterNodes = electMasterService.countMasterNodes(remainingNodesClusterState.nodes());
586+
rejoin.accept(LoggerMessageFormat.format("not enough master nodes (has [{}], but needed [{}])",
587+
masterNodes, electMasterService.minimumMasterNodes()));
585588
return resultBuilder.build(currentState);
586589
} else {
587590
return resultBuilder.build(allocationService.deassociateDeadNodes(remainingNodesClusterState, true, describeTasks(tasks)));
@@ -920,7 +923,8 @@ private DiscoveryNode findMaster() {
920923
return winner.getNode();
921924
} else {
922925
// if we don't have enough master nodes, we bail, because there are not enough masters to elect from
923-
logger.trace("not enough master nodes [{}]", masterCandidates);
926+
logger.warn("not enough master nodes discovered during pinging (found [{}], but needed [{}]), pinging again",
927+
masterCandidates, electMaster.minimumMasterNodes());
924928
return null;
925929
}
926930
} else {

core/src/test/java/org/elasticsearch/discovery/zen/ElectMasterServiceTests.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,4 +139,19 @@ public void testElectMaster() {
139139
}
140140
}
141141
}
142+
143+
public void testCountMasterNodes() {
144+
List<DiscoveryNode> nodes = generateRandomNodes();
145+
ElectMasterService service = electMasterService();
146+
147+
int masterNodes = 0;
148+
149+
for (DiscoveryNode node : nodes) {
150+
if (node.isMasterNode()) {
151+
masterNodes++;
152+
}
153+
}
154+
155+
assertEquals(masterNodes, service.countMasterNodes(nodes));
156+
}
142157
}

core/src/test/java/org/elasticsearch/discovery/zen/NodeRemovalClusterStateTaskExecutorTests.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNode
108108
final ClusterStateTaskExecutor.ClusterTasksResult<ZenDiscovery.NodeRemovalClusterStateTaskExecutor.Task> result =
109109
executor.execute(clusterState, tasks);
110110
verify(electMasterService).hasEnoughMasterNodes(eq(remainingNodesClusterState.get().nodes()));
111+
verify(electMasterService).countMasterNodes(eq(remainingNodesClusterState.get().nodes()));
112+
verify(electMasterService).minimumMasterNodes();
111113
verifyNoMoreInteractions(electMasterService);
112114

113115
// ensure that we did not reroute

0 commit comments

Comments
 (0)