Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public interface MetricsMasterSource extends BaseSource {
String AVERAGE_LOAD_NAME = "averageLoad";
// Name of the tag that carries the value of MetricsMasterWrapper#getRegionServers().
String LIVE_REGION_SERVERS_NAME = "liveRegionServers";
String DEAD_REGION_SERVERS_NAME = "deadRegionServers";
// Name of the gauge that carries the value of
// MetricsMasterWrapper#getNumServersBelowExpected().
String SERVERS_BELOW_EXPECTED_COUNT_NAME = "serversBelowExpectedCount";
String NUM_REGION_SERVERS_NAME = "numRegionServers";
String NUM_DEAD_REGION_SERVERS_NAME = "numDeadRegionServers";
String ZOOKEEPER_QUORUM_NAME = "zookeeperQuorum";
Expand All @@ -74,6 +75,8 @@ public interface MetricsMasterSource extends BaseSource {
String AVERAGE_LOAD_DESC = "AverageLoad";
String LIVE_REGION_SERVERS_DESC = "Names of live RegionServers";
String NUMBER_OF_REGION_SERVERS_DESC = "Number of RegionServers";
// Human-readable description registered together with SERVERS_BELOW_EXPECTED_COUNT_NAME.
String SERVERS_BELOW_EXPECTED_COUNT_DESC = "Number of region servers missing to reach"
+ " the expected value specified in master configuration";
String DEAD_REGION_SERVERS_DESC = "Names of dead RegionServers";
String NUMBER_OF_DEAD_REGION_SERVERS_DESC = "Number of dead RegionServers";
String ZOOKEEPER_QUORUM_DESC = "ZooKeeper Quorum";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,10 @@ public interface MetricsMasterWrapper {
* @return pair of count for online regions and offline regions
*/
PairOfSameType<Integer> getRegionCounts();

/**
 * Reports how many region servers the cluster is short of the expected count.
 *
 * @return Number of region servers missing to reach
 *         the expected value specified in master configuration
 */
int getNumServersBelowExpected();
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ public void getMetrics(MetricsCollector metricsCollector, boolean all) {
regionNumberPair.getFirst())
.addGauge(Interns.info(OFFLINE_REGION_COUNT_NAME, OFFLINE_REGION_COUNT_DESC),
regionNumberPair.getSecond())
.addGauge(Interns.info(SERVERS_BELOW_EXPECTED_COUNT_NAME,
SERVERS_BELOW_EXPECTED_COUNT_DESC), masterWrapper.getNumServersBelowExpected())
.tag(Interns.info(LIVE_REGION_SERVERS_NAME, LIVE_REGION_SERVERS_DESC),
masterWrapper.getRegionServers())
.addGauge(Interns.info(NUM_REGION_SERVERS_NAME,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,15 @@ public int getNumDeadRegionServers() {
return serverManager.getDeadServers().size();
}

/**
 * Delegates to the master's {@code ServerManager} to obtain the number of region servers
 * missing relative to the expected count.
 *
 * @return the value reported by the server manager, or 0 when the master has no server
 *         manager available
 */
@Override
public int getNumServersBelowExpected() {
  final ServerManager manager = this.master.getServerManager();
  // Without a server manager there is nothing to compare against, so report no shortfall.
  return manager == null ? 0 : manager.getNumServersBelowExpected();
}

@Override
public String getServerName() {
ServerName serverName = master.getServerName();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,18 @@ public class ServerManager {
// Default flusher interval: 3 h * 60 min * 60 s * 1000 (presumably milliseconds -- confirm
// against the code that reads this value).
public static final int FLUSHEDSEQUENCEID_FLUSHER_INTERVAL_DEFAULT =
3 * 60 * 60 * 1000; // 3 hours


/**
 * The number of region servers the master expects to be present; intended for "cloud"
 * use-cases. When running on a fixed set of machines, the dead server accounting currently
 * replaces a dead server when a new live one comes up on the same host+port; however, this may
 * never happen if HBase is running on something like YARN with many more machines than region
 * servers. Instead, one can tell the master how many region servers to expect in total, so that
 * it can report an alternative metric. -1 (the default) disables this feature; any other
 * non-positive value is likewise treated as "no expectation" by
 * {@link #getNumServersBelowExpected()}.
 */
public static final String REGIONSERVERS_EXPECTED_COUNT = "hbase.master.expected.regionservers";
public static final int REGIONSERVERS_EXPECTED_COUNT_DEFAULT = -1;

private static final Logger LOG = LoggerFactory.getLogger(ServerManager.class);

// Set if we are to shutdown the cluster.
Expand All @@ -147,6 +159,8 @@ public class ServerManager {
/** File on hdfs to store last flushed sequence id of regions */
private static final String LAST_FLUSHED_SEQ_ID_FILE = ".lastflushedseqids";
private FlushedSequenceIdFlusher flushedSeqIdFlusher;
/**
 * Expected region server count, for metrics; read once from the configuration in the
 * constructor. A value of zero or less means there is no expectation.
 */
private final int expectedRsCount;


/**
Expand Down Expand Up @@ -193,6 +207,7 @@ public ServerManager(final MasterServices master) {
this.rpcControllerFactory = this.connection == null? null: connection.getRpcControllerFactory();
persistFlushedSequenceId = c.getBoolean(PERSIST_FLUSHEDSEQUENCEID,
PERSIST_FLUSHEDSEQUENCEID_DEFAULT);
expectedRsCount = c.getInt(REGIONSERVERS_EXPECTED_COUNT, REGIONSERVERS_EXPECTED_COUNT_DEFAULT);
}

/**
Expand Down Expand Up @@ -1171,6 +1186,13 @@ public void removeDeletedRegionFromLoadedFlushedSequenceIds() {
}
}

/**
 * Computes how many region servers are missing relative to the configured expected count.
 *
 * @return the expected count minus the number of online servers, clamped at zero; always 0
 *         when the expected count is not positive (feature disabled)
 */
public int getNumServersBelowExpected() {
  if (expectedRsCount > 0) {
    final int shortfall = expectedRsCount - this.onlineServers.size();
    // More servers online than expected is not a shortfall; never report a negative value.
    return shortfall > 0 ? shortfall : 0;
  }
  return 0;
}

private class FlushedSequenceIdFlusher extends ScheduledChore {

public FlushedSequenceIdFlusher(String name, int p) {
Expand Down