-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-25032 Wait for region server to become online before adding it to online servers in Master #2769
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HBASE-25032 Wait for region server to become online before adding it to online servers in Master #2769
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1014,10 +1014,9 @@ public void run() { | |
| // node was created, in case any coprocessors want to use ZooKeeper | ||
| this.rsHost = new RegionServerCoprocessorHost(this, this.conf); | ||
|
|
||
| // Try and register with the Master; tell it we are here. Break if server is stopped or | ||
| // the clusterup flag is down or hdfs went wacky. Once registered successfully, go ahead and | ||
| // start up all Services. Use RetryCounter to get backoff in case Master is struggling to | ||
| // come up. | ||
| // Get configurations from the Master. Break if server is stopped or | ||
| // the clusterup flag is down or hdfs went wacky. Then start up all Services. | ||
| // Use RetryCounter to get backoff in case Master is struggling to come up. | ||
| LOG.debug("About to register with Master."); | ||
| RetryCounterFactory rcf = | ||
| new RetryCounterFactory(Integer.MAX_VALUE, this.sleeper.getPeriod(), 1000 * 60 * 5); | ||
|
|
@@ -1050,7 +1049,7 @@ public void run() { | |
| } | ||
| } | ||
|
|
||
| // We registered with the Master. Go into run mode. | ||
| // Run mode. | ||
| long lastMsg = System.currentTimeMillis(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about setting lastMsg here to Long.MAX_LONG so that we heartbeat immediately after the report-for-duty... so there is no lag before the RS is 'online'? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you mean set it to … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You are right. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So, do it in a follow-on, @caroliney14? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @saintstack do we have to do this one? Asking because the … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, we do not have to do it. It was a suggestion. You've done a mountain of great work in here already. I'll keep an eye on it. I was thinking we would heartbeat immediately, but that might not be a good idea on a big cluster.... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sounds good, thank you @saintstack |
||
| long oldRequestCount = -1; | ||
| // The main run loop. | ||
|
|
@@ -1084,7 +1083,14 @@ public void run() { | |
| } | ||
| long now = System.currentTimeMillis(); | ||
| if ((now - lastMsg) >= msgInterval) { | ||
| tryRegionServerReport(lastMsg, now); | ||
| // Register with the Master now that our setup is complete. | ||
| if (tryRegionServerReport(lastMsg, now) && !online.get()) { | ||
| // Wake up anyone waiting for this server to online | ||
| synchronized (online) { | ||
| online.set(true); | ||
| online.notifyAll(); | ||
| } | ||
| } | ||
| lastMsg = System.currentTimeMillis(); | ||
| } | ||
| if (!isStopped() && !isAborted()) { | ||
|
|
@@ -1253,12 +1259,12 @@ private long getWriteRequestCount() { | |
| } | ||
|
|
||
| @InterfaceAudience.Private | ||
| protected void tryRegionServerReport(long reportStartTime, long reportEndTime) | ||
| protected boolean tryRegionServerReport(long reportStartTime, long reportEndTime) | ||
| throws IOException { | ||
| RegionServerStatusService.BlockingInterface rss = rssStub; | ||
| if (rss == null) { | ||
| // the current server could be stopping. | ||
| return; | ||
| return false; | ||
| } | ||
| ClusterStatusProtos.ServerLoad sl = buildServerLoad(reportStartTime, reportEndTime); | ||
| try { | ||
|
|
@@ -1278,7 +1284,9 @@ protected void tryRegionServerReport(long reportStartTime, long reportEndTime) | |
| // Couldn't connect to the master, get location from zk and reconnect | ||
| // Method blocks until new master is found or we are stopped | ||
| createRegionServerStatusStub(true); | ||
| return false; | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -1653,11 +1661,6 @@ protected void handleReportForDutyResponse(final RegionServerStartupResponse c) | |
| ", sessionid=0x" + | ||
| Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId())); | ||
|
|
||
| // Wake up anyone waiting for this server to online | ||
| synchronized (online) { | ||
| online.set(true); | ||
| online.notifyAll(); | ||
| } | ||
| } catch (Throwable e) { | ||
| stop("Failed initialization"); | ||
| throw convertThrowableToIOE(cleanup(e, "Failed init"), | ||
|
|
@@ -2836,10 +2839,9 @@ private boolean keepLooping() { | |
| } | ||
|
|
||
| /* | ||
| * Let the master know we're here Run initialization using parameters passed | ||
| * us by the master. | ||
| * Run initialization using parameters passed us by the master. | ||
| * @return A Map of key/value configurations we got from the Master else | ||
| * null if we failed to register. | ||
| * null if we failed during report. | ||
| * @throws IOException | ||
| */ | ||
| private RegionServerStartupResponse reportForDuty() throws IOException { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.