@@ -213,6 +213,9 @@ public void start() throws IOException, KeeperException {
213213 try {
214214 regionNode .setRegionLocation (regionState .getServerName ());
215215 regionNode .setState (regionState .getState ());
216+ if (regionNode .getProcedure () != null ) {
217+ regionNode .getProcedure ().stateLoaded (this , regionNode );
218+ }
216219 setMetaAssigned (regionState .getRegion (), regionState .getState () == State .OPEN );
217220 } finally {
218221 regionNode .unlock ();
@@ -235,14 +238,12 @@ public void setupRIT(List<TransitRegionStateProcedure> procs) {
235238 TransitRegionStateProcedure existingProc = regionNode .getProcedure ();
236239 if (existingProc != null ) {
237240 // This is possible, as we will detach the procedure from the RSN before we
238- // actually finish the procedure. This is because that, we will update the region state
239- // directly in the reportTransition method for TRSP, and theoretically the region transition
240- // has been done, so we need to detach the procedure from the RSN. But actually the
241- // procedure has not been marked as done in the pv2 framework yet, so it is possible that we
242- // schedule a new TRSP immediately and when arriving here, we will find out that there are
243- // multiple TRSPs for the region. But we can make sure that, only the last one can take the
244- // charge, the previous ones should have all been finished already.
245- // So here we will compare the proc id, the greater one will win.
241+ // actually finish the procedure. This is because that, we will detach the TRSP from the RSN
242+ // during execution, at that time, the procedure has not been marked as done in the pv2
243+ // framework yet, so it is possible that we schedule a new TRSP immediately and when
244+ // arriving here, we will find out that there are multiple TRSPs for the region. But we can
245+ // make sure that, only the last one can take the charge, the previous ones should have all
246+ // been finished already. So here we will compare the proc id, the greater one will win.
246247 if (existingProc .getProcId () < proc .getProcId ()) {
247248 // the new one wins, unset and set it to the new one below
248249 regionNode .unsetProcedure (existingProc );
@@ -1307,7 +1308,6 @@ public void visitRegionState(Result result, final RegionInfo regionInfo, final S
13071308 // In any of these cases, state is empty. For now, presume OFFLINE but there are probably
13081309 // cases where we need to probe more to be sure this correct; TODO informed by experience.
13091310 LOG .info (regionInfo .getEncodedName () + " regionState=null; presuming " + State .OFFLINE );
1310-
13111311 localState = State .OFFLINE ;
13121312 }
13131313 RegionStateNode regionNode = regionStates .getOrCreateRegionStateNode (regionInfo );
@@ -1328,6 +1328,9 @@ public void visitRegionState(Result result, final RegionInfo regionInfo, final S
13281328 } else if (localState == State .OFFLINE || regionInfo .isOffline ()) {
13291329 regionStates .addToOfflineRegions (regionNode );
13301330 }
1331+ if (regionNode .getProcedure () != null ) {
1332+ regionNode .getProcedure ().stateLoaded (AssignmentManager .this , regionNode );
1333+ }
13311334 }
13321335 });
13331336
@@ -1488,8 +1491,9 @@ public RegionInfo getRegionInfo(final byte[] regionName) {
14881491 private static final State [] STATES_EXPECTED_ON_UNASSIGN_OR_MOVE = { State .OPEN };
14891492
14901493 // ============================================================================================
1491- // Region Status update
1492- // Should only be called in TransitRegionStateProcedure
1494+ // Region Status update
1495+ // Should only be called in TransitRegionStateProcedure(and related procedures), as the locking
1496+ // and pre-assumptions are very tricky.
14931497 // ============================================================================================
14941498 private void transitStateAndUpdate (RegionStateNode regionNode , RegionState .State newState ,
14951499 RegionState .State ... expectedStates ) throws IOException {
@@ -1518,7 +1522,7 @@ void regionOpening(RegionStateNode regionNode) throws IOException {
15181522 metrics .incrementOperationCounter ();
15191523 }
15201524
1521- // should be called within the synchronized block of RegionStateNode.
1525+ // should be called under the RegionStateNode lock
15221526 // The parameter 'giveUp' means whether we will try to open the region again, if it is true, then
15231527 // we will persist the FAILED_OPEN state into hbase:meta.
15241528 void regionFailedOpen (RegionStateNode regionNode , boolean giveUp ) throws IOException {
@@ -1544,24 +1548,7 @@ void regionFailedOpen(RegionStateNode regionNode, boolean giveUp) throws IOExcep
15441548 }
15451549 }
15461550
1547- // should be called within the synchronized block of RegionStateNode
1548- void regionOpened (RegionStateNode regionNode ) throws IOException {
1549- // TODO: OPENING Updates hbase:meta too... we need to do both here and there?
1550- // That is a lot of hbase:meta writing.
1551- transitStateAndUpdate (regionNode , State .OPEN , STATES_EXPECTED_ON_OPEN );
1552- RegionInfo hri = regionNode .getRegionInfo ();
1553- if (isMetaRegion (hri )) {
1554- // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it
1555- // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager
1556- // which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state
1557- // on table that contains state.
1558- setMetaAssigned (hri , true );
1559- }
1560- regionStates .addRegionToServer (regionNode );
1561- regionStates .removeFromFailedOpen (hri );
1562- }
1563-
1564- // should be called within the synchronized block of RegionStateNode
1551+ // should be called under the RegionStateNode lock
15651552 void regionClosing (RegionStateNode regionNode ) throws IOException {
15661553 transitStateAndUpdate (regionNode , State .CLOSING , STATES_EXPECTED_ON_CLOSING );
15671554
@@ -1575,18 +1562,36 @@ void regionClosing(RegionStateNode regionNode) throws IOException {
15751562 metrics .incrementOperationCounter ();
15761563 }
15771564
1578- // should be called within the synchronized block of RegionStateNode
1579- // The parameter 'normally' means whether we are closed cleanly, if it is true, then it means that
1580- // we are closed due to a RS crash.
1581- void regionClosed (RegionStateNode regionNode , boolean normally ) throws IOException {
1582- RegionState .State state = regionNode .getState ();
1565+ // for open and close, they will first be persist to the procedure store in
1566+ // RegionRemoteProcedureBase. So here we will first change the in memory state as it is considered
1567+ // as succeeded if the persistence to procedure store is succeeded, and then when the
1568+ // RegionRemoteProcedureBase is woken up, we will persist the RegionStateNode to hbase:meta.
1569+
1570+ // should be called under the RegionStateNode lock
1571+ void regionOpenedWithoutPersistingToMeta (RegionStateNode regionNode ) throws IOException {
1572+ regionNode .transitionState (State .OPEN , STATES_EXPECTED_ON_OPEN );
1573+ RegionInfo regionInfo = regionNode .getRegionInfo ();
1574+ regionStates .addRegionToServer (regionNode );
1575+ regionStates .removeFromFailedOpen (regionInfo );
1576+ }
1577+
1578+ // should be called under the RegionStateNode lock
1579+ void regionClosedWithoutPersistingToMeta (RegionStateNode regionNode ) throws IOException {
15831580 ServerName regionLocation = regionNode .getRegionLocation ();
1584- if ( normally ) {
1585- regionNode .transitionState ( State . CLOSED , STATES_EXPECTED_ON_CLOSED );
1586- } else {
1587- // For SCP
1588- regionNode . transitionState ( State . ABNORMALLY_CLOSED );
1581+ regionNode . transitionState ( State . CLOSED , STATES_EXPECTED_ON_CLOSED );
1582+ regionNode .setRegionLocation ( null );
1583+ if ( regionLocation != null ) {
1584+ regionNode . setLastHost ( regionLocation );
1585+ regionStates . removeRegionFromServer ( regionLocation , regionNode );
15891586 }
1587+ }
1588+
1589+ // should be called under the RegionStateNode lock
1590+ // for SCP
1591+ void regionClosedAbnormally (RegionStateNode regionNode ) throws IOException {
1592+ RegionState .State state = regionNode .getState ();
1593+ ServerName regionLocation = regionNode .getRegionLocation ();
1594+ regionNode .transitionState (State .ABNORMALLY_CLOSED );
15901595 regionNode .setRegionLocation (null );
15911596 boolean succ = false ;
15921597 try {
@@ -1605,6 +1610,22 @@ void regionClosed(RegionStateNode regionNode, boolean normally) throws IOExcepti
16051610 }
16061611 }
16071612
1613+ void persistToMeta (RegionStateNode regionNode ) throws IOException {
1614+ regionStateStore .updateRegionLocation (regionNode );
1615+ RegionInfo regionInfo = regionNode .getRegionInfo ();
1616+ if (isMetaRegion (regionInfo ) && regionNode .getState () == State .OPEN ) {
1617+ // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it
1618+ // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager
1619+ // which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state
1620+ // on table that contains state.
1621+ setMetaAssigned (regionInfo , true );
1622+ }
1623+ }
1624+
1625+ // ============================================================================================
1626+ // The above methods can only be called in TransitRegionStateProcedure(and related procedures)
1627+ // ============================================================================================
1628+
16081629 public void markRegionAsSplit (final RegionInfo parent , final ServerName serverName ,
16091630 final RegionInfo daughterA , final RegionInfo daughterB ) throws IOException {
16101631 // Update hbase:meta. Parent will be marked offline and split up in hbase:meta.
0 commit comments