3030import java .nio .file .Files ;
3131import java .nio .file .Paths ;
3232import java .util .ArrayList ;
33+ import java .util .Arrays ;
3334import java .util .Collections ;
3435import java .util .EnumSet ;
3536import java .util .HashSet ;
5253import org .apache .hadoop .hbase .ClusterMetrics .Option ;
5354import org .apache .hadoop .hbase .HBaseConfiguration ;
5455import org .apache .hadoop .hbase .HConstants ;
56+ import org .apache .hadoop .hbase .HRegionLocation ;
57+ import org .apache .hadoop .hbase .MetaTableAccessor ;
5558import org .apache .hadoop .hbase .ServerName ;
5659import org .apache .hadoop .hbase .UnknownRegionException ;
5760import org .apache .hadoop .hbase .client .Admin ;
5861import org .apache .hadoop .hbase .client .Connection ;
5962import org .apache .hadoop .hbase .client .ConnectionFactory ;
6063import org .apache .hadoop .hbase .client .DoNotRetryRegionException ;
6164import org .apache .hadoop .hbase .client .RegionInfo ;
65+ import org .apache .hadoop .hbase .client .RegionInfoBuilder ;
66+ import org .apache .hadoop .hbase .client .Result ;
6267import org .apache .hadoop .hbase .master .RackManager ;
68+ import org .apache .hadoop .hbase .master .RegionState ;
6369import org .apache .hadoop .hbase .master .assignment .AssignmentManager ;
70+ import org .apache .hadoop .hbase .zookeeper .MetaTableLocator ;
71+ import org .apache .hadoop .hbase .zookeeper .ZKWatcher ;
72+ import org .apache .hadoop .hbase .zookeeper .ZNodePaths ;
6473import org .apache .yetus .audience .InterfaceAudience ;
6574import org .slf4j .Logger ;
6675import org .slf4j .LoggerFactory ;
@@ -93,6 +102,7 @@ public class RegionMover extends AbstractHBaseTool implements Closeable {
93102 private boolean ack = true ;
94103 private int maxthreads = 1 ;
95104 private int timeout ;
105+ private List <String > isolateRegionIdArray ;
96106 private String loadUnload ;
97107 private String hostname ;
98108 private String filename ;
@@ -109,6 +119,7 @@ private RegionMover(RegionMoverBuilder builder) throws IOException {
109119 this .excludeFile = builder .excludeFile ;
110120 this .designatedFile = builder .designatedFile ;
111121 this .maxthreads = builder .maxthreads ;
122+ this .isolateRegionIdArray = builder .isolateRegionIdArray ;
112123 this .ack = builder .ack ;
113124 this .port = builder .port ;
114125 this .timeout = builder .timeout ;
@@ -153,6 +164,7 @@ public static class RegionMoverBuilder {
153164 private boolean ack = true ;
154165 private int maxthreads = 1 ;
155166 private int timeout = Integer .MAX_VALUE ;
167+ private List <String > isolateRegionIdArray = new ArrayList <>();
156168 private String hostname ;
157169 private String filename ;
158170 private String excludeFile = null ;
@@ -213,6 +225,14 @@ public RegionMoverBuilder maxthreads(int threads) {
213225 return this ;
214226 }
215227
228+ /**
229+ * Set the region ID to isolate on the region server.
230+ */
231+ public RegionMoverBuilder isolateRegionIdArray (List <String > isolateRegionIdArray ) {
232+ this .isolateRegionIdArray = isolateRegionIdArray ;
233+ return this ;
234+ }
235+
216236 /**
217237 * Path of file containing hostnames to be excluded during region movement. Exclude file should
218238 * have 'host:port' per line. Port is mandatory here as we can have many RS running on a single
@@ -406,6 +426,25 @@ public boolean unloadFromRack()
406426 }
407427
408428 private boolean unloadRegions (boolean unloadFromRack )
429+ throws ExecutionException , InterruptedException , TimeoutException {
430+ return unloadRegions (unloadFromRack , null );
431+ }
432+
433+ /**
434+ * Isolated regions specified in {@link #isolateRegionIdArray} on {@link #hostname} in ack Mode
435+ * and Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.
436+ * In noAck mode we do not make sure that region is successfully online on the target region
437+ * server,hence it is the best effort. We do not unload regions to hostnames given in
438+ * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions
439+ * to hostnames provided in {@link #designatedFile}
440+ * @return true if region isolation succeeded, false otherwise
441+ */
442+ public boolean isolateRegions ()
443+ throws ExecutionException , InterruptedException , TimeoutException {
444+ return unloadRegions (false , isolateRegionIdArray );
445+ }
446+
447+ private boolean unloadRegions (boolean unloadFromRack , List <String > isolateRegionIdArray )
409448 throws InterruptedException , ExecutionException , TimeoutException {
410449 deleteFile (this .filename );
411450 ExecutorService unloadPool = Executors .newFixedThreadPool (1 );
@@ -459,7 +498,7 @@ private boolean unloadRegions(boolean unloadFromRack)
459498 LOG .warn ("No Regions were moved - no servers available" );
460499 return false ;
461500 }
462- unloadRegions (server , regionServers , movedRegions );
501+ unloadRegions (server , regionServers , movedRegions , isolateRegionIdArray );
463502 } catch (Exception e ) {
464503 LOG .error ("Error while unloading regions " , e );
465504 return false ;
@@ -474,9 +513,111 @@ private boolean unloadRegions(boolean unloadFromRack)
474513 }
475514
476515 private void unloadRegions (ServerName server , List <ServerName > regionServers ,
477- List <RegionInfo > movedRegions ) throws Exception {
516+ List <RegionInfo > movedRegions , List < String > isolateRegionIdArray ) throws Exception {
478517 while (true ) {
518+ List <RegionInfo > isolateRegionInfoList = Collections .synchronizedList (new ArrayList <>());
519+ RegionInfo isolateRegionInfo = null ;
520+ if (isolateRegionIdArray != null && !isolateRegionIdArray .isEmpty ()) {
521+ // Region will be moved to target region server with Ack mode.
522+ final ExecutorService isolateRegionPool = Executors .newFixedThreadPool (maxthreads );
523+ List <Future <Boolean >> isolateRegionTaskList = new ArrayList <>();
524+ List <RegionInfo > recentlyIsolatedRegion = Collections .synchronizedList (new ArrayList <>());
525+ boolean allRegionOpsSuccessful = true ;
526+ boolean isMetaIsolated = false ;
527+ RegionInfo metaRegionInfo = RegionInfoBuilder .FIRST_META_REGIONINFO ;
528+ List <HRegionLocation > hRegionLocationRegionIsolation =
529+ Collections .synchronizedList (new ArrayList <>());
530+ for (String isolateRegionId : isolateRegionIdArray ) {
531+ if (isolateRegionId .equalsIgnoreCase (metaRegionInfo .getEncodedName ())) {
532+ isMetaIsolated = true ;
533+ continue ;
534+ }
535+ Result result = MetaTableAccessor .scanByRegionEncodedName (conn , isolateRegionId );
536+ HRegionLocation hRegionLocation =
537+ MetaTableAccessor .getRegionLocation (conn , result .getRow ());
538+ if (hRegionLocation != null ) {
539+ hRegionLocationRegionIsolation .add (hRegionLocation );
540+ } else {
541+ LOG .error ("Region " + isolateRegionId + " doesn't exists/can't fetch from"
542+ + " meta...Quitting now" );
543+ // We only move the regions if all the regions were found.
544+ allRegionOpsSuccessful = false ;
545+ break ;
546+ }
547+ }
548+
549+ if (!allRegionOpsSuccessful ) {
550+ break ;
551+ }
552+ // If hbase:meta region was isolated, then it needs to be part of isolateRegionInfoList.
553+ if (isMetaIsolated ) {
554+ ZKWatcher zkWatcher = new ZKWatcher (conf , null , null );
555+ List <HRegionLocation > result = new ArrayList <>();
556+ for (String znode : zkWatcher .getMetaReplicaNodes ()) {
557+ String path = ZNodePaths .joinZNode (zkWatcher .getZNodePaths ().baseZNode , znode );
558+ int replicaId = zkWatcher .getZNodePaths ().getMetaReplicaIdFromPath (path );
559+ RegionState state = MetaTableLocator .getMetaRegionState (zkWatcher , replicaId );
560+ result .add (new HRegionLocation (state .getRegion (), state .getServerName ()));
561+ }
562+ ServerName metaSeverName = result .get (0 ).getServerName ();
563+ // For isolating hbase:meta, it should move explicitly in Ack mode,
564+ // hence the forceMoveRegionByAck = true.
565+ if (!metaSeverName .equals (server )) {
566+ LOG .info ("Region of hbase:meta " + metaRegionInfo .getEncodedName () + " is on server "
567+ + metaSeverName + " moving to " + server );
568+ submitRegionMovesWhileUnloading (metaSeverName , Collections .singletonList (server ),
569+ movedRegions , Collections .singletonList (metaRegionInfo ), true );
570+ } else {
571+ LOG .info ("Region of hbase:meta " + metaRegionInfo .getEncodedName () + " already exists"
572+ + " on server : " + server );
573+ }
574+ isolateRegionInfoList .add (RegionInfoBuilder .FIRST_META_REGIONINFO );
575+ }
576+
577+ if (!hRegionLocationRegionIsolation .isEmpty ()) {
578+ for (HRegionLocation hRegionLocation : hRegionLocationRegionIsolation ) {
579+ isolateRegionInfo = hRegionLocation .getRegion ();
580+ isolateRegionInfoList .add (isolateRegionInfo );
581+ if (hRegionLocation .getServerName () == server ) {
582+ LOG .info ("Region " + hRegionLocation .getRegion ().getEncodedName () + " already exists"
583+ + " on server : " + server .getHostname ());
584+ } else {
585+ Future <Boolean > isolateRegionTask =
586+ isolateRegionPool .submit (new MoveWithAck (conn , isolateRegionInfo ,
587+ hRegionLocation .getServerName (), server , recentlyIsolatedRegion ));
588+ isolateRegionTaskList .add (isolateRegionTask );
589+ }
590+ }
591+ }
592+
593+ if (!isolateRegionTaskList .isEmpty ()) {
594+ isolateRegionPool .shutdown ();
595+ // Now that we have fetched all the region's regionInfo, we can move them.
596+ waitMoveTasksToFinish (isolateRegionPool , isolateRegionTaskList ,
597+ admin .getConfiguration ().getLong (MOVE_WAIT_MAX_KEY , DEFAULT_MOVE_WAIT_MAX ));
598+
599+ Set <RegionInfo > currentRegionsOnTheServer = new HashSet <>(admin .getRegions (server ));
600+ if (!currentRegionsOnTheServer .containsAll (isolateRegionInfoList )) {
601+ // If all the regions are not online on the target server,
602+ // we don't put RS in decommission mode and exit from here.
603+ LOG .error ("One of the Region move failed OR stuck in transition...Quitting now" );
604+ break ;
605+ }
606+ } else {
607+ LOG .info ("All regions already exists on server : " + server .getHostname ());
608+ }
609+ // Once region has been moved to target RS, put the target RS into decommission mode,
610+ // so master doesn't assign new region to the target RS while we unload the target RS.
611+ // Also pass 'offload' flag as false since we don't want master to offload the target RS.
612+ List <ServerName > listOfServer = new ArrayList <>();
613+ listOfServer .add (server );
614+ LOG .info ("Putting server : " + server .getHostname () + " in decommission/draining mode" );
615+ admin .decommissionRegionServers (listOfServer , false );
616+ }
479617 List <RegionInfo > regionsToMove = admin .getRegions (server );
618+ // Remove all the regions from the online Region list, that we just isolated.
619+ // This will also include hbase:meta if it was isolated.
620+ regionsToMove .removeAll (isolateRegionInfoList );
480621 regionsToMove .removeAll (movedRegions );
481622 if (regionsToMove .isEmpty ()) {
482623 LOG .info ("No Regions to move....Quitting now" );
@@ -488,21 +629,25 @@ private void unloadRegions(ServerName server, List<ServerName> regionServers,
488629 Optional <RegionInfo > metaRegion = getMetaRegionInfoIfToBeMoved (regionsToMove );
489630 if (metaRegion .isPresent ()) {
490631 RegionInfo meta = metaRegion .get ();
632+ // hbase:meta should move explicitly in Ack mode.
491633 submitRegionMovesWhileUnloading (server , regionServers , movedRegions ,
492- Collections .singletonList (meta ));
634+ Collections .singletonList (meta ), true );
493635 regionsToMove .remove (meta );
494636 }
495- submitRegionMovesWhileUnloading (server , regionServers , movedRegions , regionsToMove );
637+ submitRegionMovesWhileUnloading (server , regionServers , movedRegions , regionsToMove , false );
496638 }
497639 }
498640
499641 private void submitRegionMovesWhileUnloading (ServerName server , List <ServerName > regionServers ,
500- List <RegionInfo > movedRegions , List <RegionInfo > regionsToMove ) throws Exception {
642+ List <RegionInfo > movedRegions , List <RegionInfo > regionsToMove , boolean forceMoveRegionByAck )
643+ throws Exception {
501644 final ExecutorService moveRegionsPool = Executors .newFixedThreadPool (this .maxthreads );
502645 List <Future <Boolean >> taskList = new ArrayList <>();
503646 int serverIndex = 0 ;
504647 for (RegionInfo regionToMove : regionsToMove ) {
505- if (ack ) {
648+ // To move/isolate hbase:meta on a server, it should happen explicitly by Ack mode, hence the
649+ // forceMoveRegionByAck = true.
650+ if (ack || forceMoveRegionByAck ) {
506651 Future <Boolean > task = moveRegionsPool .submit (new MoveWithAck (conn , regionToMove , server ,
507652 regionServers .get (serverIndex ), movedRegions ));
508653 taskList .add (task );
@@ -748,9 +893,17 @@ private ServerName stripServer(List<ServerName> regionServers, String hostname,
748893 @ Override
749894 protected void addOptions () {
750895 this .addRequiredOptWithArg ("r" , "regionserverhost" , "region server <hostname>|<hostname:port>" );
751- this .addRequiredOptWithArg ("o" , "operation" , "Expected: load/unload/unload_from_rack" );
896+ this .addRequiredOptWithArg ("o" , "operation" ,
897+ "Expected: load/unload/unload_from_rack/isolate_regions" );
752898 this .addOptWithArg ("m" , "maxthreads" ,
753899 "Define the maximum number of threads to use to unload and reload the regions" );
900+ this .addOptWithArg ("i" , "isolateRegionIds" ,
901+ "Comma separated list of Region IDs hash to isolate on a RegionServer and put region server"
902+ + " in draining mode. This option should only be used with '-o isolate_regions'."
903+ + " By putting region server in decommission/draining mode, master can't assign any"
904+ + " new region on this server. If one or more regions are not found OR failed to isolate"
905+ + " successfully, utility will exist without putting RS in draining/decommission mode."
906+ + " Ex. --isolateRegionIds id1,id2,id3 OR -i id1,id2,id3" );
754907 this .addOptWithArg ("x" , "excludefile" ,
755908 "File with <hostname:port> per line to exclude as unload targets; default excludes only "
756909 + "target host; useful for rack decommisioning." );
@@ -772,9 +925,14 @@ protected void addOptions() {
772925 protected void processOptions (CommandLine cmd ) {
773926 String hostname = cmd .getOptionValue ("r" );
774927 rmbuilder = new RegionMoverBuilder (hostname );
928+ this .loadUnload = cmd .getOptionValue ("o" ).toLowerCase (Locale .ROOT );
775929 if (cmd .hasOption ('m' )) {
776930 rmbuilder .maxthreads (Integer .parseInt (cmd .getOptionValue ('m' )));
777931 }
932+ if (this .loadUnload .equals ("isolate_regions" ) && cmd .hasOption ("isolateRegionIds" )) {
933+ rmbuilder
934+ .isolateRegionIdArray (Arrays .asList (cmd .getOptionValue ("isolateRegionIds" ).split ("," )));
935+ }
778936 if (cmd .hasOption ('n' )) {
779937 rmbuilder .ack (false );
780938 }
@@ -803,6 +961,15 @@ protected int doWork() throws Exception {
803961 success = rm .unload ();
804962 } else if (loadUnload .equalsIgnoreCase ("unload_from_rack" )) {
805963 success = rm .unloadFromRack ();
964+ } else if (loadUnload .equalsIgnoreCase ("isolate_regions" )) {
965+ if (rm .isolateRegionIdArray != null && !rm .isolateRegionIdArray .isEmpty ()) {
966+ success = rm .isolateRegions ();
967+ } else {
968+ LOG .error ("Missing -i/--isolate_regions option with '-o isolate_regions' option" );
969+ LOG .error ("Use -h or --help for usage instructions" );
970+ printUsage ();
971+ success = false ;
972+ }
806973 } else {
807974 printUsage ();
808975 success = false ;
0 commit comments