Skip to content

Commit 7802e8d

Browse files
author
Ray Mattingly
committed
HBASE-28513 The StochasticLoadBalancer should support discrete evaluations
1 parent 6dc28a1 commit 7802e8d

File tree

46 files changed

+3247
-111
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+3247
-111
lines changed

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalanceAction.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ enum Type {
2828
ASSIGN_REGION,
2929
MOVE_REGION,
3030
SWAP_REGIONS,
31+
MOVE_BATCH,
3132
NULL,
3233
}
3334

@@ -51,6 +52,10 @@ Type getType() {
5152
return type;
5253
}
5354

55+
long getStepCount() {
56+
return 1;
57+
}
58+
5459
@Override
5560
public String toString() {
5661
return type + ":";

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java

Lines changed: 133 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,26 @@
2626
import java.util.HashMap;
2727
import java.util.List;
2828
import java.util.Map;
29+
import java.util.Set;
30+
import java.util.concurrent.TimeUnit;
2931
import org.agrona.collections.Hashing;
3032
import org.agrona.collections.Int2IntCounterMap;
3133
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
3234
import org.apache.hadoop.hbase.ServerName;
3335
import org.apache.hadoop.hbase.client.RegionInfo;
3436
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
3537
import org.apache.hadoop.hbase.master.RackManager;
38+
import org.apache.hadoop.hbase.master.RegionPlan;
3639
import org.apache.hadoop.hbase.net.Address;
3740
import org.apache.hadoop.hbase.util.Pair;
3841
import org.apache.yetus.audience.InterfaceAudience;
3942
import org.slf4j.Logger;
4043
import org.slf4j.LoggerFactory;
4144

45+
import org.apache.hbase.thirdparty.com.google.common.base.Supplier;
46+
import org.apache.hbase.thirdparty.com.google.common.base.Suppliers;
47+
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
48+
4249
/**
4350
* An efficient array based implementation similar to ClusterState for keeping the status of the
4451
* cluster in terms of region assignment and distribution. LoadBalancers, such as
@@ -106,6 +113,7 @@ class BalancerClusterState {
106113
int numRacks;
107114
int numTables;
108115
int numRegions;
116+
int maxReplicas;
109117

110118
int numMovedRegions = 0; // num moved regions from the initial configuration
111119
Map<ServerName, List<RegionInfo>> clusterState;
@@ -122,6 +130,14 @@ class BalancerClusterState {
122130
// Maps regionName -> oldServerName -> cache ratio of the region on the old server
123131
Map<String, Pair<ServerName, Float>> regionCacheRatioOnOldServerMap;
124132

133+
private Supplier<List<Integer>> shuffledServerIndicesSupplier =
134+
Suppliers.memoizeWithExpiration(() -> {
135+
Collection<Integer> serverIndices = serversToIndex.values();
136+
List<Integer> shuffledServerIndices = new ArrayList<>(serverIndices);
137+
Collections.shuffle(shuffledServerIndices);
138+
return shuffledServerIndices;
139+
}, 5, TimeUnit.SECONDS);
140+
125141
static class DefaultRackManager extends RackManager {
126142
@Override
127143
public String getRack(ServerName server) {
@@ -446,6 +462,11 @@ private void registerRegion(RegionInfo region, int regionIndex, int serverIndex,
446462
: serversToIndex.get(loc.get(i).getAddress()));
447463
}
448464
}
465+
466+
// Replica ids are zero-based (the primary replica has id 0), so seeing replica id N means the
// region has at least N + 1 replicas. Track the largest such count across all regions.
int numReplicas = region.getReplicaId() + 1;
if (numReplicas > maxReplicas) {
  maxReplicas = numReplicas;
}
449470
}
450471

451472
/**
@@ -705,7 +726,41 @@ enum LocalityType {
705726
RACK
706727
}
707728

708-
public void doAction(BalanceAction action) {
729+
public List<RegionPlan> convertActionToPlans(BalanceAction action) {
730+
switch (action.getType()) {
731+
case NULL:
732+
break;
733+
case ASSIGN_REGION:
734+
// FindBugs: Having the assert quietens FB BC_UNCONFIRMED_CAST warnings
735+
assert action instanceof AssignRegionAction : action.getClass();
736+
AssignRegionAction ar = (AssignRegionAction) action;
737+
return ImmutableList.of(regionMoved(ar.getRegion(), -1, ar.getServer()));
738+
case MOVE_REGION:
739+
assert action instanceof MoveRegionAction : action.getClass();
740+
MoveRegionAction mra = (MoveRegionAction) action;
741+
return ImmutableList
742+
.of(regionMoved(mra.getRegion(), mra.getFromServer(), mra.getToServer()));
743+
case SWAP_REGIONS:
744+
assert action instanceof SwapRegionsAction : action.getClass();
745+
SwapRegionsAction a = (SwapRegionsAction) action;
746+
return ImmutableList.of(regionMoved(a.getFromRegion(), a.getFromServer(), a.getToServer()),
747+
regionMoved(a.getToRegion(), a.getToServer(), a.getFromServer()));
748+
case MOVE_BATCH:
749+
assert action instanceof MoveBatchAction : action.getClass();
750+
MoveBatchAction mba = (MoveBatchAction) action;
751+
List<RegionPlan> mbRegionPlans = new ArrayList<>();
752+
for (MoveRegionAction moveRegionAction : mba.getMoveActions()) {
753+
mbRegionPlans.add(regionMoved(moveRegionAction.getRegion(),
754+
moveRegionAction.getFromServer(), moveRegionAction.getToServer()));
755+
}
756+
return mbRegionPlans;
757+
default:
758+
throw new RuntimeException("Unknown action:" + action.getType());
759+
}
760+
return Collections.emptyList();
761+
}
762+
763+
public List<RegionPlan> doAction(BalanceAction action) {
709764
switch (action.getType()) {
710765
case NULL:
711766
break;
@@ -715,30 +770,47 @@ public void doAction(BalanceAction action) {
715770
AssignRegionAction ar = (AssignRegionAction) action;
716771
regionsPerServer[ar.getServer()] =
717772
addRegion(regionsPerServer[ar.getServer()], ar.getRegion());
718-
regionMoved(ar.getRegion(), -1, ar.getServer());
719-
break;
773+
return ImmutableList.of(regionMoved(ar.getRegion(), -1, ar.getServer()));
720774
case MOVE_REGION:
721775
assert action instanceof MoveRegionAction : action.getClass();
722776
MoveRegionAction mra = (MoveRegionAction) action;
723777
regionsPerServer[mra.getFromServer()] =
724778
removeRegion(regionsPerServer[mra.getFromServer()], mra.getRegion());
725779
regionsPerServer[mra.getToServer()] =
726780
addRegion(regionsPerServer[mra.getToServer()], mra.getRegion());
727-
regionMoved(mra.getRegion(), mra.getFromServer(), mra.getToServer());
728-
break;
781+
return ImmutableList
782+
.of(regionMoved(mra.getRegion(), mra.getFromServer(), mra.getToServer()));
729783
case SWAP_REGIONS:
730784
assert action instanceof SwapRegionsAction : action.getClass();
731785
SwapRegionsAction a = (SwapRegionsAction) action;
732786
regionsPerServer[a.getFromServer()] =
733787
replaceRegion(regionsPerServer[a.getFromServer()], a.getFromRegion(), a.getToRegion());
734788
regionsPerServer[a.getToServer()] =
735789
replaceRegion(regionsPerServer[a.getToServer()], a.getToRegion(), a.getFromRegion());
736-
regionMoved(a.getFromRegion(), a.getFromServer(), a.getToServer());
737-
regionMoved(a.getToRegion(), a.getToServer(), a.getFromServer());
738-
break;
790+
return ImmutableList.of(regionMoved(a.getFromRegion(), a.getFromServer(), a.getToServer()),
791+
regionMoved(a.getToRegion(), a.getToServer(), a.getFromServer()));
792+
case MOVE_BATCH:
793+
assert action instanceof MoveBatchAction : action.getClass();
794+
MoveBatchAction mba = (MoveBatchAction) action;
795+
List<RegionPlan> mbRegionPlans = new ArrayList<>();
796+
for (int serverIndex : mba.getServerToRegionsToRemove().keySet()) {
797+
Set<Integer> regionsToRemove = mba.getServerToRegionsToRemove().get(serverIndex);
798+
regionsPerServer[serverIndex] =
799+
removeRegions(regionsPerServer[serverIndex], regionsToRemove);
800+
}
801+
for (int serverIndex : mba.getServerToRegionsToAdd().keySet()) {
802+
Set<Integer> regionsToAdd = mba.getServerToRegionsToAdd().get(serverIndex);
803+
regionsPerServer[serverIndex] = addRegions(regionsPerServer[serverIndex], regionsToAdd);
804+
}
805+
for (MoveRegionAction moveRegionAction : mba.getMoveActions()) {
806+
mbRegionPlans.add(regionMoved(moveRegionAction.getRegion(),
807+
moveRegionAction.getFromServer(), moveRegionAction.getToServer()));
808+
}
809+
return mbRegionPlans;
739810
default:
740-
throw new RuntimeException("Uknown action:" + action.getType());
811+
throw new RuntimeException("Unknown action:" + action.getType());
741812
}
813+
return Collections.emptyList();
742814
}
743815

744816
/**
@@ -822,7 +894,7 @@ void doAssignRegion(RegionInfo regionInfo, ServerName serverName) {
822894
doAction(new AssignRegionAction(region, server));
823895
}
824896

825-
void regionMoved(int region, int oldServer, int newServer) {
897+
RegionPlan regionMoved(int region, int oldServer, int newServer) {
826898
regionIndexToServerIndex[region] = newServer;
827899
if (initialRegionIndexToServerIndex[region] == newServer) {
828900
numMovedRegions--; // region moved back to original location
@@ -853,6 +925,11 @@ void regionMoved(int region, int oldServer, int newServer) {
853925
updateForLocation(serverIndexToRackIndex, regionsPerRack, colocatedReplicaCountsPerRack,
854926
oldServer, newServer, primary, region);
855927
}
928+
929+
// old server name can be null
930+
ServerName oldServerName = oldServer == -1 ? null : servers[oldServer];
931+
932+
return new RegionPlan(regions[region], oldServerName, servers[newServer]);
856933
}
857934

858935
/**
@@ -899,6 +976,48 @@ int[] addRegion(int[] regions, int regionIndex) {
899976
return newRegions;
900977
}
901978

979+
int[] removeRegions(int[] regions, Set<Integer> regionIndicesToRemove) {
980+
// Calculate the size of the new regions array
981+
int newSize = regions.length - regionIndicesToRemove.size();
982+
if (newSize < 0) {
983+
throw new IllegalStateException(
984+
"Region indices mismatch: more regions to remove than in the regions array");
985+
}
986+
987+
int[] newRegions = new int[newSize];
988+
int newIndex = 0;
989+
990+
// Copy only the regions not in the removal set
991+
for (int region : regions) {
992+
if (!regionIndicesToRemove.contains(region)) {
993+
newRegions[newIndex++] = region;
994+
}
995+
}
996+
997+
// If the newIndex is smaller than newSize, some regions were missing from the input array
998+
if (newIndex != newSize) {
999+
throw new IllegalStateException("Region indices mismatch: some regions in the removal "
1000+
+ "set were not found in the regions array");
1001+
}
1002+
1003+
return newRegions;
1004+
}
1005+
1006+
int[] addRegions(int[] regions, Set<Integer> regionIndicesToAdd) {
1007+
int[] newRegions = new int[regions.length + regionIndicesToAdd.size()];
1008+
1009+
// Copy the existing regions to the new array
1010+
System.arraycopy(regions, 0, newRegions, 0, regions.length);
1011+
1012+
// Add the new regions at the end of the array
1013+
int newIndex = regions.length;
1014+
for (int regionIndex : regionIndicesToAdd) {
1015+
newRegions[newIndex++] = regionIndex;
1016+
}
1017+
1018+
return newRegions;
1019+
}
1020+
9021021
int[] addRegionSorted(int[] regions, int regionIndex) {
9031022
int[] newRegions = new int[regions.length + 1];
9041023
int i = 0;
@@ -998,6 +1117,10 @@ void setNumMovedRegions(int numMovedRegions) {
9981117
this.numMovedRegions = numMovedRegions;
9991118
}
10001119

1120+
List<Integer> getShuffledServerIndices() {
1121+
return shuffledServerIndicesSupplier.get();
1122+
}
1123+
10011124
@Override
10021125
public String toString() {
10031126
StringBuilder desc = new StringBuilder("Cluster={servers=[");

0 commit comments

Comments
 (0)