Skip to content

Commit 9008479

Browse files
committed
HBASE-25625 StochasticBalancer CostFunctions needs a better way to evaluate resource distribution
1 parent ff38218 commit 9008479

File tree

3 files changed

+131
-97
lines changed

3 files changed

+131
-97
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java

Lines changed: 95 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,12 @@ protected static class Cluster {
168168
int[] initialRegionIndexToServerIndex; //regionIndex -> serverIndex (initial cluster state)
169169
int[] regionIndexToTableIndex; //regionIndex -> tableIndex
170170
int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
171-
int[] numMaxRegionsPerTable; //tableIndex -> max number of regions in a single RS
171+
int[] numRegionsPerTable; // tableIndex -> region count
172+
double[] meanRegionsPerTable; // mean region count per table
173+
double regionSkewByTable; // skew on RS per by table
174+
double minRegionSkewByTable; // min skew on RS per by table
175+
double maxRegionSkewByTable; // max skew on RS per by table
176+
172177
int[] regionIndexToPrimaryIndex; //regionIndex -> regionIndex of the primary
173178
boolean hasRegionReplicas = false; //whether there is regions with replicas
174179

@@ -376,28 +381,37 @@ protected Cluster(
376381

377382
numTables = tables.size();
378383
numRegionsPerServerPerTable = new int[numServers][numTables];
384+
numRegionsPerTable = new int[numTables];
379385

380386
for (int i = 0; i < numServers; i++) {
381387
for (int j = 0; j < numTables; j++) {
382388
numRegionsPerServerPerTable[i][j] = 0;
383389
}
384390
}
385391

392+
for (int i = 0; i < numTables; i++) {
393+
numRegionsPerTable[i] = 0;
394+
}
395+
386396
for (int i=0; i < regionIndexToServerIndex.length; i++) {
387397
if (regionIndexToServerIndex[i] >= 0) {
388398
numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
399+
numRegionsPerTable[regionIndexToTableIndex[i]]++;
389400
}
390401
}
391402

392-
numMaxRegionsPerTable = new int[numTables];
393-
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
394-
for (tableIndex = 0; tableIndex < aNumRegionsPerServerPerTable.length; tableIndex++) {
395-
if (aNumRegionsPerServerPerTable[tableIndex] > numMaxRegionsPerTable[tableIndex]) {
396-
numMaxRegionsPerTable[tableIndex] = aNumRegionsPerServerPerTable[tableIndex];
397-
}
398-
}
403+
// Avoid repeated computation for planning
404+
meanRegionsPerTable = new double[numTables];
405+
maxRegionSkewByTable = 0;
406+
minRegionSkewByTable = 0;
407+
for (int i = 0; i < numTables; i++) {
408+
meanRegionsPerTable[i] = Double.valueOf(numRegionsPerTable[i]) / numServers;
409+
minRegionSkewByTable += Cluster.getMinSkew(numRegionsPerTable[i], numServers);
410+
maxRegionSkewByTable += Cluster.getMaxSkew(numRegionsPerTable[i], numServers);
399411
}
400412

413+
computeRegionSkewPerTable();
414+
401415
for (int i = 0; i < regions.length; i ++) {
402416
RegionInfo info = regions[i];
403417
if (RegionReplicaUtil.isDefaultReplica(info)) {
@@ -522,6 +536,51 @@ public boolean serverHasTooFewRegions(int server) {
522536
return numRegions < minLoad;
523537
}
524538

539+
/**
540+
* Return the min skew of distribution
541+
*/
542+
public static double getMinSkew(double total, double numServers) {
543+
double mean = total / numServers;
544+
// It's possible that there aren't enough regions to go around
545+
double min;
546+
if (numServers > total) {
547+
min = ((numServers - total) * mean + (1 - mean) * total) ;
548+
} else {
549+
// Some will have 1 more than everything else.
550+
int numHigh = (int) (total - (Math.floor(mean) * numServers));
551+
int numLow = (int) (numServers - numHigh);
552+
min = numHigh * (Math.ceil(mean) - mean) + numLow * (mean - Math.floor(mean));
553+
}
554+
return min;
555+
}
556+
557+
/**
558+
* Return the max deviation of distribution
559+
* Compute max as if all region servers had 0 and one had the sum of all costs. This must be
560+
* a zero sum cost for this to make sense.
561+
*/
562+
public static double getMaxSkew(double total, double numServers) {
563+
double mean = total / numServers;
564+
return (total - mean) + (numServers - 1) * mean;
565+
}
566+
567+
/**
568+
* Scale the value between 0 and 1.
569+
*
570+
* @param min Min value
571+
* @param max The Max value
572+
* @param value The value to be scaled.
573+
* @return The scaled value.
574+
*/
575+
public static double scale(double min, double max, double value) {
576+
if (max <= min || value <= min) {
577+
return 0;
578+
}
579+
if ((max - min) == 0) return 0;
580+
581+
return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
582+
}
583+
525584
/**
526585
* Retrieves and lazily initializes a field storing the locality of
527586
* every region/server combination
@@ -579,6 +638,21 @@ public int getRegionSizeMB(int region) {
579638
return regionLoads[region].getLast().getStorefileSizeMB();
580639
}
581640

641+
/**
642+
* Recompute the region skew during init or plan of moves.
643+
*/
644+
private void computeRegionSkewPerTable() {
645+
// reinitialize for recomputation
646+
regionSkewByTable = 0;
647+
648+
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
649+
for (int tableIndex = 0; tableIndex < aNumRegionsPerServerPerTable.length; tableIndex++) {
650+
regionSkewByTable += Math.abs(aNumRegionsPerServerPerTable[tableIndex]
651+
- meanRegionsPerTable[tableIndex]);
652+
}
653+
}
654+
}
655+
582656
/**
583657
* Computes and caches the locality for each region/rack combinations,
584658
* as well as storing a mapping of region -> server and region -> rack such that server
@@ -834,22 +908,20 @@ void regionMoved(int region, int oldServer, int newServer) {
834908
int tableIndex = regionIndexToTableIndex[region];
835909
if (oldServer >= 0) {
836910
numRegionsPerServerPerTable[oldServer][tableIndex]--;
911+
// update regionSkewByTable for the move from old server
912+
regionSkewByTable +=
913+
Math.abs(numRegionsPerServerPerTable[oldServer][tableIndex]
914+
- meanRegionsPerTable[tableIndex])
915+
- Math.abs(numRegionsPerServerPerTable[oldServer][tableIndex] + 1
916+
- meanRegionsPerTable[tableIndex]);
837917
}
838918
numRegionsPerServerPerTable[newServer][tableIndex]++;
839-
840-
//check whether this caused maxRegionsPerTable in the new Server to be updated
841-
if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
842-
numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[newServer][tableIndex];
843-
} else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
844-
== numMaxRegionsPerTable[tableIndex]) {
845-
//recompute maxRegionsPerTable since the previous value was coming from the old server
846-
numMaxRegionsPerTable[tableIndex] = 0;
847-
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
848-
if (aNumRegionsPerServerPerTable[tableIndex] > numMaxRegionsPerTable[tableIndex]) {
849-
numMaxRegionsPerTable[tableIndex] = aNumRegionsPerServerPerTable[tableIndex];
850-
}
851-
}
852-
}
919+
// update regionSkewByTable for the move to new server
920+
regionSkewByTable +=
921+
Math.abs(numRegionsPerServerPerTable[newServer][tableIndex]
922+
- meanRegionsPerTable[tableIndex])
923+
- Math.abs(numRegionsPerServerPerTable[newServer][tableIndex] - 1
924+
- meanRegionsPerTable[tableIndex]);
853925

854926
// update for servers
855927
int primary = regionIndexToPrimaryIndex[region];
@@ -1019,7 +1091,7 @@ public String toString() {
10191091
.append(Arrays.toString(serverIndicesSortedByRegionCount))
10201092
.append(", regionsPerServer=").append(Arrays.deepToString(regionsPerServer));
10211093

1022-
desc.append(", numMaxRegionsPerTable=").append(Arrays.toString(numMaxRegionsPerTable))
1094+
desc.append(", regionSkewByTable=").append(regionSkewByTable)
10231095
.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
10241096
.append(", numTables=").append(numTables).append(", numMovedRegions=")
10251097
.append(numMovedRegions).append('}');

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java

Lines changed: 34 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,7 @@ public CostFunction(Configuration c) {
762762
boolean isNeeded() {
763763
return true;
764764
}
765+
765766
float getMultiplier() {
766767
return multiplier;
767768
}
@@ -770,35 +771,39 @@ void setMultiplier(float m) {
770771
this.multiplier = m;
771772
}
772773

773-
/** Called once per LB invocation to give the cost function
774+
/**
775+
* Called once per LB invocation to give the cost function
774776
* to initialize its state, and perform any costly calculation.
775777
*/
776778
void init(Cluster cluster) {
777779
this.cluster = cluster;
778780
}
779781

780-
/** Called once per cluster Action to give the cost function
782+
/**
783+
* Called once per cluster Action to give the cost function
781784
* an opportunity to update its state. postAction() is always
782785
* called at least once before cost() is called with the cluster
783-
* that this action is performed on. */
786+
* that this action is performed on.
787+
*/
784788
void postAction(Action action) {
785789
switch (action.type) {
786-
case NULL: break;
787-
case ASSIGN_REGION:
788-
AssignRegionAction ar = (AssignRegionAction) action;
789-
regionMoved(ar.region, -1, ar.server);
790-
break;
791-
case MOVE_REGION:
792-
MoveRegionAction mra = (MoveRegionAction) action;
793-
regionMoved(mra.region, mra.fromServer, mra.toServer);
794-
break;
795-
case SWAP_REGIONS:
796-
SwapRegionsAction a = (SwapRegionsAction) action;
797-
regionMoved(a.fromRegion, a.fromServer, a.toServer);
798-
regionMoved(a.toRegion, a.toServer, a.fromServer);
799-
break;
800-
default:
801-
throw new RuntimeException("Uknown action:" + action.type);
790+
case NULL:
791+
break;
792+
case ASSIGN_REGION:
793+
AssignRegionAction ar = (AssignRegionAction) action;
794+
regionMoved(ar.region, -1, ar.server);
795+
break;
796+
case MOVE_REGION:
797+
MoveRegionAction mra = (MoveRegionAction) action;
798+
regionMoved(mra.region, mra.fromServer, mra.toServer);
799+
break;
800+
case SWAP_REGIONS:
801+
SwapRegionsAction a = (SwapRegionsAction) action;
802+
regionMoved(a.fromRegion, a.fromServer, a.toServer);
803+
regionMoved(a.toRegion, a.toServer, a.fromServer);
804+
break;
805+
default:
806+
throw new RuntimeException("Uknown action:" + action.type);
802807
}
803808
}
804809

@@ -816,65 +821,30 @@ protected void regionMoved(int region, int oldServer, int newServer) {
816821
*
817822
* @param stats the costs
818823
* @return a scaled set of costs.
819-
*/
820-
protected double costFromArray(double[] stats) {
824+
*/ protected double costFromArray(double[] stats) {
821825
double totalCost = 0;
822826
double total = getSum(stats);
823827

824828
double count = stats.length;
825-
double mean = total/count;
826-
827-
// Compute max as if all region servers had 0 and one had the sum of all costs. This must be
828-
// a zero sum cost for this to make sense.
829-
double max = ((count - 1) * mean) + (total - mean);
830-
831-
// It's possible that there aren't enough regions to go around
832-
double min;
833-
if (count > total) {
834-
min = ((count - total) * mean) + ((1 - mean) * total);
835-
} else {
836-
// Some will have 1 more than everything else.
837-
int numHigh = (int) (total - (Math.floor(mean) * count));
838-
int numLow = (int) (count - numHigh);
839-
840-
min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean)));
829+
double mean = total / count;
841830

842-
}
843-
min = Math.max(0, min);
844-
for (int i=0; i<stats.length; i++) {
831+
for (int i = 0; i < stats.length; i++) {
845832
double n = stats[i];
846-
double diff = Math.abs(mean - n);
833+
double diff = Math.abs(mean - n) ;
847834
totalCost += diff;
848835
}
849836

850-
double scaled = scale(min, max, totalCost);
851-
return scaled;
837+
return Cluster
838+
.scale(Cluster.getMinSkew(total, count), Cluster.getMaxSkew(total, count), totalCost);
852839
}
853840

854841
private double getSum(double[] stats) {
855842
double total = 0;
856-
for(double s:stats) {
843+
for (double s : stats) {
857844
total += s;
858845
}
859846
return total;
860847
}
861-
862-
/**
863-
* Scale the value between 0 and 1.
864-
*
865-
* @param min Min value
866-
* @param max The Max value
867-
* @param value The value to be scaled.
868-
* @return The scaled value.
869-
*/
870-
protected double scale(double min, double max, double value) {
871-
if (max <= min || value <= min) {
872-
return 0;
873-
}
874-
if ((max - min) == 0) return 0;
875-
876-
return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
877-
}
878848
}
879849

880850
/**
@@ -923,7 +893,7 @@ protected double cost() {
923893
return 1000000; // return a number much greater than any of the other cost
924894
}
925895

926-
return scale(0, Math.min(cluster.numRegions, maxMoves), moveCost);
896+
return Cluster.scale(0, Math.min(cluster.numRegions, maxMoves), moveCost);
927897
}
928898
}
929899

@@ -1031,15 +1001,7 @@ static class TableSkewCostFunction extends CostFunction {
10311001

10321002
@Override
10331003
protected double cost() {
1034-
double max = cluster.numRegions;
1035-
double min = ((double) cluster.numRegions) / cluster.numServers;
1036-
double value = 0;
1037-
1038-
for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
1039-
value += cluster.numMaxRegionsPerTable[i];
1040-
}
1041-
1042-
return scale(min, max, value);
1004+
return Cluster.scale(cluster.minRegionSkewByTable, cluster.maxRegionSkewByTable, cluster.regionSkewByTable);
10431005
}
10441006
}
10451007

@@ -1392,7 +1354,7 @@ protected double cost() {
13921354
for (int i = 0 ; i < costsPerGroup.length; i++) {
13931355
totalCost += costsPerGroup[i];
13941356
}
1395-
return scale(0, maxCost, totalCost);
1357+
return Cluster.scale(0, maxCost, totalCost);
13961358
}
13971359

13981360
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,8 @@ public void testRegionAvailabilityWithRegionMoves() throws Exception {
390390

391391
// now move region1 from servers[0] to servers[2]
392392
cluster.doAction(new MoveRegionAction(0, 0, 2));
393-
// check that the numMaxRegionsPerTable for "table" has increased to 2
394-
assertEquals(2, cluster.numMaxRegionsPerTable[0]);
393+
// check that the cluster-wide regionSkewByTable is 1 after the move
394+
assertEquals(1, cluster.regionSkewByTable, 0.01);
395395
// now repeat check whether moving region1 from servers[1] to servers[2]
396396
// would lower availability
397397
assertTrue(cluster.wouldLowerAvailability(hri1, servers[2]));

0 commit comments

Comments
 (0)