Skip to content

Commit 533c84d

Browse files
authored
HBASE-25739 TableSkewCostFunction need to use aggregated deviation (apache#3067)
Signed-off-by: Michael Stack <[email protected]> Reviewed-by: David Manning <[email protected]>
1 parent 6cf4fdd commit 533c84d

File tree

3 files changed

+131
-94
lines changed

3 files changed

+131
-94
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java

Lines changed: 97 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,12 @@ protected static class Cluster {
162162
int[] initialRegionIndexToServerIndex; //regionIndex -> serverIndex (initial cluster state)
163163
int[] regionIndexToTableIndex; //regionIndex -> tableIndex
164164
int[][] numRegionsPerServerPerTable; //serverIndex -> tableIndex -> # regions
165-
int[] numMaxRegionsPerTable; //tableIndex -> max number of regions in a single RS
165+
int[] numRegionsPerTable; // tableIndex -> region count
166+
double[] meanRegionsPerTable; // mean region count per table
167+
double regionSkewByTable; // skew on RS per by table
168+
double minRegionSkewByTable; // min skew on RS per by table
169+
double maxRegionSkewByTable; // max skew on RS per by table
170+
166171
int[] regionIndexToPrimaryIndex; //regionIndex -> regionIndex of the primary
167172
boolean hasRegionReplicas = false; //whether there is regions with replicas
168173

@@ -370,28 +375,37 @@ protected Cluster(
370375

371376
numTables = tables.size();
372377
numRegionsPerServerPerTable = new int[numServers][numTables];
378+
numRegionsPerTable = new int[numTables];
373379

374380
for (int i = 0; i < numServers; i++) {
375381
for (int j = 0; j < numTables; j++) {
376382
numRegionsPerServerPerTable[i][j] = 0;
377383
}
378384
}
379385

386+
for (int i = 0; i < numTables; i++) {
387+
numRegionsPerTable[i] = 0;
388+
}
389+
380390
for (int i=0; i < regionIndexToServerIndex.length; i++) {
381391
if (regionIndexToServerIndex[i] >= 0) {
382392
numRegionsPerServerPerTable[regionIndexToServerIndex[i]][regionIndexToTableIndex[i]]++;
393+
numRegionsPerTable[regionIndexToTableIndex[i]]++;
383394
}
384395
}
385396

386-
numMaxRegionsPerTable = new int[numTables];
387-
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
388-
for (tableIndex = 0; tableIndex < aNumRegionsPerServerPerTable.length; tableIndex++) {
389-
if (aNumRegionsPerServerPerTable[tableIndex] > numMaxRegionsPerTable[tableIndex]) {
390-
numMaxRegionsPerTable[tableIndex] = aNumRegionsPerServerPerTable[tableIndex];
391-
}
392-
}
397+
// Avoid repeated computation for planning
398+
meanRegionsPerTable = new double[numTables];
399+
maxRegionSkewByTable = 0;
400+
minRegionSkewByTable = 0;
401+
for (int i = 0; i < numTables; i++) {
402+
meanRegionsPerTable[i] = Double.valueOf(numRegionsPerTable[i]) / numServers;
403+
minRegionSkewByTable += Cluster.getMinSkew(numRegionsPerTable[i], numServers);
404+
maxRegionSkewByTable += Cluster.getMaxSkew(numRegionsPerTable[i], numServers);
393405
}
394406

407+
computeRegionSkewPerTable();
408+
395409
for (int i = 0; i < regions.length; i ++) {
396410
RegionInfo info = regions[i];
397411
if (RegionReplicaUtil.isDefaultReplica(info)) {
@@ -516,6 +530,53 @@ public boolean serverHasTooFewRegions(int server) {
516530
return numRegions < minLoad;
517531
}
518532

533+
/**
534+
* Return the min skew of distribution
535+
*/
536+
public static double getMinSkew(double total, double numServers) {
537+
double mean = total / numServers;
538+
// It's possible that there aren't enough regions to go around
539+
double min;
540+
if (numServers > total) {
541+
min = ((numServers - total) * mean + (1 - mean) * total) ;
542+
} else {
543+
// Some will have 1 more than everything else.
544+
int numHigh = (int) (total - (Math.floor(mean) * numServers));
545+
int numLow = (int) (numServers - numHigh);
546+
min = numHigh * (Math.ceil(mean) - mean) + numLow * (mean - Math.floor(mean));
547+
}
548+
return min;
549+
}
550+
551+
/**
552+
* Return the max deviation of distribution
553+
* Compute max as if all region servers had 0 and one had the sum of all costs. This must be
554+
* a zero sum cost for this to make sense.
555+
*/
556+
public static double getMaxSkew(double total, double numServers) {
557+
double mean = total / numServers;
558+
return (total - mean) + (numServers - 1) * mean;
559+
}
560+
561+
/**
562+
* Scale the value between 0 and 1.
563+
*
564+
* @param min Min value
565+
* @param max The Max value
566+
* @param value The value to be scaled.
567+
* @return The scaled value.
568+
*/
569+
public static double scale(double min, double max, double value) {
570+
if (max <= min || value <= min) {
571+
return 0;
572+
}
573+
if ((max - min) == 0) {
574+
return 0;
575+
}
576+
577+
return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
578+
}
579+
519580
/**
520581
* Retrieves and lazily initializes a field storing the locality of
521582
* every region/server combination
@@ -573,6 +634,21 @@ public int getRegionSizeMB(int region) {
573634
return regionLoads[region].getLast().getStorefileSizeMB();
574635
}
575636

637+
/**
638+
* Recompute the region skew during init or plan of moves.
639+
*/
640+
private void computeRegionSkewPerTable() {
641+
// reinitialize for recomputation
642+
regionSkewByTable = 0;
643+
644+
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
645+
for (int tableIndex = 0; tableIndex < aNumRegionsPerServerPerTable.length; tableIndex++) {
646+
regionSkewByTable += Math.abs(aNumRegionsPerServerPerTable[tableIndex]
647+
- meanRegionsPerTable[tableIndex]);
648+
}
649+
}
650+
}
651+
576652
/**
577653
* Computes and caches the locality for each region/rack combinations,
578654
* as well as storing a mapping of region -> server and region -> rack such that server
@@ -828,22 +904,20 @@ void regionMoved(int region, int oldServer, int newServer) {
828904
int tableIndex = regionIndexToTableIndex[region];
829905
if (oldServer >= 0) {
830906
numRegionsPerServerPerTable[oldServer][tableIndex]--;
907+
// update regionSkewPerTable for the move from old server
908+
regionSkewByTable +=
909+
Math.abs(numRegionsPerServerPerTable[oldServer][tableIndex]
910+
- meanRegionsPerTable[tableIndex])
911+
- Math.abs(numRegionsPerServerPerTable[oldServer][tableIndex] + 1
912+
- meanRegionsPerTable[tableIndex]);
831913
}
832914
numRegionsPerServerPerTable[newServer][tableIndex]++;
833-
834-
//check whether this caused maxRegionsPerTable in the new Server to be updated
835-
if (numRegionsPerServerPerTable[newServer][tableIndex] > numMaxRegionsPerTable[tableIndex]) {
836-
numMaxRegionsPerTable[tableIndex] = numRegionsPerServerPerTable[newServer][tableIndex];
837-
} else if (oldServer >= 0 && (numRegionsPerServerPerTable[oldServer][tableIndex] + 1)
838-
== numMaxRegionsPerTable[tableIndex]) {
839-
//recompute maxRegionsPerTable since the previous value was coming from the old server
840-
numMaxRegionsPerTable[tableIndex] = 0;
841-
for (int[] aNumRegionsPerServerPerTable : numRegionsPerServerPerTable) {
842-
if (aNumRegionsPerServerPerTable[tableIndex] > numMaxRegionsPerTable[tableIndex]) {
843-
numMaxRegionsPerTable[tableIndex] = aNumRegionsPerServerPerTable[tableIndex];
844-
}
845-
}
846-
}
915+
// update regionSkewPerTable for the move to new server
916+
regionSkewByTable +=
917+
Math.abs(numRegionsPerServerPerTable[newServer][tableIndex]
918+
- meanRegionsPerTable[tableIndex])
919+
- Math.abs(numRegionsPerServerPerTable[newServer][tableIndex] - 1
920+
- meanRegionsPerTable[tableIndex]);
847921

848922
// update for servers
849923
int primary = regionIndexToPrimaryIndex[region];
@@ -1013,7 +1087,7 @@ public String toString() {
10131087
.append(Arrays.toString(serverIndicesSortedByRegionCount))
10141088
.append(", regionsPerServer=").append(Arrays.deepToString(regionsPerServer));
10151089

1016-
desc.append(", numMaxRegionsPerTable=").append(Arrays.toString(numMaxRegionsPerTable))
1090+
desc.append(", regionSkewByTable=").append(regionSkewByTable)
10171091
.append(", numRegions=").append(numRegions).append(", numServers=").append(numServers)
10181092
.append(", numTables=").append(numTables).append(", numMovedRegions=")
10191093
.append(numMovedRegions).append('}');

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java

Lines changed: 32 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -762,6 +762,7 @@ public CostFunction(Configuration c) {
762762
boolean isNeeded() {
763763
return true;
764764
}
765+
765766
float getMultiplier() {
766767
return multiplier;
767768
}
@@ -770,35 +771,39 @@ void setMultiplier(float m) {
770771
this.multiplier = m;
771772
}
772773

773-
/** Called once per LB invocation to give the cost function
774+
/**
775+
* Called once per LB invocation to give the cost function
774776
* to initialize its state, and perform any costly calculation.
775777
*/
776778
void init(Cluster cluster) {
777779
this.cluster = cluster;
778780
}
779781

780-
/** Called once per cluster Action to give the cost function
782+
/**
783+
* Called once per cluster Action to give the cost function
781784
* an opportunity to update its state. postAction() is always
782785
* called at least once before cost() is called with the cluster
783-
* that this action is performed on. */
786+
* that this action is performed on.
787+
*/
784788
void postAction(Action action) {
785789
switch (action.type) {
786-
case NULL: break;
787-
case ASSIGN_REGION:
788-
AssignRegionAction ar = (AssignRegionAction) action;
789-
regionMoved(ar.region, -1, ar.server);
790-
break;
791-
case MOVE_REGION:
792-
MoveRegionAction mra = (MoveRegionAction) action;
793-
regionMoved(mra.region, mra.fromServer, mra.toServer);
794-
break;
795-
case SWAP_REGIONS:
796-
SwapRegionsAction a = (SwapRegionsAction) action;
797-
regionMoved(a.fromRegion, a.fromServer, a.toServer);
798-
regionMoved(a.toRegion, a.toServer, a.fromServer);
799-
break;
800-
default:
801-
throw new RuntimeException("Uknown action:" + action.type);
790+
case NULL:
791+
break;
792+
case ASSIGN_REGION:
793+
AssignRegionAction ar = (AssignRegionAction) action;
794+
regionMoved(ar.region, -1, ar.server);
795+
break;
796+
case MOVE_REGION:
797+
MoveRegionAction mra = (MoveRegionAction) action;
798+
regionMoved(mra.region, mra.fromServer, mra.toServer);
799+
break;
800+
case SWAP_REGIONS:
801+
SwapRegionsAction a = (SwapRegionsAction) action;
802+
regionMoved(a.fromRegion, a.fromServer, a.toServer);
803+
regionMoved(a.toRegion, a.toServer, a.fromServer);
804+
break;
805+
default:
806+
throw new RuntimeException("Uknown action:" + action.type);
802807
}
803808
}
804809

@@ -822,59 +827,25 @@ protected double costFromArray(double[] stats) {
822827
double total = getSum(stats);
823828

824829
double count = stats.length;
825-
double mean = total/count;
826-
827-
// Compute max as if all region servers had 0 and one had the sum of all costs. This must be
828-
// a zero sum cost for this to make sense.
829-
double max = ((count - 1) * mean) + (total - mean);
830-
831-
// It's possible that there aren't enough regions to go around
832-
double min;
833-
if (count > total) {
834-
min = ((count - total) * mean) + ((1 - mean) * total);
835-
} else {
836-
// Some will have 1 more than everything else.
837-
int numHigh = (int) (total - (Math.floor(mean) * count));
838-
int numLow = (int) (count - numHigh);
839-
840-
min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean)));
830+
double mean = total / count;
841831

842-
}
843-
min = Math.max(0, min);
844-
for (int i=0; i<stats.length; i++) {
832+
for (int i = 0; i < stats.length; i++) {
845833
double n = stats[i];
846834
double diff = Math.abs(mean - n);
847835
totalCost += diff;
848836
}
849837

850-
double scaled = scale(min, max, totalCost);
851-
return scaled;
838+
return Cluster
839+
.scale(Cluster.getMinSkew(total, count), Cluster.getMaxSkew(total, count), totalCost);
852840
}
853841

854842
private double getSum(double[] stats) {
855843
double total = 0;
856-
for(double s:stats) {
844+
for (double s : stats) {
857845
total += s;
858846
}
859847
return total;
860848
}
861-
862-
/**
863-
* Scale the value between 0 and 1.
864-
*
865-
* @param min Min value
866-
* @param max The Max value
867-
* @param value The value to be scaled.
868-
* @return The scaled value.
869-
*/
870-
protected double scale(double min, double max, double value) {
871-
if (max <= min || value <= min) {
872-
return 0;
873-
}
874-
if ((max - min) == 0) return 0;
875-
876-
return Math.max(0d, Math.min(1d, (value - min) / (max - min)));
877-
}
878849
}
879850

880851
/**
@@ -927,7 +898,7 @@ protected double cost() {
927898
return 1000000; // return a number much greater than any of the other cost
928899
}
929900

930-
return scale(0, Math.min(cluster.numRegions, maxMoves), moveCost);
901+
return Cluster.scale(0, Math.min(cluster.numRegions, maxMoves), moveCost);
931902
}
932903
}
933904

@@ -1035,15 +1006,7 @@ static class TableSkewCostFunction extends CostFunction {
10351006

10361007
@Override
10371008
protected double cost() {
1038-
double max = cluster.numRegions;
1039-
double min = ((double) cluster.numRegions) / cluster.numServers;
1040-
double value = 0;
1041-
1042-
for (int i = 0; i < cluster.numMaxRegionsPerTable.length; i++) {
1043-
value += cluster.numMaxRegionsPerTable[i];
1044-
}
1045-
1046-
return scale(min, max, value);
1009+
return Cluster.scale(cluster.minRegionSkewByTable, cluster.maxRegionSkewByTable, cluster.regionSkewByTable);
10471010
}
10481011
}
10491012

@@ -1366,7 +1329,7 @@ protected double cost() {
13661329
for (int i = 0 ; i < costsPerGroup.length; i++) {
13671330
totalCost += costsPerGroup[i];
13681331
}
1369-
return scale(0, maxCost, totalCost);
1332+
return Cluster.scale(0, maxCost, totalCost);
13701333
}
13711334

13721335
/**

hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -390,8 +390,8 @@ public void testRegionAvailabilityWithRegionMoves() throws Exception {
390390

391391
// now move region1 from servers[0] to servers[2]
392392
cluster.doAction(new MoveRegionAction(0, 0, 2));
393-
// check that the numMaxRegionsPerTable for "table" has increased to 2
394-
assertEquals(2, cluster.numMaxRegionsPerTable[0]);
393+
// check that the regionSkewByTable for "table" has increased to 2
394+
assertEquals(2, cluster.regionSkewByTable, 0.01);
395395
// now repeat check whether moving region1 from servers[1] to servers[2]
396396
// would lower availability
397397
assertTrue(cluster.wouldLowerAvailability(hri1, servers[2]));

0 commit comments

Comments
 (0)