@@ -133,7 +133,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
133133
134134 private List <CandidateGenerator > candidateGenerators ;
135135 private List <CostFunction > costFunctions ; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC
136-
136+ // To save the sum of the multipliers of the currently configured cost functions. Defaults to 1 for cases that carry high cost
137+ private float sumMultiplier = 1.0f ;
137138 // to save and report costs to JMX
138139 private double curOverallCost = 0d ;
139140 private double [] tempFunctionCosts ;
@@ -229,7 +230,6 @@ protected void loadConf(Configuration conf) {
229230
230231 regionReplicaHostCostFunction = new RegionReplicaHostCostFunction (conf );
231232 regionReplicaRackCostFunction = new RegionReplicaRackCostFunction (conf );
232-
233233 costFunctions = new ArrayList <>();
234234 addCostFunction (new RegionCountSkewCostFunction (conf ));
235235 addCostFunction (new PrimaryRegionCountSkewCostFunction (conf ));
@@ -310,63 +310,66 @@ private boolean areSomeRegionReplicasColocated(BalancerClusterState c) {
310310 boolean needsBalance (TableName tableName , BalancerClusterState cluster ) {
311311 ClusterLoadState cs = new ClusterLoadState (cluster .clusterState );
312312 if (cs .getNumServers () < MIN_SERVER_BALANCE ) {
313- if (LOG .isDebugEnabled ()) {
314- LOG .debug ("Not running balancer because only " + cs .getNumServers ()
315- + " active regionserver(s)" );
316- }
317- if (this .isBalancerRejectionRecording ) {
318- sendRejectionReasonToRingBuffer ("The number of RegionServers " +
319- cs .getNumServers () + " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")" , null );
320- }
313+ LOG .info ("Not running balancer because only " + cs .getNumServers () +
314+ " active regionserver(s)" );
315+ sendRejectionReasonToRingBuffer (
316+ "The number of RegionServers " + cs .getNumServers () + " < MIN_SERVER_BALANCE(" +
317+ MIN_SERVER_BALANCE + ")" , null );
321318 return false ;
322319 }
323320 if (areSomeRegionReplicasColocated (cluster )) {
321+ LOG .info ("Running balancer because at least one server hosts replicas of the same region." );
324322 return true ;
325323 }
326324
327325 if (idleRegionServerExist (cluster )){
326+ LOG .info ("Running balancer because cluster has idle server(s)." );
328327 return true ;
329328 }
330329
330+ sumMultiplier = 0.0f ;
331331 double total = 0.0 ;
332- float sumMultiplier = 0.0f ;
333332 for (CostFunction c : costFunctions ) {
334333 float multiplier = c .getMultiplier ();
335- if (multiplier <= 0 ) {
336- LOG .trace ("{} not needed because multiplier is <= 0" , c .getClass ().getSimpleName ());
337- continue ;
338- }
334+ double cost = c .cost ();
339335 if (!c .isNeeded ()) {
340336 LOG .trace ("{} not needed" , c .getClass ().getSimpleName ());
341337 continue ;
342338 }
339+ total += cost * multiplier ;
343340 sumMultiplier += multiplier ;
344- total += c .cost () * multiplier ;
345- }
346-
347- boolean balanced = total <= 0 || sumMultiplier <= 0 ||
348- (sumMultiplier > 0 && (total / sumMultiplier ) < minCostNeedBalance );
349- if (balanced && isBalancerRejectionRecording ){
350- String reason = "" ;
351- if (total <= 0 ) {
352- reason = "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total + " <= 0" ;
353- } else if (sumMultiplier <= 0 ) {
354- reason = "sumMultiplier = " + sumMultiplier + " <= 0" ;
355- } else if ((total / sumMultiplier ) < minCostNeedBalance ) {
356- reason =
357- "[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier = " + (total
358- / sumMultiplier ) + " <= minCostNeedBalance(" + minCostNeedBalance + ")" ;
359- }
360- sendRejectionReasonToRingBuffer (reason , costFunctions );
361- }
362- if (LOG .isDebugEnabled ()) {
363- LOG .debug ("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}" ,
364- balanced ? "Skipping load balancing because balanced" : "We need to load balance" ,
365- isByTable ? String .format ("table (%s)" , tableName ) : "cluster" ,
366- total , sumMultiplier , minCostNeedBalance );
367- if (LOG .isTraceEnabled ()) {
368- LOG .trace ("Balance decision detailed function costs={}" , functionCost ());
341+ }
342+ if (sumMultiplier <= 0 ) {
343+ LOG .error ("At least one cost function needs a multiplier > 0. For example, set "
344+ + "hbase.master.balancer.stochastic.regionCountCost to a positive value or default" );
345+ return false ;
346+ }
347+
348+ boolean balanced = (total / sumMultiplier < minCostNeedBalance );
349+ if (balanced ) {
350+ if (isBalancerRejectionRecording ) {
351+ String reason = "" ;
352+ if (total <= 0 ) {
353+ reason = "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " +
354+ total + " <= 0" ;
355+ } else if (sumMultiplier <= 0 ) {
356+ reason = "sumMultiplier = " + sumMultiplier + " <= 0" ;
357+ } else if ((total / sumMultiplier ) < minCostNeedBalance ) {
358+ reason =
359+ "[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier = " +
360+ (total / sumMultiplier ) + " <= minCostNeedBalance(" + minCostNeedBalance + ")" ;
361+ }
362+ sendRejectionReasonToRingBuffer (reason , costFunctions );
369363 }
364+ LOG .info ("{} - skipping load balancing because weighted average imbalance={} <= "
365+ + "threshold({}). If you want more aggressive balancing, either lower "
366+ + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative "
367+ + "multiplier(s) of the specific cost function(s). functionCost={}" ,
368+ isByTable ? "Table specific (" +tableName +")" : "Cluster wide" , total / sumMultiplier ,
369+ minCostNeedBalance , minCostNeedBalance , functionCost ());
370+ } else {
371+ LOG .info ("{} - Calculating plan. may take up to {}ms to complete." ,
372+ isByTable ? "Table specific (" +tableName +")" : "Cluster wide" , maxRunningTime );
370373 }
371374 return !balanced ;
372375 }
@@ -452,8 +455,9 @@ protected List<RegionPlan> balanceTable(TableName tableName, Map<ServerName,
452455 maxSteps );
453456 }
454457 }
455- LOG .info ("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost="
456- + functionCost () + " computedMaxSteps: " + computedMaxSteps );
458+ LOG .info ("Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, "
459+ + "functionCost={} computedMaxSteps={}" ,
460+ currentCost / sumMultiplier , functionCost (), computedMaxSteps );
457461
458462 final String initFunctionTotalCosts = totalCostsPerFunc ();
459463 // Perform a stochastic walk to see if we can get a good fit.
@@ -499,17 +503,19 @@ protected List<RegionPlan> balanceTable(TableName tableName, Map<ServerName,
499503 updateStochasticCosts (tableName , curOverallCost , curFunctionCosts );
500504 if (initCost > currentCost ) {
501505 plans = createRegionPlans (cluster );
502- LOG .info ("Finished computing new load balance plan. Computation took {}" +
503- " to try {} different iterations. Found a solution that moves " +
504- "{} regions; Going from a computed cost of {}" +
505- " to a new cost of {}" , java .time .Duration .ofMillis (endTime - startTime ),
506- step , plans .size (), initCost , currentCost );
506+ LOG .info ("Finished computing new moving plan. Computation took {} ms" +
507+ " to try {} different iterations. Found a solution that moves " +
508+ "{} regions; Going from a computed imbalance of {}" +
509+ " to a new imbalance of {}. " ,
510+ endTime - startTime , step , plans .size (),
511+ initCost / sumMultiplier , currentCost / sumMultiplier );
512+
507513 sendRegionPlansToRingBuffer (plans , currentCost , initCost , initFunctionTotalCosts , step );
508514 return plans ;
509515 }
510- LOG .info ("Could not find a better load balance plan. Tried {} different configurations in " +
511- "{}, and did not find anything with a computed cost less than {}" , step ,
512- java . time . Duration . ofMillis ( endTime - startTime ) , initCost );
516+ LOG .info ("Could not find a better moving plan. Tried {} different configurations in " +
517+ "{} ms , and did not find anything with an imbalance score less than {}" , step ,
518+ endTime - startTime , initCost / sumMultiplier );
513519 return null ;
514520 }
515521
@@ -520,8 +526,7 @@ private void sendRejectionReasonToRingBuffer(String reason, List<CostFunction> c
520526 .setReason (reason );
521527 if (costFunctions != null ) {
522528 for (CostFunction c : costFunctions ) {
523- float multiplier = c .getMultiplier ();
524- if (multiplier <= 0 || !c .isNeeded ()) {
529+ if (!c .isNeeded ()) {
525530 continue ;
526531 }
527532 builder .addCostFuncInfo (c .getClass ().getName (), c .cost (), c .getMultiplier ());
@@ -580,7 +585,8 @@ private void updateStochasticCosts(TableName tableName, double overall, double[]
580585 }
581586
582587 private void addCostFunction (CostFunction costFunction ) {
583- if (costFunction .getMultiplier () > 0 ) {
588+ float multiplier = costFunction .getMultiplier ();
589+ if (multiplier > 0 ) {
584590 costFunctions .add (costFunction );
585591 }
586592 }
@@ -591,9 +597,13 @@ private String functionCost() {
591597 builder .append (c .getClass ().getSimpleName ());
592598 builder .append (" : (" );
593599 if (c .isNeeded ()) {
594- builder .append (c .getMultiplier ());
600+ builder .append ("multiplier=" + c .getMultiplier ());
595601 builder .append (", " );
596- builder .append (c .cost ());
602+ double cost = c .cost ();
603+ builder .append ("imbalance=" + cost );
604+ if (cost < minCostNeedBalance ) {
605+ builder .append (", balanced" );
606+ }
597607 } else {
598608 builder .append ("not needed" );
599609 }
@@ -605,7 +615,7 @@ private String functionCost() {
605615 private String totalCostsPerFunc () {
606616 StringBuilder builder = new StringBuilder ();
607617 for (CostFunction c : costFunctions ) {
608- if (c . getMultiplier () <= 0 || !c .isNeeded ()) {
618+ if (!c .isNeeded ()) {
609619 continue ;
610620 }
611621 double cost = c .getMultiplier () * c .cost ();
@@ -689,7 +699,7 @@ void initCosts(BalancerClusterState cluster) {
689699 allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java" )
690700 void updateCostsWithAction (BalancerClusterState cluster , BalanceAction action ) {
691701 for (CostFunction c : costFunctions ) {
692- if (c .getMultiplier () > 0 && c . isNeeded ()) {
702+ if (c .isNeeded ()) {
693703 c .postAction (action );
694704 }
695705 }
@@ -728,7 +738,7 @@ String[] getCostFunctionNames() {
728738 CostFunction c = costFunctions .get (i );
729739 this .tempFunctionCosts [i ] = 0.0 ;
730740
731- if (c . getMultiplier () <= 0 || !c .isNeeded ()) {
741+ if (!c .isNeeded ()) {
732742 continue ;
733743 }
734744
0 commit comments