@@ -133,7 +133,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
133133
134134 private List <CandidateGenerator > candidateGenerators ;
135135 private List <CostFunction > costFunctions ; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC
136-
136+ // Saves the sum of the currently configured multipliers. Defaults to 1 for cases that carry high cost
137+ private float sumMultiplier = 1.0f ;
137138 // to save and report costs to JMX
138139 private double curOverallCost = 0d ;
139140 private double [] tempFunctionCosts ;
@@ -229,7 +230,6 @@ protected void loadConf(Configuration conf) {
229230
230231 regionReplicaHostCostFunction = new RegionReplicaHostCostFunction (conf );
231232 regionReplicaRackCostFunction = new RegionReplicaRackCostFunction (conf );
232-
233233 costFunctions = new ArrayList <>();
234234 addCostFunction (new RegionCountSkewCostFunction (conf ));
235235 addCostFunction (new PrimaryRegionCountSkewCostFunction (conf ));
@@ -310,63 +310,63 @@ private boolean areSomeRegionReplicasColocated(BalancerClusterState c) {
310310 boolean needsBalance (TableName tableName , BalancerClusterState cluster ) {
311311 ClusterLoadState cs = new ClusterLoadState (cluster .clusterState );
312312 if (cs .getNumServers () < MIN_SERVER_BALANCE ) {
313- if (LOG .isDebugEnabled ()) {
314- LOG .debug ("Not running balancer because only " + cs .getNumServers ()
315- + " active regionserver(s)" );
316- }
317- if (this .isBalancerRejectionRecording ) {
318- sendRejectionReasonToRingBuffer ("The number of RegionServers " +
319- cs .getNumServers () + " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")" , null );
320- }
313+ LOG .info ("Not running balancer because only " + cs .getNumServers () + " active regionserver(s)" );
314+ sendRejectionReasonToRingBuffer (
315+ "The number of RegionServers " + cs .getNumServers () + " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")" , null );
321316 return false ;
322317 }
323318 if (areSomeRegionReplicasColocated (cluster )) {
319+ LOG .info ("Running balancer because at least one server hosts replicas of the same region." );
324320 return true ;
325321 }
326322
327323 if (idleRegionServerExist (cluster )){
324+ LOG .info ("Running balancer because cluster has idle server(s)." );
328325 return true ;
329326 }
330327
328+ sumMultiplier = 0.0f ;
331329 double total = 0.0 ;
332- float sumMultiplier = 0.0f ;
333330 for (CostFunction c : costFunctions ) {
334331 float multiplier = c .getMultiplier ();
335- if (multiplier <= 0 ) {
336- LOG .trace ("{} not needed because multiplier is <= 0" , c .getClass ().getSimpleName ());
337- continue ;
338- }
332+ double cost = c .cost ();
339333 if (!c .isNeeded ()) {
340334 LOG .trace ("{} not needed" , c .getClass ().getSimpleName ());
341335 continue ;
342336 }
337+ total += cost * multiplier ;
343338 sumMultiplier += multiplier ;
344- total += c .cost () * multiplier ;
345- }
346-
347- boolean balanced = total <= 0 || sumMultiplier <= 0 ||
348- (sumMultiplier > 0 && (total / sumMultiplier ) < minCostNeedBalance );
349- if (balanced && isBalancerRejectionRecording ){
350- String reason = "" ;
351- if (total <= 0 ) {
352- reason = "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total + " <= 0" ;
353- } else if (sumMultiplier <= 0 ) {
354- reason = "sumMultiplier = " + sumMultiplier + " <= 0" ;
355- } else if ((total / sumMultiplier ) < minCostNeedBalance ) {
356- reason =
357- "[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier = " + (total
358- / sumMultiplier ) + " <= minCostNeedBalance(" + minCostNeedBalance + ")" ;
359- }
360- sendRejectionReasonToRingBuffer (reason , costFunctions );
361- }
362- if (LOG .isDebugEnabled ()) {
363- LOG .debug ("{} {}; total cost={}, sum multiplier={}; cost/multiplier to need a balance is {}" ,
364- balanced ? "Skipping load balancing because balanced" : "We need to load balance" ,
365- isByTable ? String .format ("table (%s)" , tableName ) : "cluster" ,
366- total , sumMultiplier , minCostNeedBalance );
367- if (LOG .isTraceEnabled ()) {
368- LOG .trace ("Balance decision detailed function costs={}" , functionCost ());
339+ }
340+ if (sumMultiplier <= 0 ) {
341+ LOG .error ("At least one cost function needs a multiplier > 0. For example, set "
342+ + "hbase.master.balancer.stochastic.regionCountCost to a positive value or default" );
343+ return false ;
344+ }
345+
346+ boolean balanced = (total / sumMultiplier < minCostNeedBalance );
347+ if (balanced ) {
348+ if (isBalancerRejectionRecording ) {
349+ String reason = "" ;
350+ if (total <= 0 ) {
351+ reason = "(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total + " <= 0" ;
352+ } else if (sumMultiplier <= 0 ) {
353+ reason = "sumMultiplier = " + sumMultiplier + " <= 0" ;
354+ } else if ((total / sumMultiplier ) < minCostNeedBalance ) {
355+ reason =
356+ "[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier = " + (
357+ total / sumMultiplier ) + " <= minCostNeedBalance(" + minCostNeedBalance + ")" ;
358+ }
359+ sendRejectionReasonToRingBuffer (reason , costFunctions );
369360 }
361+ LOG .info ("{} - skipping load balancing because weighted average imbalance={} <= "
362+ + "threshold({}). If you want more aggressive balancing, either lower "
363+ + "hbase.master.balancer.stochastic.minCostNeedBalance from {} or increase the relative "
364+ + "multiplier(s) of the specific cost function(s). functionCost={}" ,
365+ isByTable ? "Table specific (" +tableName +")" : "Cluster wide" , total / sumMultiplier ,
366+ minCostNeedBalance , minCostNeedBalance , functionCost ());
367+ } else {
368+ LOG .info ("{} - Calculating plan. may take up to {}ms to complete." ,
369+ isByTable ? "Table specific (" +tableName +")" : "Cluster wide" , maxRunningTime );
370370 }
371371 return !balanced ;
372372 }
@@ -452,8 +452,9 @@ protected List<RegionPlan> balanceTable(TableName tableName, Map<ServerName,
452452 maxSteps );
453453 }
454454 }
455- LOG .info ("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost="
456- + functionCost () + " computedMaxSteps: " + computedMaxSteps );
455+ LOG .info ("Start StochasticLoadBalancer.balancer, initial weighted average imbalance={}, "
456+ + "functionCost={} computedMaxSteps={}" ,
457+ currentCost / sumMultiplier , functionCost (), computedMaxSteps );
457458
458459 final String initFunctionTotalCosts = totalCostsPerFunc ();
459460 // Perform a stochastic walk to see if we can get a good fit.
@@ -499,17 +500,19 @@ protected List<RegionPlan> balanceTable(TableName tableName, Map<ServerName,
499500 updateStochasticCosts (tableName , curOverallCost , curFunctionCosts );
500501 if (initCost > currentCost ) {
501502 plans = createRegionPlans (cluster );
502- LOG .info ("Finished computing new load balance plan. Computation took {}" +
503- " to try {} different iterations. Found a solution that moves " +
504- "{} regions; Going from a computed cost of {}" +
505- " to a new cost of {}" , java .time .Duration .ofMillis (endTime - startTime ),
506- step , plans .size (), initCost , currentCost );
503+ LOG .info ("Finished computing new moving plan. Computation took {} ms" +
504+ " to try {} different iterations. Found a solution that moves " +
505+ "{} regions; Going from a computed imbalance of {}" +
506+ " to a new imbalance of {}. " ,
507+ endTime - startTime , step , plans .size (),
508+ initCost / sumMultiplier , currentCost / sumMultiplier );
509+
507510 sendRegionPlansToRingBuffer (plans , currentCost , initCost , initFunctionTotalCosts , step );
508511 return plans ;
509512 }
510- LOG .info ("Could not find a better load balance plan. Tried {} different configurations in " +
511- "{}, and did not find anything with a computed cost less than {}" , step ,
512- java . time . Duration . ofMillis ( endTime - startTime ) , initCost );
513+ LOG .info ("Could not find a better moving plan. Tried {} different configurations in " +
514+ "{} ms , and did not find anything with an imbalance score less than {}" , step ,
515+ endTime - startTime , initCost / sumMultiplier );
513516 return null ;
514517 }
515518
@@ -520,8 +523,7 @@ private void sendRejectionReasonToRingBuffer(String reason, List<CostFunction> c
520523 .setReason (reason );
521524 if (costFunctions != null ) {
522525 for (CostFunction c : costFunctions ) {
523- float multiplier = c .getMultiplier ();
524- if (multiplier <= 0 || !c .isNeeded ()) {
526+ if (!c .isNeeded ()) {
525527 continue ;
526528 }
527529 builder .addCostFuncInfo (c .getClass ().getName (), c .cost (), c .getMultiplier ());
@@ -580,7 +582,8 @@ private void updateStochasticCosts(TableName tableName, double overall, double[]
580582 }
581583
582584 private void addCostFunction (CostFunction costFunction ) {
583- if (costFunction .getMultiplier () > 0 ) {
585+ float multiplier = costFunction .getMultiplier ();
586+ if (multiplier > 0 ) {
584587 costFunctions .add (costFunction );
585588 }
586589 }
@@ -591,9 +594,13 @@ private String functionCost() {
591594 builder .append (c .getClass ().getSimpleName ());
592595 builder .append (" : (" );
593596 if (c .isNeeded ()) {
594- builder .append (c .getMultiplier ());
597+ builder .append ("multiplier=" + c .getMultiplier ());
595598 builder .append (", " );
596- builder .append (c .cost ());
599+ double cost = c .cost ();
600+ builder .append ("imbalance=" + cost );
601+ if (cost < minCostNeedBalance ) {
602+ builder .append (", balanced" );
603+ }
597604 } else {
598605 builder .append ("not needed" );
599606 }
@@ -605,7 +612,7 @@ private String functionCost() {
605612 private String totalCostsPerFunc () {
606613 StringBuilder builder = new StringBuilder ();
607614 for (CostFunction c : costFunctions ) {
608- if (c . getMultiplier () <= 0 || !c .isNeeded ()) {
615+ if (!c .isNeeded ()) {
609616 continue ;
610617 }
611618 double cost = c .getMultiplier () * c .cost ();
@@ -689,7 +696,7 @@ void initCosts(BalancerClusterState cluster) {
689696 allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java" )
690697 void updateCostsWithAction (BalancerClusterState cluster , BalanceAction action ) {
691698 for (CostFunction c : costFunctions ) {
692- if (c .getMultiplier () > 0 && c . isNeeded ()) {
699+ if (c .isNeeded ()) {
693700 c .postAction (action );
694701 }
695702 }
@@ -728,7 +735,7 @@ String[] getCostFunctionNames() {
728735 CostFunction c = costFunctions .get (i );
729736 this .tempFunctionCosts [i ] = 0.0 ;
730737
731- if (c . getMultiplier () <= 0 || !c .isNeeded ()) {
738+ if (!c .isNeeded ()) {
732739 continue ;
733740 }
734741
0 commit comments