From 908597fcffe53b581ac70b8dab706ed2c4767aef Mon Sep 17 00:00:00 2001 From: Clara Xiong Date: Wed, 29 Sep 2021 15:43:20 -0700 Subject: [PATCH 1/2] HBASE-26308 Sum of multiplier of cost functions is not populated properly when we have a shortcut for trigger --- .../balancer/StochasticLoadBalancer.java | 44 ++++++++++--------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index eaa923e74fcf..79aaa965aef5 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -132,7 +132,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { private List candidateGenerators; private List costFunctions; // FindBugs: Wants this protected; IS2_INCONSISTENT_SYNC // To save currently configed sum of multiplier. Defaulted at 1 for cases that carry high cost - private float sumMultiplier = 1.0f; + private float sumMultiplier; // to save and report costs to JMX private double curOverallCost = 0d; private double[] tempFunctionCosts; @@ -248,11 +248,22 @@ protected void loadConf(Configuration conf) { curFunctionCosts = new double[costFunctions.size()]; tempFunctionCosts = new double[costFunctions.size()]; - LOG.info("Loaded config; maxSteps=" + maxSteps + ", runMaxSteps=" + runMaxSteps, - ", stepsPerRegion=" + stepsPerRegion + - ", maxRunningTime=" + maxRunningTime + ", isByTable=" + isByTable + ", CostFunctions=" + - Arrays.toString(getCostFunctionNames()) + " etc."); - } + sumMultiplier = 0; + for (CostFunction c : costFunctions) { + float multiplier = c.getMultiplier(); + sumMultiplier += multiplier; + } + if (sumMultiplier <= 0) { + LOG.error("At least one cost function needs a multiplier > 0. For example, set " + + "hbase.master.balancer.stochastic.regionCountCost to a positive value or default"); + } + + LOG.info( + "Loaded config; maxSteps=" + maxSteps + ", runMaxSteps=" + runMaxSteps + + ", stepsPerRegion=" + stepsPerRegion + + ", maxRunningTime=" + maxRunningTime + ", isByTable=" + isByTable + + ", CostFunctions=" + Arrays.toString(getCostFunctionNames()) + + " , sum of multiplier of cost functions = " + sumMultiplier + " etc."); } @Override public void updateClusterMetrics(ClusterMetrics st) { @@ -345,33 +356,26 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { return false; } if (areSomeRegionReplicasColocated(cluster)) { - LOG.info("Running balancer because at least one server hosts replicas of the same region."); + LOG.info("Running balancer because at least one server hosts replicas of the same region. " + + "function cost={}", functionCost()); return true; } if (idleRegionServerExist(cluster)){ + LOG.info("Running balancer because at least one server hosts replicas of the same region." + + "regionReplicaRackCostFunction={}", regionReplicaRackCostFunction.cost()); LOG.info("Running balancer because cluster has idle server(s)."); return true; } - sumMultiplier = 0.0f; double total = 0.0; for (CostFunction c : costFunctions) { - float multiplier = c.getMultiplier(); - double cost = c.cost(); if (!c.isNeeded()) { LOG.trace("{} not needed", c.getClass().getSimpleName()); continue; } - total += cost * multiplier; - sumMultiplier += multiplier; + total += c.cost() * c.getMultiplier(); } - if (sumMultiplier <= 0) { - LOG.error("At least one cost function needs a multiplier > 0. For example, set " - + "hbase.master.balancer.stochastic.regionCountCost to a positive value or default"); - return false; - } - boolean balanced = (total / sumMultiplier < minCostNeedBalance); if (balanced) { @@ -598,8 +602,8 @@ private String functionCost() { builder.append(", "); double cost = c.cost(); builder.append("imbalance=" + cost); - if (cost < minCostNeedBalance) { - builder.append(", balanced"); + if (cost >= minCostNeedBalance) { + builder.append(", need balance"); } } else { builder.append("not needed"); From c06097f842185faeb3cb85a933234b7c71ce48c4 Mon Sep 17 00:00:00 2001 From: Clara Xiong Date: Wed, 29 Sep 2021 16:43:18 -0700 Subject: [PATCH 2/2] Move to after init the cost functions. --- .../balancer/StochasticLoadBalancer.java | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 79aaa965aef5..eb16cdfeef52 100644 --- a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -248,16 +248,6 @@ protected void loadConf(Configuration conf) { curFunctionCosts = new double[costFunctions.size()]; tempFunctionCosts = new double[costFunctions.size()]; - sumMultiplier = 0; - for (CostFunction c : costFunctions) { - float multiplier = c.getMultiplier(); - sumMultiplier += multiplier; - } - if (sumMultiplier <= 0) { - LOG.error("At least one cost function needs a multiplier > 0. For example, set " - + "hbase.master.balancer.stochastic.regionCountCost to a positive value or default"); - } - LOG.info( "Loaded config; maxSteps=" + maxSteps + ", runMaxSteps=" + runMaxSteps + ", stepsPerRegion=" + stepsPerRegion + @@ -356,15 +346,16 @@ boolean needsBalance(TableName tableName, BalancerClusterState cluster) { return false; } if (areSomeRegionReplicasColocated(cluster)) { - LOG.info("Running balancer because at least one server hosts replicas of the same region. " - + "function cost={}", functionCost()); + LOG.info("Running balancer because at least one server hosts replicas of the same region." + + " function cost={}", functionCost()); return true; } if (idleRegionServerExist(cluster)){ LOG.info("Running balancer because at least one server hosts replicas of the same region." + "regionReplicaRackCostFunction={}", regionReplicaRackCostFunction.cost()); - LOG.info("Running balancer because cluster has idle server(s)."); + LOG.info("Running balancer because cluster has idle server(s)."+ + " function cost={}", functionCost()); return true; } @@ -439,6 +430,18 @@ protected List balanceTable(TableName tableName, Map 0. For example, set " + + "hbase.master.balancer.stochastic.regionCountCost to a positive value or default"); + return null; + } + double currentCost = computeCost(cluster, Double.MAX_VALUE); curOverallCost = currentCost; System.arraycopy(tempFunctionCosts, 0, curFunctionCosts, 0, curFunctionCosts.length);