Skip to content

Commit e5ab24c

Browse files
author
Ray Mattingly
committed
Simplifying the isViolation interface, fixing generator weights
1 parent c2d88ca commit e5ab24c

27 files changed

+394
-850
lines changed

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,40 @@ enum LocalityType {
709709
RACK
710710
}
711711

712+
public List<RegionPlan> convertActionToPlans(BalanceAction action) {
713+
switch (action.getType()) {
714+
case NULL:
715+
break;
716+
case ASSIGN_REGION:
717+
// FindBugs: Having the assert quietens FB BC_UNCONFIRMED_CAST warnings
718+
assert action instanceof AssignRegionAction : action.getClass();
719+
AssignRegionAction ar = (AssignRegionAction) action;
720+
return ImmutableList.of(regionMoved(ar.getRegion(), -1, ar.getServer()));
721+
case MOVE_REGION:
722+
assert action instanceof MoveRegionAction : action.getClass();
723+
MoveRegionAction mra = (MoveRegionAction) action;
724+
return ImmutableList
725+
.of(regionMoved(mra.getRegion(), mra.getFromServer(), mra.getToServer()));
726+
case SWAP_REGIONS:
727+
assert action instanceof SwapRegionsAction : action.getClass();
728+
SwapRegionsAction a = (SwapRegionsAction) action;
729+
return ImmutableList.of(regionMoved(a.getFromRegion(), a.getFromServer(), a.getToServer()),
730+
regionMoved(a.getToRegion(), a.getToServer(), a.getFromServer()));
731+
case MOVE_BATCH:
732+
assert action instanceof MoveBatchAction : action.getClass();
733+
MoveBatchAction mba = (MoveBatchAction) action;
734+
List<RegionPlan> mbRegionPlans = new ArrayList<>();
735+
for (MoveRegionAction moveRegionAction : mba.getMoveActions()) {
736+
mbRegionPlans.add(regionMoved(moveRegionAction.getRegion(),
737+
moveRegionAction.getFromServer(), moveRegionAction.getToServer()));
738+
}
739+
return mbRegionPlans;
740+
default:
741+
throw new RuntimeException("Unknown action:" + action.getType());
742+
}
743+
return Collections.emptyList();
744+
}
745+
712746
public List<RegionPlan> doAction(BalanceAction action) {
713747
switch (action.getType()) {
714748
case NULL:
@@ -723,16 +757,12 @@ public List<RegionPlan> doAction(BalanceAction action) {
723757
case MOVE_REGION:
724758
assert action instanceof MoveRegionAction : action.getClass();
725759
MoveRegionAction mra = (MoveRegionAction) action;
726-
try {
727-
regionsPerServer[mra.getFromServer()] =
728-
removeRegion(regionsPerServer[mra.getFromServer()], mra.getRegion());
729-
regionsPerServer[mra.getToServer()] =
730-
addRegion(regionsPerServer[mra.getToServer()], mra.getRegion());
731-
return ImmutableList
732-
.of(regionMoved(mra.getRegion(), mra.getFromServer(), mra.getToServer()));
733-
} catch (Exception e) {
734-
throw e;
735-
}
760+
regionsPerServer[mra.getFromServer()] =
761+
removeRegion(regionsPerServer[mra.getFromServer()], mra.getRegion());
762+
regionsPerServer[mra.getToServer()] =
763+
addRegion(regionsPerServer[mra.getToServer()], mra.getRegion());
764+
return ImmutableList
765+
.of(regionMoved(mra.getRegion(), mra.getFromServer(), mra.getToServer()));
736766
case SWAP_REGIONS:
737767
assert action instanceof SwapRegionsAction : action.getClass();
738768
SwapRegionsAction a = (SwapRegionsAction) action;
@@ -742,19 +772,6 @@ public List<RegionPlan> doAction(BalanceAction action) {
742772
replaceRegion(regionsPerServer[a.getToServer()], a.getToRegion(), a.getFromRegion());
743773
return ImmutableList.of(regionMoved(a.getFromRegion(), a.getFromServer(), a.getToServer()),
744774
regionMoved(a.getToRegion(), a.getToServer(), a.getFromServer()));
745-
case ISOLATE_TABLE:
746-
assert action instanceof IsolateTablesAction : action.getClass();
747-
IsolateTablesAction ia = (IsolateTablesAction) action;
748-
List<RegionPlan> iaRegionPlans = new ArrayList<>();
749-
for (MoveRegionAction moveRegionAction : ia.getMoveActions()) {
750-
regionsPerServer[moveRegionAction.getFromServer()] = removeRegion(
751-
regionsPerServer[moveRegionAction.getFromServer()], moveRegionAction.getRegion());
752-
regionsPerServer[moveRegionAction.getToServer()] = addRegion(
753-
regionsPerServer[moveRegionAction.getToServer()], moveRegionAction.getRegion());
754-
iaRegionPlans.add(regionMoved(moveRegionAction.getRegion(),
755-
moveRegionAction.getFromServer(), moveRegionAction.getToServer()));
756-
}
757-
return iaRegionPlans;
758775
case MOVE_BATCH:
759776
assert action instanceof MoveBatchAction : action.getClass();
760777
MoveBatchAction mba = (MoveBatchAction) action;

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java

Lines changed: 33 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import java.lang.reflect.Constructor;
2121
import java.util.Collection;
2222
import java.util.Collections;
23-
import java.util.HashSet;
2423
import java.util.List;
2524
import java.util.Objects;
2625
import java.util.Optional;
@@ -105,24 +104,9 @@ boolean shouldSkipSloppyServerEvaluation() {
105104
return isConditionalBalancingEnabled();
106105
}
107106

108-
Set<Integer> getServersWithTablesToIsolate() {
109-
Optional<MetaTableIsolationConditional> metaTableIsolationConditional =
110-
conditionals.stream().filter(MetaTableIsolationConditional.class::isInstance)
111-
.map(MetaTableIsolationConditional.class::cast).findAny();
112-
Set<Integer> serversWithTablesToIsolate = new HashSet<>();
113-
if (metaTableIsolationConditional.isPresent()) {
114-
serversWithTablesToIsolate
115-
.addAll(metaTableIsolationConditional.get().getServersHostingMeta());
116-
}
117-
118-
Optional<SystemTableIsolationConditional> systemTableIsolationConditional =
119-
conditionals.stream().filter(SystemTableIsolationConditional.class::isInstance)
120-
.map(SystemTableIsolationConditional.class::cast).findAny();
121-
if (systemTableIsolationConditional.isPresent()) {
122-
serversWithTablesToIsolate
123-
.addAll(systemTableIsolationConditional.get().getServersHostingSystemTables());
124-
}
125-
return serversWithTablesToIsolate;
107+
void clearConditionalWeightCaches() {
108+
conditionals.stream().map(RegionPlanConditional::getCandidateGenerator)
109+
.flatMap(Optional::stream).forEach(RegionPlanConditionalCandidateGenerator::clearWeightCache);
126110
}
127111

128112
void loadConf(Configuration conf) {
@@ -165,65 +149,49 @@ void loadClusterState(BalancerClusterState cluster) {
165149
}
166150

167151
/**
168-
* Check if the proposed action violates conditionals. Must be called before applying the action.
152+
* Indicates whether the action is good for our conditional compliance.
169153
* @param cluster The cluster state
170154
* @param action The proposed action
171-
* @return -1 if the action is an improvement, 0 if it's neutral, and >=1 if it is in violation
155+
* @return -1 if conditionals improve, 0 if neutral, 1 if conditionals degrade
172156
*/
173-
int isViolating(BalancerClusterState cluster, BalanceAction action) {
174-
if (conditionals.isEmpty()) {
157+
int getViolationCountChange(BalancerClusterState cluster, BalanceAction action) {
158+
boolean isViolatingPre = isViolating(cluster, action.undoAction());
159+
boolean isViolatingPost = isViolating(cluster, action);
160+
if (isViolatingPre && isViolatingPost) {
175161
return 0;
162+
} else if (!isViolatingPre && isViolatingPost) {
163+
return 1;
164+
} else {
165+
return -1;
176166
}
167+
}
177168

178-
// loadClusterState(cluster); todo rmattingly don't think this is necessary
179-
List<RegionPlan> regionPlans = doActionAndRefreshConditionals(cluster, action);
180-
181-
// Now we're in the proposed finished state
182-
// We can get the original violation count by running inverse plans
183-
List<RegionPlan> inversePlans = getInversePlans(regionPlans);
184-
int originalViolationCount = 0;
185-
for (RegionPlan inversePlan : inversePlans) {
186-
originalViolationCount += getConditionalViolationCount(conditionals, inversePlan);
169+
/**
170+
* Check if the proposed action violates conditionals
171+
* @param cluster The cluster state
172+
* @param action The proposed action
173+
*/
174+
boolean isViolating(BalancerClusterState cluster, BalanceAction action) {
175+
conditionals.forEach(conditional -> conditional.refreshClusterState(cluster));
176+
if (conditionals.isEmpty()) {
177+
return false;
187178
}
188-
189-
// Now go back to the original state, and measure
190-
// the proposed violation count
191-
doActionAndRefreshConditionals(cluster, action.undoAction());
192-
int proposedViolationCount = 0;
179+
List<RegionPlan> regionPlans = cluster.convertActionToPlans(action);
193180
for (RegionPlan regionPlan : regionPlans) {
194-
proposedViolationCount += getConditionalViolationCount(conditionals, regionPlan);
195-
}
196-
197-
if (proposedViolationCount - originalViolationCount < 0 && proposedViolationCount == 0) {
198-
// Only take a random improvement if it eliminates violations, or exists in a neutral state
199-
// Otherwise we are probably just fighting our conditional generators
200-
return -1;
201-
} else {
202-
return proposedViolationCount;
181+
if (isViolating(regionPlan)) {
182+
return true;
183+
}
203184
}
185+
return false;
204186
}
205187

206-
private List<RegionPlan> doActionAndRefreshConditionals(BalancerClusterState cluster,
207-
BalanceAction action) {
208-
List<RegionPlan> regionPlans = cluster.doAction(action);
209-
// loadClusterState(cluster); todo rmattingly don't think this is necessary
210-
return regionPlans;
211-
}
212-
213-
private static List<RegionPlan> getInversePlans(List<RegionPlan> regionPlans) {
214-
return regionPlans.stream().map(regionPlan -> new RegionPlan(regionPlan.getRegionInfo(),
215-
regionPlan.getDestination(), regionPlan.getSource())).toList();
216-
}
217-
218-
private static int getConditionalViolationCount(Set<RegionPlanConditional> conditionals,
219-
RegionPlan regionPlan) {
220-
int regionPlanConditionalViolationCount = 0;
221-
for (RegionPlanConditional regionPlanConditional : conditionals) {
222-
if (regionPlanConditional.isViolating(regionPlan)) {
223-
regionPlanConditionalViolationCount++;
188+
boolean isViolating(RegionPlan regionPlan) {
189+
for (RegionPlanConditional conditional : conditionals) {
190+
if (conditional.isViolating(regionPlan)) {
191+
return true;
224192
}
225193
}
226-
return regionPlanConditionalViolationCount;
194+
return false;
227195
}
228196

229197
private RegionPlanConditional createConditional(Class<? extends RegionPlanConditional> clazz,

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
@InterfaceAudience.Private
2929
abstract class CandidateGenerator {
3030

31-
static double MAX_WEIGHT = 10_000.0;
31+
static double MAX_WEIGHT = 100_000;
3232

3333
abstract BalanceAction generate(BalancerClusterState cluster);
3434

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,6 @@ void postAction(BalanceAction action) {
7474
regionMoved(a.getFromRegion(), a.getFromServer(), a.getToServer());
7575
regionMoved(a.getToRegion(), a.getToServer(), a.getFromServer());
7676
break;
77-
case ISOLATE_TABLE:
78-
IsolateTablesAction ita = (IsolateTablesAction) action;
79-
ita.getMoveActions()
80-
.forEach(m -> regionMoved(m.getRegion(), m.getFromServer(), m.getToServer()));
81-
break;
8277
case MOVE_BATCH:
8378
MoveBatchAction mba = (MoveBatchAction) action;
8479
for (MoveRegionAction moveRegionAction : mba.getMoveActions()) {

hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/DistributeReplicasCandidateGenerator.java

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,30 @@
1818
package org.apache.hadoop.hbase.master.balancer;
1919

2020
import static java.util.Collections.shuffle;
21+
import static org.apache.hadoop.hbase.master.balancer.DistributeReplicasConditional.getReplicaKey;
2122

2223
import java.util.ArrayList;
2324
import java.util.HashSet;
2425
import java.util.List;
2526
import java.util.Set;
27+
import org.apache.yetus.audience.InterfaceAudience;
2628
import org.slf4j.Logger;
2729
import org.slf4j.LoggerFactory;
2830

2931
/**
3032
* CandidateGenerator to distribute colocated replicas across different servers.
3133
*/
34+
@InterfaceAudience.Private
3235
class DistributeReplicasCandidateGenerator extends RegionPlanConditionalCandidateGenerator {
3336

37+
static DistributeReplicasCandidateGenerator INSTANCE = new DistributeReplicasCandidateGenerator();
38+
3439
private static final Logger LOG =
3540
LoggerFactory.getLogger(DistributeReplicasCandidateGenerator.class);
36-
private static final int BATCH_SIZE = 1000;
41+
private static final int BATCH_SIZE = 100_000;
42+
43+
private DistributeReplicasCandidateGenerator() {
44+
}
3745

3846
/**
3947
* Generates a balancing action to distribute colocated replicas. Moves one replica of a colocated
@@ -50,7 +58,7 @@ BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing
5058
BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing,
5159
boolean isForced) {
5260
// Shuffle server indices to add some randomness to the moves
53-
// todo rmattingly share cache of shuffled servers in BalancerConditionals or something
61+
// Should we cache these shuffled servers? Doesn't seem necessary at the moment
5462
List<Integer> shuffledServerIndices = new ArrayList<>(cluster.numServers);
5563
for (int i = 0; i < cluster.servers.length; i++) {
5664
shuffledServerIndices.add(i);
@@ -66,16 +74,18 @@ BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing
6674
new HashSet<>(serverRegions.length);
6775
for (int regionIndex : serverRegions) {
6876
DistributeReplicasConditional.ReplicaKey replicaKey =
69-
new DistributeReplicasConditional.ReplicaKey(cluster.regions[regionIndex]);
77+
getReplicaKey(cluster.regions[regionIndex]);
7078
if (replicaKeys.contains(replicaKey)) {
7179
foundColocatedReplicas = true;
7280
if (isWeighing) {
73-
// if weighing, fast exit with an actionable move
81+
// If weighing, fast exit with an actionable move
7482
return getAction(sourceIndex, regionIndex, pickOtherRandomServer(cluster, sourceIndex),
7583
-1);
7684
} else {
77-
// if not weighing, pick a good move
78-
for (int destinationIndex : shuffledServerIndices) {
85+
// If not weighing, pick a good move
86+
for (int i = 0; i < cluster.numServers; i++) {
87+
// Randomize destination ordering so we aren't overloading one destination
88+
int destinationIndex = pickOtherRandomServer(cluster, sourceIndex);
7989
if (destinationIndex == sourceIndex) {
8090
continue;
8191
}
@@ -86,30 +96,6 @@ BalanceAction generateCandidate(BalancerClusterState cluster, boolean isWeighing
8696
} else if (willBeAccepted(cluster, possibleAction)) {
8797
moveRegionActions.add(possibleAction);
8898
break;
89-
} else if (LOG.isTraceEnabled()) {
90-
// Find regions on the destination server that block movement because they share a
91-
// replica with the regionIndex
92-
Set<Integer> blockingRegionIndices = new HashSet<>();
93-
int[] destinationServerRegions = cluster.regionsPerServer[destinationIndex];
94-
for (int destinationRegionIndex : destinationServerRegions) {
95-
DistributeReplicasConditional.ReplicaKey destinationReplicaKey =
96-
new DistributeReplicasConditional.ReplicaKey(
97-
cluster.regions[destinationRegionIndex]);
98-
if (destinationReplicaKey.equals(replicaKey)) {
99-
blockingRegionIndices.add(destinationRegionIndex);
100-
}
101-
}
102-
if (blockingRegionIndices.isEmpty()) {
103-
LOG.trace(
104-
"Can't move region {} from server {} to server {} because OTHER conditionals reject it",
105-
regionIndex, cluster.servers[sourceIndex].getServerName(),
106-
cluster.servers[destinationIndex].getServerName());
107-
} else {
108-
LOG.trace(
109-
"Can't move region {} from server {} to server {} because this destination has regions that share this replica: {}",
110-
regionIndex, cluster.servers[sourceIndex].getServerName(),
111-
cluster.servers[destinationIndex].getServerName(), blockingRegionIndices);
112-
}
11399
}
114100
}
115101
}

0 commit comments

Comments
 (0)