Skip to content

Commit 233cebc

Browse files
committed
HBASE-28403 Improve debugging for failures in procedure tests (#5709)
We see unit test failures in Jenkins that look like this: ``` java.lang.IllegalArgumentException: run queue not empty at org.apache.hbase.thirdparty.com.google.common.base.Preconditions.checkArgument(Preconditions.java:143) at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.load(ProcedureExecutor.java:332) at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.init(ProcedureExecutor.java:665) at org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.restart(ProcedureTestingUtility.java:132) at org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.restart(ProcedureTestingUtility.java:100) at org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility.restartMasterProcedureExecutor(MasterProcedureTestingUtility.java:85) at org.apache.hadoop.hbase.master.assignment.TestRollbackSCP.testFailAndRollback(TestRollbackSCP.java:180) ``` This isn't enough information to debug the situation. The test code in question looks reasonable enough – it clears the object for re-use between tests. However, somewhere between stop/clear/start we miss something. Add some toString implementations and dump the objects in the preconditions. Signed-off-by: Duo Zhang <[email protected]>
1 parent 643d6b7 commit 233cebc

File tree

13 files changed

+125
-31
lines changed

13 files changed

+125
-31
lines changed

hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/AbstractProcedureScheduler.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
import java.util.concurrent.TimeUnit;
2222
import java.util.concurrent.locks.Condition;
2323
import java.util.concurrent.locks.ReentrantLock;
24+
import org.apache.commons.lang3.builder.ToStringBuilder;
25+
import org.apache.commons.lang3.builder.ToStringStyle;
2426
import org.apache.yetus.audience.InterfaceAudience;
2527
import org.slf4j.Logger;
2628
import org.slf4j.LoggerFactory;
@@ -293,4 +295,10 @@ protected void wakePollIfNeeded(final int waitingCount) {
293295
schedWaitCond.signalAll();
294296
}
295297
}
298+
299+
@Override
300+
public String toString() {
301+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).append("running", running)
302+
.build();
303+
}
296304
}

hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockAndQueue.java

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import java.util.function.Function;
2121
import java.util.function.Predicate;
2222
import java.util.stream.Stream;
23+
import org.apache.commons.lang3.builder.ToStringBuilder;
24+
import org.apache.commons.lang3.builder.ToStringStyle;
2325
import org.apache.yetus.audience.InterfaceAudience;
2426

2527
/**
@@ -36,8 +38,9 @@
3638
* NOT thread-safe. Needs external concurrency control: e.g. uses in MasterProcedureScheduler are
3739
* guarded by schedLock(). <br/>
3840
* There is no need of 'volatile' keyword for member variables because of memory synchronization
39-
* guarantees of locks (see 'Memory Synchronization',
40-
* http://docs.oracle.com/javase/8/docs/api/java/util/concurrent/locks/Lock.html) <br/>
41+
* guarantees of locks (see
42+
* <a href="http://docs.oracle.com/javase/8/docs/api/java/util/concurrent/locks/Lock.html">Memory
43+
* Synchronization</a>) <br/>
4144
* We do not implement Lock interface because we need exclusive and shared locking, and also because
4245
* try-lock functions require procedure id. <br/>
4346
* We do not use ReentrantReadWriteLock directly because of its high memory overhead.
@@ -182,7 +185,7 @@ public Stream<Procedure> filterWaitingQueue(Predicate<Procedure> predicate) {
182185

183186
@Override
184187
public String toString() {
185-
return "exclusiveLockOwner=" + (hasExclusiveLock() ? getExclusiveLockProcIdOwner() : "NONE")
186-
+ ", sharedLockCount=" + getSharedLockCount() + ", waitingProcCount=" + queue.size();
188+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
189+
.appendSuper(describeLockStatus()).append("waitingProcCount", queue.size()).build();
187190
}
188191
}

hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/LockStatus.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hbase.procedure2;
1919

20+
import org.apache.commons.lang3.builder.ToStringBuilder;
21+
import org.apache.commons.lang3.builder.ToStringStyle;
2022
import org.apache.yetus.audience.InterfaceAudience;
2123

2224
/**
@@ -68,4 +70,14 @@ default long getExclusiveLockProcIdOwner() {
6870
* Get the number of procedures which hold the shared lock.
6971
*/
7072
int getSharedLockCount();
73+
74+
default String describeLockStatus() {
75+
ToStringBuilder builder = new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
76+
.append("exclusiveLock", hasExclusiveLock());
77+
if (hasExclusiveLock()) {
78+
builder.append("exclusiveLockProcIdOwner", getExclusiveLockProcIdOwner());
79+
}
80+
builder.append("sharedLockCount", getSharedLockCount());
81+
return builder.build();
82+
}
7183
}

hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -333,10 +333,11 @@ public void forceUpdate(long[] procIds) {
333333
}
334334

335335
private void load(final boolean abortOnCorruption) throws IOException {
336-
Preconditions.checkArgument(completed.isEmpty(), "completed not empty");
337-
Preconditions.checkArgument(rollbackStack.isEmpty(), "rollback state not empty");
338-
Preconditions.checkArgument(procedures.isEmpty(), "procedure map not empty");
339-
Preconditions.checkArgument(scheduler.size() == 0, "run queue not empty");
336+
Preconditions.checkArgument(completed.isEmpty(), "completed not empty: %s", completed);
337+
Preconditions.checkArgument(rollbackStack.isEmpty(), "rollback state not empty: %s",
338+
rollbackStack);
339+
Preconditions.checkArgument(procedures.isEmpty(), "procedure map not empty: %s", procedures);
340+
Preconditions.checkArgument(scheduler.size() == 0, "scheduler queue not empty: %s", scheduler);
340341

341342
store.load(new ProcedureStore.ProcedureLoader() {
342343
@Override

hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/SimpleProcedureScheduler.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
import java.util.Collections;
2121
import java.util.List;
22+
import org.apache.commons.lang3.builder.ToStringBuilder;
23+
import org.apache.commons.lang3.builder.ToStringStyle;
2224
import org.apache.yetus.audience.InterfaceAudience;
2325
import org.apache.yetus.audience.InterfaceStability;
2426

@@ -82,4 +84,10 @@ public List<LockedResource> getLocks() {
8284
public LockedResource getLockResource(LockedResourceType resourceType, String resourceName) {
8385
return null;
8486
}
87+
88+
@Override
89+
public String toString() {
90+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).appendSuper(super.toString())
91+
.append("runnables", runnables).build();
92+
}
8593
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RandomCandidateGenerator.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.balancer;
1919

20+
import org.apache.commons.lang3.builder.ToStringBuilder;
21+
import org.apache.commons.lang3.builder.ToStringStyle;
2022
import org.apache.yetus.audience.InterfaceAudience;
2123

2224
@InterfaceAudience.Private
@@ -31,4 +33,10 @@ BalanceAction generate(BalancerClusterState cluster) {
3133

3234
return pickRandomRegions(cluster, thisServer, otherServer);
3335
}
36+
37+
@Override
38+
public String toString() {
39+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).appendSuper(super.toString())
40+
.build();
41+
}
3442
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureScheduler.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import java.util.List;
2323
import java.util.function.Function;
2424
import java.util.function.Supplier;
25+
import org.apache.commons.lang3.builder.ToStringBuilder;
26+
import org.apache.commons.lang3.builder.ToStringStyle;
2527
import org.apache.hadoop.hbase.ServerName;
2628
import org.apache.hadoop.hbase.TableExistsException;
2729
import org.apache.hadoop.hbase.TableName;
@@ -1022,4 +1024,29 @@ public String dumpLocks() throws IOException {
10221024
schedUnlock();
10231025
}
10241026
}
1027+
1028+
private void serverBucketToString(ToStringBuilder builder, String queueName, Queue<?> queue) {
1029+
int size = queueSize(queue);
1030+
if (size != 0) {
1031+
builder.append(queueName, queue);
1032+
}
1033+
}
1034+
1035+
@Override
1036+
public String toString() {
1037+
ToStringBuilder builder =
1038+
new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).appendSuper(super.toString());
1039+
schedLock();
1040+
try {
1041+
for (int i = 0; i < serverBuckets.length; i++) {
1042+
serverBucketToString(builder, "serverBuckets[" + i + "]", serverBuckets[i]);
1043+
}
1044+
builder.append("tableMap", tableMap);
1045+
builder.append("peerMap", peerMap);
1046+
builder.append("metaMap", metaMap);
1047+
} finally {
1048+
schedUnlock();
1049+
}
1050+
return builder.build();
1051+
}
10251052
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaQueue.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.procedure;
1919

20+
import org.apache.commons.lang3.builder.ToStringBuilder;
21+
import org.apache.commons.lang3.builder.ToStringStyle;
2022
import org.apache.hadoop.hbase.TableName;
2123
import org.apache.hadoop.hbase.procedure2.LockStatus;
2224
import org.apache.hadoop.hbase.procedure2.Procedure;
@@ -38,4 +40,10 @@ protected MetaQueue(LockStatus lockStatus) {
3840
boolean requireExclusiveLock(Procedure<?> proc) {
3941
return true;
4042
}
43+
44+
@Override
45+
public String toString() {
46+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).appendSuper(super.toString())
47+
.build();
48+
}
4149
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/PeerQueue.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.procedure;
1919

20+
import org.apache.commons.lang3.builder.ToStringBuilder;
21+
import org.apache.commons.lang3.builder.ToStringStyle;
2022
import org.apache.hadoop.hbase.master.procedure.PeerProcedureInterface.PeerOperationType;
2123
import org.apache.hadoop.hbase.procedure2.LockStatus;
2224
import org.apache.hadoop.hbase.procedure2.Procedure;
@@ -37,4 +39,10 @@ public boolean requireExclusiveLock(Procedure<?> proc) {
3739
private static boolean requirePeerExclusiveLock(PeerProcedureInterface proc) {
3840
return proc.getPeerOperationType() != PeerOperationType.REFRESH;
3941
}
42+
43+
@Override
44+
public String toString() {
45+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).appendSuper(super.toString())
46+
.build();
47+
}
4048
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/Queue.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
*/
1818
package org.apache.hadoop.hbase.master.procedure;
1919

20+
import org.apache.commons.lang3.builder.ToStringBuilder;
21+
import org.apache.commons.lang3.builder.ToStringStyle;
2022
import org.apache.hadoop.hbase.procedure2.LockStatus;
2123
import org.apache.hadoop.hbase.procedure2.Procedure;
2224
import org.apache.hadoop.hbase.procedure2.ProcedureDeque;
@@ -105,10 +107,7 @@ public int compareTo(Queue<TKey> other) {
105107

106108
@Override
107109
public String toString() {
108-
return String.format("%s(%s, xlock=%s sharedLock=%s size=%s)", getClass().getSimpleName(), key,
109-
lockStatus.hasExclusiveLock()
110-
? "true (" + lockStatus.getExclusiveLockProcIdOwner() + ")"
111-
: "false",
112-
lockStatus.getSharedLockCount(), size());
110+
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE).append("key", key)
111+
.append("lockStatus", lockStatus.describeLockStatus()).append("size", size()).build();
113112
}
114113
}

0 commit comments

Comments
 (0)