Skip to content

Commit 50a6249

Browse files
committed
HBASE-27897 ConnectionImplementation#locateRegionInMeta should pause and retry when taking user region lock failed (#5258)
Signed-off-by: Wellington Chevreuil <[email protected]>
1 parent 556e11d commit 50a6249

File tree

2 files changed

+18
-13
lines changed

2 files changed

+18
-13
lines changed

hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionImplementation.java

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -993,9 +993,12 @@ private RegionLocations locateRegionInMeta(TableName tableName, byte[] row, bool
993993
}
994994
// Query the meta region
995995
long pauseBase = connectionConfig.getPauseMillis();
996-
takeUserRegionLock();
997-
final long lockStartTime = EnvironmentEdgeManager.currentTime();
996+
long lockStartTime = 0;
997+
boolean lockedUserRegion = false;
998998
try {
999+
takeUserRegionLock();
1000+
lockStartTime = EnvironmentEdgeManager.currentTime();
1001+
lockedUserRegion = true;
9991002
// We don't need to check if useCache is enabled or not. Even if useCache is false
10001003
// we already cleared the cache for this row before acquiring userRegion lock so if this
10011004
// row is present in cache that means some other thread has populated it while we were
@@ -1104,10 +1107,12 @@ rpcControllerFactory, getMetaLookupPool(), connectionConfig.getMetaReadRpcTimeou
11041107
ConnectionUtils.getPauseTime(pauseBase, tries), TimeUnit.MILLISECONDS);
11051108
}
11061109
} finally {
1107-
userRegionLock.unlock();
1108-
// update duration of the lock being held
1109-
if (metrics != null) {
1110-
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
1110+
if (lockedUserRegion) {
1111+
userRegionLock.unlock();
1112+
// update duration of the lock being held
1113+
if (metrics != null) {
1114+
metrics.updateUserRegionLockHeld(EnvironmentEdgeManager.currentTime() - lockStartTime);
1115+
}
11111116
}
11121117
}
11131118
try {

hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaCache.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -592,21 +592,21 @@ public void testUserRegionLockThrowsException() throws IOException, InterruptedE
592592
// obtain the client metrics
593593
MetricsConnection metrics = conn.getConnectionMetrics();
594594
long queueCount = metrics.getUserRegionLockQueue().getCount();
595-
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount,
596-
queueCount, 2);
595+
assertEquals("Queue of userRegionLock should be updated twice. queueCount: " + queueCount, 2,
596+
queueCount);
597597

598598
long timeoutCount = metrics.getUserRegionLockTimeout().getCount();
599-
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount,
600-
timeoutCount, 1);
599+
assertEquals("Timeout of userRegionLock should happen once. timeoutCount: " + timeoutCount, 1,
600+
timeoutCount);
601601

602602
long waitingTimerCount = metrics.getUserRegionLockWaitingTimer().getCount();
603603
assertEquals("userRegionLock should be grabbed successfully once. waitingTimerCount: "
604-
+ waitingTimerCount, waitingTimerCount, 1);
604+
+ waitingTimerCount, 1, waitingTimerCount);
605605

606606
long heldTimerCount = metrics.getUserRegionLockHeldTimer().getCount();
607607
assertEquals(
608-
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount,
609-
heldTimerCount, 1);
608+
"userRegionLock should be held successfully once. heldTimerCount: " + heldTimerCount, 1,
609+
heldTimerCount);
610610
double heldTime = metrics.getUserRegionLockHeldTimer().getSnapshot().getMax();
611611
assertTrue("Max held time should be greater than 2 seconds. heldTime: " + heldTime,
612612
heldTime >= 2E9);

0 commit comments

Comments
 (0)