3030import java .util .Iterator ;
3131import java .util .List ;
3232import java .util .Map ;
33+ import java .util .Map .Entry ;
3334import java .util .Random ;
3435import java .util .Set ;
3536import java .util .TreeSet ;
@@ -319,19 +320,66 @@ public AbstractResponse callWithoutRetries(RetryingCallable<AbstractResponse> ca
319320 }
320321 }
321322
323+ /**
324+ * Used to simulate the case where a RegionServer responds to a multi request, but some or all of
325+ * the actions have an Exception instead of Result. These responses go through receiveMultiAction,
326+ * which has handling for individual action failures.
327+ */
328+ static class CallerWithRegionException extends RpcRetryingCallerImpl <AbstractResponse > {
329+
330+ private final IOException e ;
331+ private MultiAction multi ;
332+
333+ public CallerWithRegionException (IOException e , MultiAction multi ) {
334+ super (100 , 500 , 100 , RetryingCallerInterceptorFactory .NO_OP_INTERCEPTOR , 9 , 0 , null );
335+ this .e = e ;
336+ this .multi = multi ;
337+ }
338+
339+ @ Override
340+ public AbstractResponse callWithoutRetries (RetryingCallable <AbstractResponse > callable ,
341+ int callTimeout ) throws IOException , RuntimeException {
342+ MultiResponse response = new MultiResponse ();
343+ for (Entry <byte [], List <Action >> entry : multi .actions .entrySet ()) {
344+ response .addException (entry .getKey (), e );
345+ }
346+ return response ;
347+ }
348+ }
349+
322350 static class AsyncProcessWithFailure extends MyAsyncProcess {
323351
324352 private final IOException ioe ;
353+ private final ServerName failingServer ;
354+ private final boolean returnAsRegionException ;
325355
326- public AsyncProcessWithFailure (ClusterConnection hc , Configuration conf , IOException ioe ) {
356+ public AsyncProcessWithFailure (ClusterConnection hc , Configuration conf , IOException ioe ,
357+ ServerName failingServer , boolean returnAsRegionException ) {
327358 super (hc , conf );
328359 this .ioe = ioe ;
360+ this .failingServer = failingServer ;
361+ this .returnAsRegionException = returnAsRegionException ;
329362 serverTrackerTimeout = 1L ;
330363 }
331364
365+ public AsyncProcessWithFailure (ClusterConnection hc , Configuration conf , IOException ioe ) {
366+ this (hc , conf , ioe , null , false );
367+ }
368+
332369 @ Override
333370 protected RpcRetryingCaller <AbstractResponse >
334371 createCaller (CancellableRegionServerCallable callable , int rpcTimeout ) {
372+ MultiServerCallable msc = (MultiServerCallable ) callable ;
373+ if (failingServer != null ) {
374+ if (!msc .getServerName ().equals (failingServer )) {
375+ return super .createCaller (callable , rpcTimeout );
376+ }
377+ }
378+
379+ if (returnAsRegionException ) {
380+ return new CallerWithRegionException (ioe , msc .getMulti ());
381+ }
382+
335383 callsCt .incrementAndGet ();
336384 return new CallerWithFailure (ioe );
337385 }
@@ -1754,14 +1802,35 @@ private void testRetryPauseWhenServerIsOverloaded(HBaseServerException exception
17541802 Assert .assertTrue ("Slept for too long: " + actualSleep + "ms" , actualSleep <= expectedSleep );
17551803 }
17561804
1805+ /**
1806+ * Tests that we properly recover from exceptions that DO NOT go through receiveGlobalFailure, due
1807+ * to updating the meta cache for the region which failed. Successful multigets can include region
1808+ * exceptions in the MultiResponse. In that case, it skips receiveGlobalFailure and instead
1809+ * handles in receiveMultiAction
1810+ */
1811+ @ Test
1812+ public void testRetryWithExceptionClearsMetaCacheUsingRegionException () throws Exception {
1813+ testRetryWithExceptionClearsMetaCache (true );
1814+ }
1815+
1816+ /**
1817+ * Tests that we properly recover from exceptions that go through receiveGlobalFailure, due to
1818+ * updating the meta cache for the region which failed.
1819+ */
17571820 @ Test
1758- public void testRetryWithExceptionClearsMetaCache () throws Exception {
1821+ public void testRetryWithExceptionClearsMetaCacheUsingServerException () throws Exception {
1822+ testRetryWithExceptionClearsMetaCache (false );
1823+ }
1824+
1825+ private void testRetryWithExceptionClearsMetaCache (boolean useRegionException )
1826+ throws IOException {
17591827 Configuration myConf = new Configuration (CONF );
1760- myConf .setInt (HConstants .HBASE_CLIENT_RETRIES_NUMBER , 0 );
1828+ myConf .setInt (HConstants .HBASE_CLIENT_RETRIES_NUMBER , 1 );
17611829 ClusterConnection conn = createHConnection (new ConnectionConfiguration (myConf ));
17621830
1763- AsyncProcessWithFailure ap =
1764- new AsyncProcessWithFailure (conn , myConf , new RegionOpeningException ("test" ));
1831+ // we pass in loc1.getServerName here so that only calls to that server will fail
1832+ AsyncProcessWithFailure ap = new AsyncProcessWithFailure (conn , myConf ,
1833+ new RegionOpeningException ("test" ), loc1 .getServerName (), useRegionException );
17651834 BufferedMutatorParams bufferParam = createBufferedMutatorParams (ap , DUMMY_TABLE );
17661835 BufferedMutatorImpl mutator = new BufferedMutatorImpl (conn , bufferParam , ap );
17671836
@@ -1770,20 +1839,33 @@ public void testRetryWithExceptionClearsMetaCache() throws Exception {
17701839 Assert .assertEquals (conn .locateRegion (DUMMY_TABLE , DUMMY_BYTES_1 , true , true ).toString (),
17711840 new RegionLocations (loc1 ).toString ());
17721841
1773- Mockito .verify (conn , Mockito .times (0 )).clearCaches (Mockito .any ());
1842+ // simulate updateCachedLocations, by changing the loc for this row to loc3. only loc1 fails,
1843+ // so this means retry will succeed
1844+ Mockito .doAnswer (invocation -> {
1845+ setMockLocation (conn , DUMMY_BYTES_1 , new RegionLocations (loc3 ));
1846+ return null ;
1847+ }).when (conn ).updateCachedLocations (Mockito .eq (DUMMY_TABLE ),
1848+ Mockito .eq (loc1 .getRegion ().getRegionName ()), Mockito .eq (DUMMY_BYTES_1 ), Mockito .any (),
1849+ Mockito .eq (loc1 .getServerName ()));
1850+
1851+ // Ensure we haven't called updateCachedLocations yet
1852+ Mockito .verify (conn , Mockito .times (0 )).updateCachedLocations (Mockito .any (), Mockito .any (),
1853+ Mockito .any (), Mockito .any (), Mockito .any ());
17741854
17751855 Put p = createPut (1 , true );
17761856 mutator .mutate (p );
17771857
1778- try {
1779- mutator .flush ();
1780- Assert .fail ();
1781- } catch (RetriesExhaustedWithDetailsException expected ) {
1782- assertEquals (1 , expected .getNumExceptions ());
1783- assertTrue (expected .getRow (0 ) == p );
1784- }
1858+ // we expect this to succeed because the bad region location should be updated upon
1859+ // the initial failure causing retries to succeed.
1860+ mutator .flush ();
17851861
1786- Mockito .verify (conn , Mockito .times (1 )).clearCaches (loc1 .getServerName ());
1862+ // validate that we updated the location, as we expected
1863+ Assert .assertEquals (conn .locateRegion (DUMMY_TABLE , DUMMY_BYTES_1 , true , true ).toString (),
1864+ new RegionLocations (loc3 ).toString ());
1865+ // this is a given since the location updated, but validate that we called updateCachedLocations
1866+ Mockito .verify (conn , Mockito .atLeastOnce ()).updateCachedLocations (Mockito .eq (DUMMY_TABLE ),
1867+ Mockito .eq (loc1 .getRegion ().getRegionName ()), Mockito .eq (DUMMY_BYTES_1 ), Mockito .any (),
1868+ Mockito .eq (loc1 .getServerName ()));
17871869 }
17881870
17891871 @ Test
0 commit comments