2020
2121import java .io .IOException ;
2222import java .nio .ByteBuffer ;
23+ import java .util .ArrayList ;
2324import java .util .Arrays ;
25+ import java .util .HashMap ;
2426import java .util .LinkedHashSet ;
2527import java .util .List ;
2628import java .util .Map ;
29+ import java .util .concurrent .TimeoutException ;
2730
2831import com .google .common .collect .ImmutableMap ;
2932import com .google .common .collect .Sets ;
33+
34+ import org .junit .Before ;
3035import org .junit .Test ;
3136import org .mockito .stubbing .Answer ;
3237import org .mockito .stubbing .Stubber ;
3843import org .apache .spark .network .buffer .NioManagedBuffer ;
3944import org .apache .spark .network .util .MapConfigProvider ;
4045import org .apache .spark .network .util .TransportConf ;
46+ import org .apache .spark .network .sasl .SaslTimeoutException ;
4147import static org .apache .spark .network .shuffle .RetryingBlockTransferor .BlockTransferStarter ;
4248
4349/**
@@ -49,6 +55,16 @@ public class RetryingBlockTransferorSuite {
4955 private final ManagedBuffer block0 = new NioManagedBuffer (ByteBuffer .wrap (new byte [13 ]));
5056 private final ManagedBuffer block1 = new NioManagedBuffer (ByteBuffer .wrap (new byte [7 ]));
5157 private final ManagedBuffer block2 = new NioManagedBuffer (ByteBuffer .wrap (new byte [19 ]));
  // Base TransportConf entries shared by all tests; reset by initMap() before
  // each test and extended by individual tests (e.g. to enable SASL retries).
  // Static because the static performInteractions(...) helper reads it.
  private static Map<String, String> configMap;
  // Captures the transferor created by performInteractions(...) so tests can
  // inspect its retry count after the interactions complete.
  private static RetryingBlockTransferor _retryingBlockTransferor;
60+
61+ @ Before
62+ public void initMap () {
63+ configMap = new HashMap <String , String >() {{
64+ put ("spark.shuffle.io.maxRetries" , "2" );
65+ put ("spark.shuffle.io.retryWait" , "0" );
66+ }};
67+ }
5268
5369 @ Test
5470 public void testNoFailures () throws IOException , InterruptedException {
@@ -230,6 +246,101 @@ public void testRetryAndUnrecoverable() throws IOException, InterruptedException
230246 verifyNoMoreInteractions (listener );
231247 }
232248
249+ @ Test
250+ public void testSaslTimeoutFailure () throws IOException , InterruptedException {
251+ BlockFetchingListener listener = mock (BlockFetchingListener .class );
252+ TimeoutException timeoutException = new TimeoutException ();
253+ SaslTimeoutException saslTimeoutException =
254+ new SaslTimeoutException (timeoutException );
255+ List <? extends Map <String , Object >> interactions = Arrays .asList (
256+ ImmutableMap .<String , Object >builder ()
257+ .put ("b0" , saslTimeoutException )
258+ .build (),
259+ ImmutableMap .<String , Object >builder ()
260+ .put ("b0" , block0 )
261+ .build ()
262+ );
263+
264+ performInteractions (interactions , listener );
265+
266+ verify (listener , timeout (5000 )).onBlockTransferFailure ("b0" , saslTimeoutException );
267+ verify (listener ).getTransferType ();
268+ verifyNoMoreInteractions (listener );
269+ }
270+
271+ @ Test
272+ public void testRetryOnSaslTimeout () throws IOException , InterruptedException {
273+ BlockFetchingListener listener = mock (BlockFetchingListener .class );
274+
275+ List <? extends Map <String , Object >> interactions = Arrays .asList (
276+ // SaslTimeout will cause a retry. Since b0 fails, we will retry both.
277+ ImmutableMap .<String , Object >builder ()
278+ .put ("b0" , new SaslTimeoutException (new TimeoutException ()))
279+ .build (),
280+ ImmutableMap .<String , Object >builder ()
281+ .put ("b0" , block0 )
282+ .build ()
283+ );
284+ configMap .put ("spark.shuffle.sasl.enableRetries" , "true" );
285+ performInteractions (interactions , listener );
286+
287+ verify (listener , timeout (5000 )).onBlockTransferSuccess ("b0" , block0 );
288+ verify (listener ).getTransferType ();
289+ verifyNoMoreInteractions (listener );
290+ assert (_retryingBlockTransferor .getRetryCount () == 0 );
291+ }
292+
293+ @ Test
294+ public void testRepeatedSaslRetryFailures () throws IOException , InterruptedException {
295+ BlockFetchingListener listener = mock (BlockFetchingListener .class );
296+ TimeoutException timeoutException = new TimeoutException ();
297+ SaslTimeoutException saslTimeoutException =
298+ new SaslTimeoutException (timeoutException );
299+ List <ImmutableMap <String , Object >> interactions = new ArrayList <>();
300+ for (int i = 0 ; i < 3 ; i ++) {
301+ interactions .add (
302+ ImmutableMap .<String , Object >builder ()
303+ .put ("b0" , saslTimeoutException )
304+ .build ()
305+ );
306+ }
307+ configMap .put ("spark.shuffle.sasl.enableRetries" , "true" );
308+ performInteractions (interactions , listener );
309+ verify (listener , timeout (5000 )).onBlockTransferFailure ("b0" , saslTimeoutException );
310+ verify (listener , times (3 )).getTransferType ();
311+ verifyNoMoreInteractions (listener );
312+ assert (_retryingBlockTransferor .getRetryCount () == 2 );
313+ }
314+
315+ @ Test
316+ public void testBlockTransferFailureAfterSasl () throws IOException , InterruptedException {
317+ BlockFetchingListener listener = mock (BlockFetchingListener .class );
318+
319+ List <? extends Map <String , Object >> interactions = Arrays .asList (
320+ ImmutableMap .<String , Object >builder ()
321+ .put ("b0" , new SaslTimeoutException (new TimeoutException ()))
322+ .put ("b1" , new IOException ())
323+ .build (),
324+ ImmutableMap .<String , Object >builder ()
325+ .put ("b0" , block0 )
326+ .put ("b1" , new IOException ())
327+ .build (),
328+ ImmutableMap .<String , Object >builder ()
329+ .put ("b1" , block1 )
330+ .build ()
331+ );
332+ configMap .put ("spark.shuffle.sasl.enableRetries" , "true" );
333+ performInteractions (interactions , listener );
334+ verify (listener , timeout (5000 )).onBlockTransferSuccess ("b0" , block0 );
335+ verify (listener , timeout (5000 )).onBlockTransferSuccess ("b1" , block1 );
336+ verify (listener , atLeastOnce ()).getTransferType ();
337+ verifyNoMoreInteractions (listener );
338+ // This should be equal to 1 because after the SASL exception is retried,
339+ // retryCount should be set back to 0. Then after that b1 encounters an
340+ // exception that is retried.
341+ assert (_retryingBlockTransferor .getRetryCount () == 1 );
342+ }
343+
233344 /**
234345 * Performs a set of interactions in response to block requests from a RetryingBlockFetcher.
235346 * Each interaction is a Map from BlockId to either ManagedBuffer or Exception. This interaction
@@ -245,9 +356,7 @@ private static void performInteractions(List<? extends Map<String, Object>> inte
245356 BlockFetchingListener listener )
246357 throws IOException , InterruptedException {
247358
248- MapConfigProvider provider = new MapConfigProvider (ImmutableMap .of (
249- "spark.shuffle.io.maxRetries" , "2" ,
250- "spark.shuffle.io.retryWait" , "0" ));
359+ MapConfigProvider provider = new MapConfigProvider (configMap );
251360 TransportConf conf = new TransportConf ("shuffle" , provider );
252361 BlockTransferStarter fetchStarter = mock (BlockTransferStarter .class );
253362
@@ -299,6 +408,8 @@ private static void performInteractions(List<? extends Map<String, Object>> inte
299408 assertNotNull (stub );
300409 stub .when (fetchStarter ).createAndStart (any (), any ());
301410 String [] blockIdArray = blockIds .toArray (new String [blockIds .size ()]);
302- new RetryingBlockTransferor (conf , fetchStarter , blockIdArray , listener ).start ();
411+ _retryingBlockTransferor =
412+ new RetryingBlockTransferor (conf , fetchStarter , blockIdArray , listener );
413+ _retryingBlockTransferor .start ();
303414 }
304415}
0 commit comments