2121import java .io .File ;
2222import java .io .FileInputStream ;
2323import java .io .IOException ;
24- import java .util .concurrent .ConcurrentHashMap ;
24+ import java .util .Iterator ;
25+ import java .util .Map ;
26+ import java .util .concurrent .ConcurrentMap ;
27+ import java .util .concurrent .Executor ;
28+ import java .util .concurrent .ExecutorService ;
29+ import java .util .concurrent .Executors ;
2530
2631import com .google .common .annotations .VisibleForTesting ;
32+ import com .google .common .base .Objects ;
33+ import com .google .common .collect .Maps ;
2734import org .slf4j .Logger ;
2835import org .slf4j .LoggerFactory ;
2936
4350public class ExternalShuffleBlockManager {
4451 private final Logger logger = LoggerFactory .getLogger (ExternalShuffleBlockManager .class );
4552
46- // Map from "appId-execId" to the executor's configuration.
47- private final ConcurrentHashMap <String , ExecutorShuffleInfo > executors =
48- new ConcurrentHashMap <String , ExecutorShuffleInfo >();
53+ // Map containing all registered executors' metadata.
54+ private final ConcurrentMap <AppExecId , ExecutorShuffleInfo > executors ;
4955
50- // Returns an id suitable for a single executor within a single application.
51- private String getAppExecId (String appId , String execId ) {
52- return appId + "-" + execId ;
56+ // Single-threaded executor used to perform expensive recursive directory deletion.
57+ private final Executor directoryCleanupExecutor ;
58+
59+ public ExternalShuffleBlockManager () {
60+ // TODO: Give this thread a name.
61+ this (Executors .newSingleThreadExecutor ());
62+ }
63+
/**
 * Creates a manager that submits directory deletion tasks to the supplied executor.
 * Exposed so tests can control when directories are cleaned up.
 *
 * @param directoryCleanupExecutor executor on which directory deletions are run
 */
@VisibleForTesting
ExternalShuffleBlockManager(Executor directoryCleanupExecutor) {
  this.directoryCleanupExecutor = directoryCleanupExecutor;
  this.executors = Maps.newConcurrentMap();
}
5470
5571 /** Registers a new Executor with all the configuration we need to find its shuffle files. */
5672 public void registerExecutor (
5773 String appId ,
5874 String execId ,
5975 ExecutorShuffleInfo executorInfo ) {
60- String fullId = getAppExecId (appId , execId );
76+ AppExecId fullId = new AppExecId (appId , execId );
6177 logger .info ("Registered executor {} with {}" , fullId , executorInfo );
6278 executors .put (fullId , executorInfo );
6379 }
@@ -78,7 +94,7 @@ public ManagedBuffer getBlockData(String appId, String execId, String blockId) {
7894 int mapId = Integer .parseInt (blockIdParts [2 ]);
7995 int reduceId = Integer .parseInt (blockIdParts [3 ]);
8096
81- ExecutorShuffleInfo executor = executors .get (getAppExecId (appId , execId ));
97+ ExecutorShuffleInfo executor = executors .get (new AppExecId (appId , execId ));
8298 if (executor == null ) {
8399 throw new RuntimeException (
84100 String .format ("Executor is not registered (appId=%s, execId=%s)" , appId , execId ));
@@ -94,6 +110,56 @@ public ManagedBuffer getBlockData(String appId, String execId, String blockId) {
94110 }
95111 }
96112
113+ /**
114+ * Removes our metadata of all executors registered for the given application, and optionally
115+ * also deletes the local directories associated with the executors of that application in a
116+ * separate thread.
117+ *
118+ * It is not valid to call registerExecutor() for an executor with this appId after invoking
119+ * this method.
120+ */
121+ public void removeApplication (String appId , boolean cleanupLocalDirs ) {
122+ logger .info ("Application {} removed, cleanupLocalDirs = {}" , appId , cleanupLocalDirs );
123+ Iterator <Map .Entry <AppExecId , ExecutorShuffleInfo >> it = executors .entrySet ().iterator ();
124+ while (it .hasNext ()) {
125+ Map .Entry <AppExecId , ExecutorShuffleInfo > entry = it .next ();
126+ AppExecId fullId = entry .getKey ();
127+ final ExecutorShuffleInfo executor = entry .getValue ();
128+
129+ // Only touch executors associated with the appId that was removed.
130+ if (appId .equals (fullId .appId )) {
131+ it .remove ();
132+
133+ if (cleanupLocalDirs ) {
134+ logger .debug ("Cleaning up executor {}'s {} local dirs" , fullId , executor .localDirs .length );
135+
136+ // Execute the actual deletion in a different thread, as it may take some time.
137+ directoryCleanupExecutor .execute (new Runnable () {
138+ @ Override
139+ public void run () {
140+ deleteExecutorDirs (executor .localDirs );
141+ }
142+ });
143+ }
144+ }
145+ }
146+ }
147+
148+ /**
149+ * Synchronously deletes each directory one at a time.
150+ * Should be executed in its own thread, as this may take a long time.
151+ */
152+ private void deleteExecutorDirs (String [] dirs ) {
153+ for (String localDir : dirs ) {
154+ try {
155+ JavaUtils .deleteRecursively (new File (localDir ));
156+ logger .info ("Successfully cleaned up directory: " + localDir );
157+ } catch (Exception e ) {
158+ logger .error ("Failed to delete directory: " + localDir , e );
159+ }
160+ }
161+ }
162+
97163 /**
98164 * Hash-based shuffle data is simply stored as one file per block.
99165 * This logic is from FileShuffleBlockManager.
@@ -146,9 +212,36 @@ static File getFile(String[] localDirs, int subDirsPerLocalDir, String filename)
146212 return new File (new File (localDir , String .format ("%02x" , subDirId )), filename );
147213 }
148214
149- /** For testing, clears all registered executors. */
150- @ VisibleForTesting
151- void clearRegisteredExecutors () {
152- executors .clear ();
215+ /** Simply encodes an executor's full ID, which is appId + execId. */
216+ private static class AppExecId {
217+ final String appId ;
218+ final String execId ;
219+
220+ private AppExecId (String appId , String execId ) {
221+ this .appId = appId ;
222+ this .execId = execId ;
223+ }
224+
225+ @ Override
226+ public boolean equals (Object o ) {
227+ if (this == o ) return true ;
228+ if (o == null || getClass () != o .getClass ()) return false ;
229+
230+ AppExecId appExecId = (AppExecId ) o ;
231+ return Objects .equal (appId , appExecId .appId ) && Objects .equal (execId , appExecId .execId );
232+ }
233+
234+ @ Override
235+ public int hashCode () {
236+ return Objects .hashCode (appId , execId );
237+ }
238+
239+ @ Override
240+ public String toString () {
241+ return Objects .toStringHelper (this )
242+ .add ("appId" , appId )
243+ .add ("execId" , execId )
244+ .toString ();
245+ }
153246 }
154247}
0 commit comments