1717 */ 
1818package  org .apache .hadoop .hdfs .server .namenode ;
1919
20+ import  com .google .common .base .Preconditions ;
2021import  org .apache .commons .logging .Log ;
2122import  org .apache .commons .logging .LogFactory ;
2223import  org .apache .commons .logging .impl .Log4JLogger ;
2829import  org .apache .hadoop .hdfs .server .blockmanagement .BlockManager ;
2930import  org .apache .hadoop .hdfs .server .blockmanagement .DatanodeManager ;
3031import  org .apache .hadoop .hdfs .server .common .HdfsServerConstants ;
32+ import  org .apache .hadoop .hdfs .server .common .Storage ;
3133import  org .apache .hadoop .hdfs .server .namenode .startupprogress .Phase ;
34+ import  org .apache .hadoop .hdfs .server .namenode .NNStorage .NameNodeFile ;
3235import  org .apache .hadoop .hdfs .server .namenode .top .metrics .TopMetrics ;
36+ import  org .apache .hadoop .hdfs .server .namenode .visitor .INodeCountVisitor ;
37+ import  org .apache .hadoop .hdfs .server .namenode .visitor .INodeCountVisitor .Counts ;
3338import  org .apache .hadoop .hdfs .server .protocol .NamespaceInfo ;
3439import  org .apache .hadoop .util .GSet ;
3540import  org .apache .hadoop .util .StringUtils ;
4045import  org .slf4j .LoggerFactory ;
4146
4247import  java .io .File ;
48+ import  java .io .FilenameFilter ;
49+ import  java .io .IOException ;
4350import  java .util .Arrays ;
51+ import  java .util .Collections ;
52+ import  java .util .Objects ;
4453import  java .util .Timer ;
4554import  java .util .TimerTask ;
55+ import  java .util .concurrent .atomic .AtomicInteger ;
4656
4757import  static  org .apache .hadoop .hdfs .DFSConfigKeys .DFS_HA_NAMENODES_KEY_PREFIX ;
4858import  static  org .apache .hadoop .hdfs .DFSConfigKeys .DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY ;
4959import  static  org .apache .hadoop .hdfs .DFSConfigKeys .DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY ;
5060import  static  org .apache .hadoop .hdfs .DFSConfigKeys .DFS_NAMENODE_RPC_ADDRESS_KEY ;
5161import  static  org .apache .hadoop .hdfs .DFSConfigKeys .DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY ;
62+ import  static  org .apache .hadoop .hdfs .server .namenode .FsImageValidation .Cli .println ;
5263import  static  org .apache .hadoop .util .Time .now ;
5364
5465/** 
@@ -134,6 +145,25 @@ static String toCommaSeparatedNumber(long n) {
134145      }
135146      return  b .insert (0 , n ).toString ();
136147    }
148+ 
149+     /** @return a filter for the given type. */ 
150+     static  FilenameFilter  newFilenameFilter (NameNodeFile  type ) {
151+       final  String  prefix  = type .getName () + "_" ;
152+       return  new  FilenameFilter () {
153+         @ Override 
154+         public  boolean  accept (File  dir , String  name ) {
155+           if  (!name .startsWith (prefix )) {
156+             return  false ;
157+           }
158+           for  (int  i  = prefix .length (); i  < name .length (); i ++) {
159+             if  (!Character .isDigit (name .charAt (i ))) {
160+               return  false ;
161+             }
162+           }
163+           return  true ;
164+         }
165+       };
166+     }
137167  }
138168
139169  private  final  File  fsImageFile ;
@@ -142,21 +172,44 @@ static String toCommaSeparatedNumber(long n) {
142172    this .fsImageFile  = fsImageFile ;
143173  }
144174
145-   int  checkINodeReference (Configuration  conf ) throws  Exception  {
175+   int  run () throws  Exception  {
176+     return  run (new  Configuration (), new  AtomicInteger ());
177+   }
178+ 
179+   int  run (AtomicInteger  errorCount ) throws  Exception  {
180+     return  run (new  Configuration (), errorCount );
181+   }
182+ 
183+   int  run (Configuration  conf , AtomicInteger  errorCount ) throws  Exception  {
184+     final  int  initCount  = errorCount .get ();
146185    LOG .info (Util .memoryInfo ());
147186    initConf (conf );
148187
188+     // check INodeReference 
189+     final  FSNamesystem  namesystem  = checkINodeReference (conf , errorCount );
190+ 
191+     // check INodeMap 
192+     INodeMapValidation .run (namesystem .getFSDirectory (), errorCount );
193+     LOG .info (Util .memoryInfo ());
194+ 
195+     final  int  d  = errorCount .get () - initCount ;
196+     if  (d  > 0 ) {
197+       Cli .println ("Found %d error(s) in %s" , d , fsImageFile .getAbsolutePath ());
198+     }
199+     return  d ;
200+   }
201+ 
202+   private  FSNamesystem  loadImage (Configuration  conf ) throws  IOException  {
149203    final  TimerTask  checkProgress  = new  TimerTask () {
150204      @ Override 
151205      public  void  run () {
152206        final  double  percent  = NameNode .getStartupProgress ().createView ()
153207            .getPercentComplete (Phase .LOADING_FSIMAGE );
154-         LOG .info (String .format ("%s Progress: %.1f%%" ,
155-             Phase .LOADING_FSIMAGE , 100 *percent ));
208+         LOG .info (String .format ("%s Progress: %.1f%% (%s) " ,
209+             Phase .LOADING_FSIMAGE , 100 *percent ,  Util . memoryInfo () ));
156210      }
157211    };
158212
159-     INodeReferenceValidation .start ();
160213    final  Timer  t  = new  Timer ();
161214    t .scheduleAtFixedRate (checkProgress , 0 , 60_000 );
162215    final  long  loadStart  = now ();
@@ -197,10 +250,33 @@ public void run() {
197250    t .cancel ();
198251    Cli .println ("Loaded %s %s successfully in %s" ,
199252        FS_IMAGE , fsImageFile , StringUtils .formatTime (now () - loadStart ));
253+     return  namesystem ;
254+   }
255+ 
256+   FSNamesystem  checkINodeReference (Configuration  conf ,
257+       AtomicInteger  errorCount ) throws  Exception  {
258+     INodeReferenceValidation .start ();
259+     final  FSNamesystem  namesystem  = loadImage (conf );
200260    LOG .info (Util .memoryInfo ());
201-     final   int   errorCount  =  INodeReferenceValidation .end ();
261+     INodeReferenceValidation .end (errorCount );
202262    LOG .info (Util .memoryInfo ());
203-     return  errorCount ;
263+     return  namesystem ;
264+   }
265+ 
266+   static  class  INodeMapValidation  {
267+     static  void  run (FSDirectory  fsdir , AtomicInteger  errorCount ) {
268+       final  int  initErrorCount  = errorCount .get ();
269+       final  Counts  counts  = INodeCountVisitor .countTree (fsdir .getRoot ());
270+       for  (INodeWithAdditionalFields  i  : fsdir .getINodeMap ()) {
271+         if  (counts .getCount (i ) == 0 ) {
272+           Cli .printError (errorCount , "%s (%d) is inaccessible (%s)" ,
273+               i , i .getId (), i .getFullPathName ());
274+         }
275+       }
276+       println ("%s ended successfully: %d error(s) found." ,
277+           INodeMapValidation .class .getSimpleName (),
278+           errorCount .get () - initErrorCount );
279+     }
204280  }
205281
206282  static  class  Cli  extends  Configured  implements  Tool  {
@@ -217,9 +293,10 @@ public int run(String[] args) throws Exception {
217293      initLogLevels ();
218294
219295      final  FsImageValidation  validation  = FsImageValidation .newInstance (args );
220-       final  int  errorCount  = validation .checkINodeReference (getConf ());
296+       final  AtomicInteger  errorCount  = new  AtomicInteger ();
297+       validation .run (getConf (), errorCount );
221298      println ("Error Count: %s" , errorCount );
222-       return  errorCount  == 0 ? 0 : 1 ;
299+       return  errorCount . get ()  == 0 ? 0 : 1 ;
223300    }
224301
225302    static  String  parse (String ... args ) {
@@ -240,19 +317,63 @@ static String parse(String... args) {
240317      return  f ;
241318    }
242319
243-     static  void  println (String  format , Object ... args ) {
320+     static  synchronized   void  println (String  format , Object ... args ) {
244321      final  String  s  = String .format (format , args );
245322      System .out .println (s );
246323      LOG .info (s );
247324    }
248325
249-     static  void  printError (String  message , Throwable  t ) {
326+     static  synchronized  void  warn (String  format , Object ... args ) {
327+       final  String  s  = "WARN: "  + String .format (format , args );
328+       System .out .println (s );
329+       LOG .warn (s );
330+     }
331+ 
332+     static  synchronized  void  printError (String  message , Throwable  t ) {
250333      System .out .println (message );
251334      if  (t  != null ) {
252335        t .printStackTrace (System .out );
253336      }
254337      LOG .error (message , t );
255338    }
339+ 
340+     static  synchronized  void  printError (AtomicInteger  errorCount ,
341+         String  format , Object ... args ) {
342+       final  int  count  = errorCount .incrementAndGet ();
343+       final  String  s  = "FSIMAGE_ERROR "  + count  + ": "  + String .format (format , args );
344+       System .out .println (s );
345+       LOG .info (s );
346+     }
347+   }
348+ 
349+   public  static  int  validate (FSNamesystem  namesystem ) throws  Exception  {
350+     final  AtomicInteger  errorCount  = new  AtomicInteger ();
351+     final  NNStorage  nnStorage  = namesystem .getFSImage ().getStorage ();
352+     for (Storage .StorageDirectory  sd  : nnStorage .getStorageDirs ()) {
353+       validate (sd .getCurrentDir (), errorCount );
354+     }
355+     return  errorCount .get ();
356+   }
357+ 
358+   public  static  void  validate (File  path , AtomicInteger  errorCount )
359+       throws  Exception  {
360+     if  (path .isFile ()) {
361+       new  FsImageValidation (path ).run (errorCount );
362+     } else  if  (path .isDirectory ()) {
363+       final  File [] images  = path .listFiles (
364+           Util .newFilenameFilter (NameNodeFile .IMAGE ));
365+       Objects .requireNonNull (images );
366+       Preconditions .checkState (images .length  > 0 );
367+ 
368+       Arrays .sort (images , Collections .reverseOrder ());
369+       for  (int  i  = 0 ; i  < images .length ; i ++) {
370+         final  File  image  = images [i ];
371+         Cli .println ("%d) image=%s" , i , image );
372+         FsImageValidation .validate (image , errorCount );
373+       }
374+     }
375+ 
376+     Cli .warn ("%s is neither a file nor a directory" , path .getAbsolutePath ());
256377  }
257378
258379  public  static  void  main (String [] args ) {
0 commit comments