2828import org .apache .hadoop .hdfs .server .blockmanagement .BlockManager ;
2929import org .apache .hadoop .hdfs .server .blockmanagement .DatanodeManager ;
3030import org .apache .hadoop .hdfs .server .common .HdfsServerConstants ;
31+ import org .apache .hadoop .hdfs .server .common .Storage ;
3132import org .apache .hadoop .hdfs .server .namenode .startupprogress .Phase ;
33+ import org .apache .hadoop .hdfs .server .namenode .NNStorage .NameNodeFile ;
3234import org .apache .hadoop .hdfs .server .namenode .top .metrics .TopMetrics ;
35+ import org .apache .hadoop .hdfs .server .namenode .visitor .INodeCountVisitor ;
36+ import org .apache .hadoop .hdfs .server .namenode .visitor .INodeCountVisitor .Counts ;
3337import org .apache .hadoop .hdfs .server .protocol .NamespaceInfo ;
3438import org .apache .hadoop .util .GSet ;
3539import org .apache .hadoop .util .StringUtils ;
4044import org .slf4j .LoggerFactory ;
4145
4246import java .io .File ;
47+ import java .io .FilenameFilter ;
48+ import java .io .IOException ;
4349import java .util .Arrays ;
50+ import java .util .Collections ;
51+ import java .util .Iterator ;
4452import java .util .Timer ;
4553import java .util .TimerTask ;
54+ import java .util .concurrent .atomic .AtomicInteger ;
4655
4756import static org .apache .hadoop .hdfs .DFSConfigKeys .DFS_HA_NAMENODES_KEY_PREFIX ;
4857import static org .apache .hadoop .hdfs .DFSConfigKeys .DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY ;
4958import static org .apache .hadoop .hdfs .DFSConfigKeys .DFS_NAMENODE_READ_LOCK_REPORTING_THRESHOLD_MS_KEY ;
5059import static org .apache .hadoop .hdfs .DFSConfigKeys .DFS_NAMENODE_RPC_ADDRESS_KEY ;
5160import static org .apache .hadoop .hdfs .DFSConfigKeys .DFS_NAMENODE_WRITE_LOCK_REPORTING_THRESHOLD_MS_KEY ;
61+ import static org .apache .hadoop .hdfs .server .namenode .FsImageValidation .Cli .println ;
5262import static org .apache .hadoop .util .Time .now ;
5363
5464/**
@@ -134,6 +144,25 @@ static String toCommaSeparatedNumber(long n) {
134144 }
135145 return b .insert (0 , n ).toString ();
136146 }
147+
148+ /** @return a filter for the given type. */
149+ static FilenameFilter newFilenameFilter (NameNodeFile type ) {
150+ final String prefix = type .getName () + "_" ;
151+ return new FilenameFilter () {
152+ @ Override
153+ public boolean accept (File dir , String name ) {
154+ if (!name .startsWith (prefix )) {
155+ return false ;
156+ }
157+ for (int i = prefix .length (); i < name .length (); i ++) {
158+ if (!Character .isDigit (name .charAt (i ))) {
159+ return false ;
160+ }
161+ }
162+ return true ;
163+ }
164+ };
165+ }
137166 }
138167
139168 private final File fsImageFile ;
@@ -142,21 +171,44 @@ static String toCommaSeparatedNumber(long n) {
142171 this .fsImageFile = fsImageFile ;
143172 }
144173
145- int checkINodeReference (Configuration conf ) throws Exception {
174+ int run () throws Exception {
175+ return run (new Configuration (), new AtomicInteger ());
176+ }
177+
178+ int run (AtomicInteger errorCount ) throws Exception {
179+ return run (new Configuration (), errorCount );
180+ }
181+
182+ int run (Configuration conf , AtomicInteger errorCount ) throws Exception {
183+ final int initCount = errorCount .get ();
146184 LOG .info (Util .memoryInfo ());
147185 initConf (conf );
148186
187+ // check INodeReference
188+ final FSNamesystem namesystem = checkINodeReference (conf , errorCount );
189+
190+ // check INodeMap
191+ INodeMapValidation .run (namesystem .getFSDirectory (), errorCount );
192+ LOG .info (Util .memoryInfo ());
193+
194+ final int d = errorCount .get () - initCount ;
195+ if (d > 0 ) {
196+ Cli .println ("Found %d error(s) in %s" , d , fsImageFile .getAbsolutePath ());
197+ }
198+ return d ;
199+ }
200+
201+ private FSNamesystem loadImage (Configuration conf ) throws IOException {
149202 final TimerTask checkProgress = new TimerTask () {
150203 @ Override
151204 public void run () {
152205 final double percent = NameNode .getStartupProgress ().createView ()
153206 .getPercentComplete (Phase .LOADING_FSIMAGE );
154- LOG .info (String .format ("%s Progress: %.1f%%" ,
155- Phase .LOADING_FSIMAGE , 100 *percent ));
207+ LOG .info (String .format ("%s Progress: %.1f%% (%s) " ,
208+ Phase .LOADING_FSIMAGE , 100 *percent , Util . memoryInfo () ));
156209 }
157210 };
158211
159- INodeReferenceValidation .start ();
160212 final Timer t = new Timer ();
161213 t .scheduleAtFixedRate (checkProgress , 0 , 60_000 );
162214 final long loadStart = now ();
@@ -197,10 +249,42 @@ public void run() {
197249 t .cancel ();
198250 Cli .println ("Loaded %s %s successfully in %s" ,
199251 FS_IMAGE , fsImageFile , StringUtils .formatTime (now () - loadStart ));
252+ return namesystem ;
253+ }
254+
255+ FSNamesystem checkINodeReference (Configuration conf ,
256+ AtomicInteger errorCount ) throws Exception {
257+ INodeReferenceValidation .start ();
258+ final FSNamesystem namesystem = loadImage (conf );
200259 LOG .info (Util .memoryInfo ());
201- final int errorCount = INodeReferenceValidation .end ();
260+ INodeReferenceValidation .end (errorCount );
202261 LOG .info (Util .memoryInfo ());
203- return errorCount ;
262+ return namesystem ;
263+ }
264+
265+ static class INodeMapValidation {
266+ static Iterable <INodeWithAdditionalFields > iterate (INodeMap map ) {
267+ return new Iterable <INodeWithAdditionalFields >() {
268+ @ Override
269+ public Iterator <INodeWithAdditionalFields > iterator () {
270+ return map .getMapIterator ();
271+ }
272+ };
273+ }
274+
275+ static void run (FSDirectory fsdir , AtomicInteger errorCount ) {
276+ final int initErrorCount = errorCount .get ();
277+ final Counts counts = INodeCountVisitor .countTree (fsdir .getRoot ());
278+ for (INodeWithAdditionalFields i : iterate (fsdir .getINodeMap ())) {
279+ if (counts .getCount (i ) == 0 ) {
280+ Cli .printError (errorCount , "%s (%d) is inaccessible (%s)" ,
281+ i , i .getId (), i .getFullPathName ());
282+ }
283+ }
284+ println ("%s ended successfully: %d error(s) found." ,
285+ INodeMapValidation .class .getSimpleName (),
286+ errorCount .get () - initErrorCount );
287+ }
204288 }
205289
206290 static class Cli extends Configured implements Tool {
@@ -217,9 +301,10 @@ public int run(String[] args) throws Exception {
217301 initLogLevels ();
218302
219303 final FsImageValidation validation = FsImageValidation .newInstance (args );
220- final int errorCount = validation .checkINodeReference (getConf ());
304+ final AtomicInteger errorCount = new AtomicInteger ();
305+ validation .run (getConf (), errorCount );
221306 println ("Error Count: %s" , errorCount );
222- return errorCount == 0 ? 0 : 1 ;
307+ return errorCount . get () == 0 ? 0 : 1 ;
223308 }
224309
225310 static String parse (String ... args ) {
@@ -240,19 +325,68 @@ static String parse(String... args) {
240325 return f ;
241326 }
242327
243- static void println (String format , Object ... args ) {
328+ static synchronized void println (String format , Object ... args ) {
244329 final String s = String .format (format , args );
245330 System .out .println (s );
246331 LOG .info (s );
247332 }
248333
249- static void printError (String message , Throwable t ) {
334+ static synchronized void warn (String format , Object ... args ) {
335+ final String s = "WARN: " + String .format (format , args );
336+ System .out .println (s );
337+ LOG .warn (s );
338+ }
339+
340+ static synchronized void printError (String message , Throwable t ) {
250341 System .out .println (message );
251342 if (t != null ) {
252343 t .printStackTrace (System .out );
253344 }
254345 LOG .error (message , t );
255346 }
347+
348+ static synchronized void printError (AtomicInteger errorCount ,
349+ String format , Object ... args ) {
350+ final int count = errorCount .incrementAndGet ();
351+ final String s = "FSIMAGE_ERROR " + count + ": "
352+ + String .format (format , args );
353+ System .out .println (s );
354+ LOG .info (s );
355+ }
356+ }
357+
358+ public static int validate (FSNamesystem namesystem ) throws Exception {
359+ final AtomicInteger errorCount = new AtomicInteger ();
360+ final NNStorage nnStorage = namesystem .getFSImage ().getStorage ();
361+ for (Storage .StorageDirectory sd : nnStorage .getStorageDirs ()) {
362+ validate (sd .getCurrentDir (), errorCount );
363+ }
364+ return errorCount .get ();
365+ }
366+
367+ public static void validate (File path , AtomicInteger errorCount )
368+ throws Exception {
369+ if (path .isFile ()) {
370+ new FsImageValidation (path ).run (errorCount );
371+ } else if (path .isDirectory ()) {
372+ final File [] images = path .listFiles (
373+ Util .newFilenameFilter (NameNodeFile .IMAGE ));
374+ if (images == null || images .length == 0 ) {
375+ Cli .warn ("%s not found in %s" , FSImage .class .getSimpleName (),
376+ path .getAbsolutePath ());
377+ return ;
378+ }
379+
380+ Arrays .sort (images , Collections .reverseOrder ());
381+ for (int i = 0 ; i < images .length ; i ++) {
382+ final File image = images [i ];
383+ Cli .println ("%s %d) %s" , FSImage .class .getSimpleName (),
384+ i , image .getAbsolutePath ());
385+ FsImageValidation .validate (image , errorCount );
386+ }
387+ }
388+
389+ Cli .warn ("%s is neither a file nor a directory" , path .getAbsolutePath ());
256390 }
257391
258392 public static void main (String [] args ) {
0 commit comments