11package  gov .loc .repository .bagit .verify ;
22
3- import  java .io .File ;
43import  java .io .IOException ;
5- import  java .nio .file .DirectoryStream ;
6- import  java .nio .file .Files ;
74import  java .nio .file .Path ;
85import  java .security .MessageDigest ;
96import  java .security .NoSuchAlgorithmException ;
10- import  java .util .AbstractMap .SimpleImmutableEntry ;
117import  java .util .ArrayList ;
12- import  java .util .HashSet ;
138import  java .util .List ;
149import  java .util .Map .Entry ;
15- import  java .util .Set ;
1610import  java .util .concurrent .CountDownLatch ;
1711import  java .util .concurrent .ExecutorService ;
1812import  java .util .concurrent .Executors ;
2115import  org .slf4j .LoggerFactory ;
2216
2317import  gov .loc .repository .bagit .domain .Bag ;
24- import  gov .loc .repository .bagit .domain .FetchItem ;
2518import  gov .loc .repository .bagit .domain .Manifest ;
26- import  gov .loc .repository .bagit .domain .Version ;
2719import  gov .loc .repository .bagit .exceptions .CorruptChecksumException ;
2820import  gov .loc .repository .bagit .exceptions .FileNotInPayloadDirectoryException ;
2921import  gov .loc .repository .bagit .exceptions .InvalidBagitFileFormatException ;
3729import  gov .loc .repository .bagit .exceptions .VerificationException ;
3830import  gov .loc .repository .bagit .hash .BagitAlgorithmNameToSupportedAlgorithmMapping ;
3931import  gov .loc .repository .bagit .hash .StandardBagitAlgorithmNameToSupportedAlgorithmMapping ;
40- import  gov .loc .repository .bagit .reader .ManifestReader ;
41- import  gov .loc .repository .bagit .util .PathUtils ;
4232
4333/** 
4434 * Responsible for verifying if a bag is valid, complete 
4535 */ 
46- @ SuppressWarnings ({"PMD.GodClass" }) //TODO refactor 
4736public  final  class  BagVerifier  {
4837  private  static  final  Logger  logger  = LoggerFactory .getLogger (BagVerifier .class );
4938
50-   private  static  final  String  PAYLOAD_DIR_NAME  = "data" ;
51-   //@Incubating 
52-   private  static  final  String  DOT_BAGIT_DIR_NAME  = ".bagit" ;
53-   private  static  final  String  PAYLOAD_OXUM_REGEX  = "\\ d+\\ .\\ d+" ;
54-   
55-   private  final  BagitAlgorithmNameToSupportedAlgorithmMapping  nameMapping ;
39+   private  final  PayloadVerifier  manifestVerifier ;
5640  private  final  ExecutorService  executor ;
5741
5842  public  BagVerifier (){
59-     nameMapping  = new  StandardBagitAlgorithmNameToSupportedAlgorithmMapping ();
60-     executor  = Executors .newCachedThreadPool ();
43+     this (Executors .newCachedThreadPool (), new  StandardBagitAlgorithmNameToSupportedAlgorithmMapping ());
6144  }
6245
6346  public  BagVerifier (final  BagitAlgorithmNameToSupportedAlgorithmMapping  nameMapping ){
64-     this .nameMapping  = nameMapping ;
65-     executor  = Executors .newCachedThreadPool ();
47+     this (Executors .newCachedThreadPool (), nameMapping );
6648  }
6749
6850  public  BagVerifier (final  ExecutorService  executor ){
69-     nameMapping  = new  StandardBagitAlgorithmNameToSupportedAlgorithmMapping ();
70-     this .executor  = executor ;
51+     this (executor , new  StandardBagitAlgorithmNameToSupportedAlgorithmMapping ());
7152  }
7253
7354  public  BagVerifier (final  ExecutorService  executor , final  BagitAlgorithmNameToSupportedAlgorithmMapping  nameMapping ){
74-     this . nameMapping  = nameMapping ;
55+     manifestVerifier  = new   PayloadVerifier ( nameMapping ) ;
7556    this .executor  = executor ;
7657  }
7758
@@ -82,21 +63,7 @@ public BagVerifier(final ExecutorService executor, final BagitAlgorithmNameToSup
8263   * @return true if the bag can be quickly verified 
8364   */ 
8465  public  boolean  canQuickVerify (final  Bag  bag ){
85-     final  String  payloadOxum  = getPayloadOxum (bag );
86-     logger .debug ("Found payload-oxum [{}] for bag [{}]" , payloadOxum , bag .getRootDir ());
87-     return  payloadOxum  != null  && payloadOxum .matches (PAYLOAD_OXUM_REGEX ) && bag .getItemsToFetch ().size () == 0 ;
88-   }
89-   
90-   /* 
91-    * Get the Payload-Oxum value from the key value pairs 
92-    */ 
93-   private  String  getPayloadOxum (final  Bag  bag ){
94-     for (final  SimpleImmutableEntry <String ,String > keyValue  : bag .getMetadata ()){
95-       if ("Payload-Oxum" .equals (keyValue .getKey ())){
96-         return  keyValue .getValue ();
97-       }
98-     }
99-     return  null ;
66+     return  QuickVerifier .canQuickVerify (bag );
10067  }
10168
10269  /** 
@@ -112,28 +79,7 @@ private String getPayloadOxum(final Bag bag){
11279   * To check, run {@link BagVerifier#canQuickVerify} 
11380   */ 
11481  public  void  quicklyVerify (final  Bag  bag , final  boolean  ignoreHiddenFiles ) throws  IOException , InvalidPayloadOxumException {
115-     final  String  payloadOxum  = getPayloadOxum (bag );
116-     if (payloadOxum  == null  || !payloadOxum .matches (PAYLOAD_OXUM_REGEX )){
117-       throw  new  PayloadOxumDoesNotExistException ("Payload-Oxum does not exist in bag." );
118-     }
119- 
120-     final  String [] parts  = payloadOxum .split ("\\ ." );
121-     logger .debug ("Parsing [{}] for the total byte size of the payload oxum" , parts [0 ]);
122-     final  long  totalSize  = Long .parseLong (parts [0 ]);
123-     logger .debug ("Parsing [{}] for the number of files to find in the payload directory" , parts [1 ]);
124-     final  long  numberOfFiles  = Long .parseLong (parts [1 ]);
125-     
126-     final  Path  payloadDir  = getDataDir (bag );
127-     final  FileCountAndTotalSizeVistor  vistor  = new  FileCountAndTotalSizeVistor (ignoreHiddenFiles );
128-     Files .walkFileTree (payloadDir , vistor );
129-     logger .info ("supplied payload-oxum: [{}], Calculated payload-oxum: [{}.{}], for payload directory [{}]" , payloadOxum , vistor .getTotalSize (), vistor .getCount (), payloadDir );
130-     
131-     if (totalSize  != vistor .getTotalSize ()){
132-       throw  new  InvalidPayloadOxumException ("Invalid total size. Expected "  + totalSize  + "but calculated "  + vistor .getTotalSize ());
133-     }
134-     if (numberOfFiles  != vistor .getCount ()){
135-       throw  new  InvalidPayloadOxumException ("Invalid file count. Expected "  + numberOfFiles  + "but found "  + vistor .getCount () + " files" );
136-     }
82+     QuickVerifier .quicklyVerify (bag , ignoreHiddenFiles );
13783  }
13884
13985  /** 
@@ -226,177 +172,22 @@ public void isComplete(final Bag bag, final boolean ignoreHiddenFiles) throws
226172    FileNotInPayloadDirectoryException , InterruptedException , MaliciousPathException , UnsupportedAlgorithmException , InvalidBagitFileFormatException {
227173    logger .info ("Checking if the bag with root directory [{}] is complete." , bag .getRootDir ());
228174
229-     final  Path  dataDir  = getDataDir (bag );
230-     
231-     checkFetchItemsExist (bag .getItemsToFetch (), bag .getRootDir ());
232-     
233-     checkBagitFileExists (bag .getRootDir (), bag .getVersion ());
234-     
235-     checkPayloadDirectoryExists (bag );
236-     
237-     checkIfAtLeastOnePayloadManifestsExist (bag .getRootDir (), bag .getVersion ());
238-     
239-     final  Set <Path > allFilesListedInManifests  = getAllFilesListedInManifests (bag );
240-     checkAllFilesListedInManifestExist (allFilesListedInManifests );
241-     
242-     if (new  Version (1 ,0 ).compareTo (bag .getVersion ()) < 0 ){
243-       checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest (allFilesListedInManifests , dataDir , ignoreHiddenFiles );
244-     }
245-     else {
246-       CheckAllFilesInPayloadDirAreListedInAllManifests (bag .getPayLoadManifests (), dataDir , ignoreHiddenFiles );
247-     }
248-   }
249-   
250-   /* 
251-    * Get the directory that contains the payload files. 
252-    */ 
253-   private  Path  getDataDir (final  Bag  bag ){
254-     if (bag .getVersion ().compareTo (new  Version (2 , 0 )) >= 0 ){ //is it a .bagit version? 
255-       return  bag .getRootDir ();
256-     }
257-     
258-     return  bag .getRootDir ().resolve (PAYLOAD_DIR_NAME );
259-   }
260-   
261-   /* 
262-    * make sure all the fetch items exist in the data directory 
263-    */ 
264-   private  void  checkFetchItemsExist (final  List <FetchItem > items , final  Path  bagDir ) throws  FileNotInPayloadDirectoryException {
265-     logger .info ("Checking if all [{}] items in fetch.txt exist in the [{}]" , items .size (), bagDir );
266-     for (final  FetchItem  item  : items ){
267-       final  Path  file  = bagDir .resolve (item .path );
268-       if (!Files .exists (file )){
269-         throw  new  FileNotInPayloadDirectoryException ("Fetch item "  + item  + " has not been fetched!" );
270-       }
271-     }
272-   }
273-   
274-   /* 
275-    * make sure the bagit.txt file exists 
276-    */ 
277-   private  void  checkBagitFileExists (final  Path  rootDir , final  Version  version ) throws  MissingBagitFileException {
278-     logger .info ("Checking if bagit.txt file exists" );
279-     Path  bagitFile  = rootDir .resolve ("bagit.txt" );
280-     //@Incubating 
281-     if (version .compareTo (new  Version (2 , 0 )) >= 0 ){ //is it a .bagit version? 
282-       bagitFile  = rootDir .resolve (DOT_BAGIT_DIR_NAME  + File .separator  + "bagit.txt" );
283-     }
284-     
285-     if (!Files .exists (bagitFile )){
286-       throw  new  MissingBagitFileException ("File ["  + bagitFile  + "] should exist but it doesn't" );
287-     }
288-   }
289-   
290-   /* 
291-    * Make sure the data directory exists 
292-    */ 
293-   private  void  checkPayloadDirectoryExists (final  Bag  bag ) throws  MissingPayloadDirectoryException {
294-     logger .info ("Checking if special payload directory exists (only for version 0.97 and earlier)" );
295-     final  Path  dataDir  = getDataDir (bag );
296-     
297-     if (!Files .exists (dataDir )){
298-       throw  new  MissingPayloadDirectoryException ("File ["  + dataDir  + "] should exist but it doesn't" );
299-     }
300-   }
301-   
302-   /* 
303-    * Must have at least one manifest-<ALGORITHM>.txt file 
304-    */ 
305-   private  void  checkIfAtLeastOnePayloadManifestsExist (final  Path  rootDir , final  Version  version ) throws  MissingPayloadManifestException , IOException {
306-     logger .info ("Checking if there is at least one payload manifest in [{}]" , rootDir );
307-     boolean  hasAtLeastOneManifest  = false ;
175+     MandatoryVerifier .checkFetchItemsExist (bag .getItemsToFetch (), bag .getRootDir ());
308176
309-     DirectoryStream <Path > directoryStream  = Files .newDirectoryStream (rootDir );
310-     //@Incubating 
311-     if (version .compareTo (new  Version (2 , 00 )) >= 0 ){ //is it a .bagit version? 
312-       directoryStream  = Files .newDirectoryStream (rootDir .resolve (DOT_BAGIT_DIR_NAME ));
313-     }
177+     MandatoryVerifier .checkBagitFileExists (bag .getRootDir (), bag .getVersion ());
314178
315-     for (final  Path  path  : directoryStream ){
316-       if (PathUtils .getFilename (path ).startsWith ("manifest-" )){
317-         logger .debug ("Found payload manifest file [{}]" , path .getFileName ());
318-         hasAtLeastOneManifest  = true ;
319-       }
320-     }
179+     MandatoryVerifier .checkPayloadDirectoryExists (bag );
321180
322-     if (!hasAtLeastOneManifest ){
323-       throw  new  MissingPayloadManifestException ("Bag does not contain any payload manifest files" );
324-     }
181+     MandatoryVerifier .checkIfAtLeastOnePayloadManifestsExist (bag .getRootDir (), bag .getVersion ());
325182
183+     manifestVerifier .verifyPayload (bag , ignoreHiddenFiles );
326184  }
327185
328-   /* 
329-    * get all the files listed in all the manifests 
330-    */ 
331-   private  Set <Path > getAllFilesListedInManifests (final  Bag  bag ) throws  IOException , MaliciousPathException , UnsupportedAlgorithmException , InvalidBagitFileFormatException {
332-     logger .debug ("Getting all files listed in the manifest(s)" );
333-     final  Set <Path > filesListedInManifests  = new  HashSet <>();
334-     
335-     DirectoryStream <Path > directoryStream  = Files .newDirectoryStream (bag .getRootDir ());
336-     //@Incubating 
337-     if (bag .getVersion ().compareTo (new  Version (2 , 00 )) >= 0 ){ //is it a .bagit version? 
338-       directoryStream  = Files .newDirectoryStream (bag .getRootDir ().resolve (DOT_BAGIT_DIR_NAME ));
339-     }
340-     
341-     for (final  Path  path  : directoryStream ){
342-       final  String  filename  = PathUtils .getFilename (path );
343-       if (filename .startsWith ("tagmanifest-" ) || filename .startsWith ("manifest-" )){
344-         logger .debug ("Getting files and checksums listed in [{}]" , path );
345-         final  Manifest  manifest  = ManifestReader .readManifest (nameMapping , path , bag .getRootDir (), bag .getFileEncoding ());
346-         filesListedInManifests .addAll (manifest .getFileToChecksumMap ().keySet ());
347-       }
348-     }
349-     
350-     return  filesListedInManifests ;
351-   }
352-   
353-   /* 
354-    * Make sure all the listed files actually exist 
355-    */ 
356-   @ SuppressWarnings ("PMD.AvoidInstantiatingObjectsInLoops" )
357-   private  void  checkAllFilesListedInManifestExist (final  Set <Path > files ) throws  FileNotInPayloadDirectoryException , InterruptedException {
358-     final  ExecutorService  executor  = Executors .newCachedThreadPool ();
359-     final  CountDownLatch  latch  = new  CountDownLatch (files .size ());
360-     final  List <Path > missingFiles  = new  ArrayList <>();
361-     
362-     logger .debug ("Checking if all files listed in the manifest(s) exist" );
363-     for (final  Path  file  : files ){
364-       executor .execute (new  CheckIfFileExistsTask (file , missingFiles , latch ));
365-     }
366-     
367-     latch .await ();
368-     executor .shutdown ();
369-     
370-     if (!missingFiles .isEmpty ()){
371-       throw  new  FileNotInPayloadDirectoryException ("Manifest(s) contains file(s) "  + missingFiles  + " but they don't exist!" );
372-     }
373-   }
374-   
375-   /* 
376-    * Make sure all files in the directory are in at least 1 manifest 
377-    */ 
378-   private  void  checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest (final  Set <Path > filesListedInManifests , final  Path  payloadDir , final  boolean  ignoreHiddenFiles ) throws  IOException {
379-     logger .debug ("Checking if all payload files (files in {} dir) are listed in at least one manifest" , payloadDir );
380-     if (Files .exists (payloadDir )){
381-       Files .walkFileTree (payloadDir , new  PayloadFileExistsInAtLeastOneManifestVistor (filesListedInManifests , ignoreHiddenFiles ));
382-     }
383-   }
384-   
385-   /* 
386-    * as per the bagit-spec 1.0+ all files have to be listed in all manifests 
387-    */ 
388-   private  void  CheckAllFilesInPayloadDirAreListedInAllManifests (final  Set <Manifest > payLoadManifests , final  Path  payloadDir , final  boolean  ignoreHiddenFiles ) throws  IOException {
389-     logger .debug ("Checking if all payload files (files in {} dir) are listed in all manifests" , payloadDir );
390-     if (Files .exists (payloadDir )){
391-       Files .walkFileTree (payloadDir , new  PayloadFileExistsInAllManifestsVistor (payLoadManifests , ignoreHiddenFiles ));
392-     }
393-   }
394- 
395-   public  BagitAlgorithmNameToSupportedAlgorithmMapping  getNameMapping () {
396-     return  nameMapping ;
397-   }
398- 
399186  public  ExecutorService  getExecutor () {
400187    return  executor ;
401188  }
189+ 
190+   public  PayloadVerifier  getManifestVerifier () {
191+     return  manifestVerifier ;
192+   }
402193}
0 commit comments