Skip to content

Commit 87a33bc

Browse files
committed
refactored BagVerifier to use other more modular classes
1 parent 33144ad commit 87a33bc

File tree

10 files changed

+626
-376
lines changed

10 files changed

+626
-376
lines changed

src/main/java/gov/loc/repository/bagit/exceptions/InvalidBagitFileFormatException.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package gov.loc.repository.bagit.exceptions;
22

33
/**
4-
* Class to represent an error when the bag manifest file does not conform to the bagit specification format
4+
* Class to represent an error when a specific bag file does not conform to its bagit specification format
55
*/
66
public class InvalidBagitFileFormatException extends Exception {
77
private static final long serialVersionUID = 1L;

src/main/java/gov/loc/repository/bagit/util/PathUtils.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
import java.nio.file.Path;
66
import java.nio.file.attribute.DosFileAttributes;
77

8+
import gov.loc.repository.bagit.domain.Bag;
9+
import gov.loc.repository.bagit.domain.Version;
10+
811
public final class PathUtils {
12+
private static final String PAYLOAD_DIR_NAME = "data";
913

1014
private PathUtils(){
1115
//intentionally left blank
@@ -62,4 +66,23 @@ public static boolean isHidden(final Path path) throws IOException{
6266

6367
return Files.isHidden(path);
6468
}
69+
70+
/*
71+
* Get the directory that contains the payload files.
72+
*/
73+
/**
74+
* Starting with bagit version 2.0 (.bagit),
75+
* payload files are no longer stored in the "data" directory. This method accounts for that
76+
* and returns the directory that contains the payload files.
77+
*
78+
* @param bag that contains the payload files you want
79+
* @return the directory that contains the payload files
80+
*/
81+
public static Path getDataDir(final Bag bag){
82+
if(bag.getVersion().compareTo(new Version(2, 0)) >= 0){ //is it a .bagit version?
83+
return bag.getRootDir();
84+
}
85+
86+
return bag.getRootDir().resolve(PAYLOAD_DIR_NAME);
87+
}
6588
}
Lines changed: 16 additions & 225 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,12 @@
11
package gov.loc.repository.bagit.verify;
22

3-
import java.io.File;
43
import java.io.IOException;
5-
import java.nio.file.DirectoryStream;
6-
import java.nio.file.Files;
74
import java.nio.file.Path;
85
import java.security.MessageDigest;
96
import java.security.NoSuchAlgorithmException;
10-
import java.util.AbstractMap.SimpleImmutableEntry;
117
import java.util.ArrayList;
12-
import java.util.HashSet;
138
import java.util.List;
149
import java.util.Map.Entry;
15-
import java.util.Set;
1610
import java.util.concurrent.CountDownLatch;
1711
import java.util.concurrent.ExecutorService;
1812
import java.util.concurrent.Executors;
@@ -21,9 +15,7 @@
2115
import org.slf4j.LoggerFactory;
2216

2317
import gov.loc.repository.bagit.domain.Bag;
24-
import gov.loc.repository.bagit.domain.FetchItem;
2518
import gov.loc.repository.bagit.domain.Manifest;
26-
import gov.loc.repository.bagit.domain.Version;
2719
import gov.loc.repository.bagit.exceptions.CorruptChecksumException;
2820
import gov.loc.repository.bagit.exceptions.FileNotInPayloadDirectoryException;
2921
import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
@@ -37,41 +29,30 @@
3729
import gov.loc.repository.bagit.exceptions.VerificationException;
3830
import gov.loc.repository.bagit.hash.BagitAlgorithmNameToSupportedAlgorithmMapping;
3931
import gov.loc.repository.bagit.hash.StandardBagitAlgorithmNameToSupportedAlgorithmMapping;
40-
import gov.loc.repository.bagit.reader.ManifestReader;
41-
import gov.loc.repository.bagit.util.PathUtils;
4232

4333
/**
4434
* Responsible for verifying whether a bag is valid or complete
4535
*/
46-
@SuppressWarnings({"PMD.GodClass"}) //TODO refactor
4736
public final class BagVerifier {
4837
private static final Logger logger = LoggerFactory.getLogger(BagVerifier.class);
4938

50-
private static final String PAYLOAD_DIR_NAME = "data";
51-
//@Incubating
52-
private static final String DOT_BAGIT_DIR_NAME = ".bagit";
53-
private static final String PAYLOAD_OXUM_REGEX = "\\d+\\.\\d+";
54-
55-
private final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping;
39+
private final PayloadVerifier manifestVerifier;
5640
private final ExecutorService executor;
5741

5842
public BagVerifier(){
59-
nameMapping = new StandardBagitAlgorithmNameToSupportedAlgorithmMapping();
60-
executor = Executors.newCachedThreadPool();
43+
this(Executors.newCachedThreadPool(), new StandardBagitAlgorithmNameToSupportedAlgorithmMapping());
6144
}
6245

6346
public BagVerifier(final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping){
64-
this.nameMapping = nameMapping;
65-
executor = Executors.newCachedThreadPool();
47+
this(Executors.newCachedThreadPool(), nameMapping);
6648
}
6749

6850
public BagVerifier(final ExecutorService executor){
69-
nameMapping = new StandardBagitAlgorithmNameToSupportedAlgorithmMapping();
70-
this.executor = executor;
51+
this(executor, new StandardBagitAlgorithmNameToSupportedAlgorithmMapping());
7152
}
7253

7354
public BagVerifier(final ExecutorService executor, final BagitAlgorithmNameToSupportedAlgorithmMapping nameMapping){
74-
this.nameMapping = nameMapping;
55+
manifestVerifier = new PayloadVerifier(nameMapping);
7556
this.executor = executor;
7657
}
7758

@@ -82,21 +63,7 @@ public BagVerifier(final ExecutorService executor, final BagitAlgorithmNameToSup
8263
* @return true if the bag can be quickly verified
8364
*/
8465
public boolean canQuickVerify(final Bag bag){
85-
final String payloadOxum = getPayloadOxum(bag);
86-
logger.debug("Found payload-oxum [{}] for bag [{}]", payloadOxum, bag.getRootDir());
87-
return payloadOxum != null && payloadOxum.matches(PAYLOAD_OXUM_REGEX) && bag.getItemsToFetch().size() == 0;
88-
}
89-
90-
/*
91-
* Get the Payload-Oxum value from the key value pairs
92-
*/
93-
private String getPayloadOxum(final Bag bag){
94-
for(final SimpleImmutableEntry<String,String> keyValue : bag.getMetadata()){
95-
if("Payload-Oxum".equals(keyValue.getKey())){
96-
return keyValue.getValue();
97-
}
98-
}
99-
return null;
66+
return QuickVerifier.canQuickVerify(bag);
10067
}
10168

10269
/**
@@ -112,28 +79,7 @@ private String getPayloadOxum(final Bag bag){
11279
* To check, run {@link BagVerifier#canQuickVerify}
11380
*/
11481
public void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) throws IOException, InvalidPayloadOxumException{
115-
final String payloadOxum = getPayloadOxum(bag);
116-
if(payloadOxum == null || !payloadOxum.matches(PAYLOAD_OXUM_REGEX)){
117-
throw new PayloadOxumDoesNotExistException("Payload-Oxum does not exist in bag.");
118-
}
119-
120-
final String[] parts = payloadOxum.split("\\.");
121-
logger.debug("Parsing [{}] for the total byte size of the payload oxum", parts[0]);
122-
final long totalSize = Long.parseLong(parts[0]);
123-
logger.debug("Parsing [{}] for the number of files to find in the payload directory", parts[1]);
124-
final long numberOfFiles = Long.parseLong(parts[1]);
125-
126-
final Path payloadDir = getDataDir(bag);
127-
final FileCountAndTotalSizeVistor vistor = new FileCountAndTotalSizeVistor(ignoreHiddenFiles);
128-
Files.walkFileTree(payloadDir, vistor);
129-
logger.info("supplied payload-oxum: [{}], Calculated payload-oxum: [{}.{}], for payload directory [{}]", payloadOxum, vistor.getTotalSize(), vistor.getCount(), payloadDir);
130-
131-
if(totalSize != vistor.getTotalSize()){
132-
throw new InvalidPayloadOxumException("Invalid total size. Expected " + totalSize + "but calculated " + vistor.getTotalSize());
133-
}
134-
if(numberOfFiles != vistor.getCount()){
135-
throw new InvalidPayloadOxumException("Invalid file count. Expected " + numberOfFiles + "but found " + vistor.getCount() + " files");
136-
}
82+
QuickVerifier.quicklyVerify(bag, ignoreHiddenFiles);
13783
}
13884

13985
/**
@@ -226,177 +172,22 @@ public void isComplete(final Bag bag, final boolean ignoreHiddenFiles) throws
226172
FileNotInPayloadDirectoryException, InterruptedException, MaliciousPathException, UnsupportedAlgorithmException, InvalidBagitFileFormatException{
227173
logger.info("Checking if the bag with root directory [{}] is complete.", bag.getRootDir());
228174

229-
final Path dataDir = getDataDir(bag);
230-
231-
checkFetchItemsExist(bag.getItemsToFetch(), bag.getRootDir());
232-
233-
checkBagitFileExists(bag.getRootDir(), bag.getVersion());
234-
235-
checkPayloadDirectoryExists(bag);
236-
237-
checkIfAtLeastOnePayloadManifestsExist(bag.getRootDir(), bag.getVersion());
238-
239-
final Set<Path> allFilesListedInManifests = getAllFilesListedInManifests(bag);
240-
checkAllFilesListedInManifestExist(allFilesListedInManifests);
241-
242-
if(new Version(1,0).compareTo(bag.getVersion()) < 0){
243-
checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest(allFilesListedInManifests, dataDir, ignoreHiddenFiles);
244-
}
245-
else{
246-
CheckAllFilesInPayloadDirAreListedInAllManifests(bag.getPayLoadManifests(), dataDir, ignoreHiddenFiles);
247-
}
248-
}
249-
250-
/*
251-
* Get the directory that contains the payload files.
252-
*/
253-
private Path getDataDir(final Bag bag){
254-
if(bag.getVersion().compareTo(new Version(2, 0)) >= 0){ //is it a .bagit version?
255-
return bag.getRootDir();
256-
}
257-
258-
return bag.getRootDir().resolve(PAYLOAD_DIR_NAME);
259-
}
260-
261-
/*
262-
* make sure all the fetch items exist in the data directory
263-
*/
264-
private void checkFetchItemsExist(final List<FetchItem> items, final Path bagDir) throws FileNotInPayloadDirectoryException{
265-
logger.info("Checking if all [{}] items in fetch.txt exist in the [{}]", items.size(), bagDir);
266-
for(final FetchItem item : items){
267-
final Path file = bagDir.resolve(item.path);
268-
if(!Files.exists(file)){
269-
throw new FileNotInPayloadDirectoryException("Fetch item " + item + " has not been fetched!");
270-
}
271-
}
272-
}
273-
274-
/*
275-
* make sure the bagit.txt file exists
276-
*/
277-
private void checkBagitFileExists(final Path rootDir, final Version version) throws MissingBagitFileException{
278-
logger.info("Checking if bagit.txt file exists");
279-
Path bagitFile = rootDir.resolve("bagit.txt");
280-
//@Incubating
281-
if(version.compareTo(new Version(2, 0)) >= 0){ //is it a .bagit version?
282-
bagitFile = rootDir.resolve(DOT_BAGIT_DIR_NAME + File.separator + "bagit.txt");
283-
}
284-
285-
if(!Files.exists(bagitFile)){
286-
throw new MissingBagitFileException("File [" + bagitFile + "] should exist but it doesn't");
287-
}
288-
}
289-
290-
/*
291-
* Make sure the data directory exists
292-
*/
293-
private void checkPayloadDirectoryExists(final Bag bag) throws MissingPayloadDirectoryException{
294-
logger.info("Checking if special payload directory exists (only for version 0.97 and earlier)");
295-
final Path dataDir = getDataDir(bag);
296-
297-
if(!Files.exists(dataDir)){
298-
throw new MissingPayloadDirectoryException("File [" + dataDir + "] should exist but it doesn't");
299-
}
300-
}
301-
302-
/*
303-
* Must have at least one manifest-<ALGORITHM>.txt file
304-
*/
305-
private void checkIfAtLeastOnePayloadManifestsExist(final Path rootDir, final Version version) throws MissingPayloadManifestException, IOException{
306-
logger.info("Checking if there is at least one payload manifest in [{}]", rootDir);
307-
boolean hasAtLeastOneManifest = false;
175+
MandatoryVerifier.checkFetchItemsExist(bag.getItemsToFetch(), bag.getRootDir());
308176

309-
DirectoryStream<Path> directoryStream = Files.newDirectoryStream(rootDir);
310-
//@Incubating
311-
if(version.compareTo(new Version(2, 00)) >= 0){ //is it a .bagit version?
312-
directoryStream = Files.newDirectoryStream(rootDir.resolve(DOT_BAGIT_DIR_NAME));
313-
}
177+
MandatoryVerifier.checkBagitFileExists(bag.getRootDir(), bag.getVersion());
314178

315-
for(final Path path : directoryStream){
316-
if(PathUtils.getFilename(path).startsWith("manifest-")){
317-
logger.debug("Found payload manifest file [{}]", path.getFileName());
318-
hasAtLeastOneManifest = true;
319-
}
320-
}
179+
MandatoryVerifier.checkPayloadDirectoryExists(bag);
321180

322-
if(!hasAtLeastOneManifest){
323-
throw new MissingPayloadManifestException("Bag does not contain any payload manifest files");
324-
}
181+
MandatoryVerifier.checkIfAtLeastOnePayloadManifestsExist(bag.getRootDir(), bag.getVersion());
325182

183+
manifestVerifier.verifyPayload(bag, ignoreHiddenFiles);
326184
}
327185

328-
/*
329-
* get all the files listed in all the manifests
330-
*/
331-
private Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOException, MaliciousPathException, UnsupportedAlgorithmException, InvalidBagitFileFormatException{
332-
logger.debug("Getting all files listed in the manifest(s)");
333-
final Set<Path> filesListedInManifests = new HashSet<>();
334-
335-
DirectoryStream<Path> directoryStream = Files.newDirectoryStream(bag.getRootDir());
336-
//@Incubating
337-
if(bag.getVersion().compareTo(new Version(2, 00)) >= 0){ //is it a .bagit version?
338-
directoryStream = Files.newDirectoryStream(bag.getRootDir().resolve(DOT_BAGIT_DIR_NAME));
339-
}
340-
341-
for(final Path path : directoryStream){
342-
final String filename = PathUtils.getFilename(path);
343-
if(filename.startsWith("tagmanifest-") || filename.startsWith("manifest-")){
344-
logger.debug("Getting files and checksums listed in [{}]", path);
345-
final Manifest manifest = ManifestReader.readManifest(nameMapping, path, bag.getRootDir(), bag.getFileEncoding());
346-
filesListedInManifests.addAll(manifest.getFileToChecksumMap().keySet());
347-
}
348-
}
349-
350-
return filesListedInManifests;
351-
}
352-
353-
/*
354-
* Make sure all the listed files actually exist
355-
*/
356-
@SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
357-
private void checkAllFilesListedInManifestExist(final Set<Path> files) throws FileNotInPayloadDirectoryException, InterruptedException{
358-
final ExecutorService executor = Executors.newCachedThreadPool();
359-
final CountDownLatch latch = new CountDownLatch(files.size());
360-
final List<Path> missingFiles = new ArrayList<>();
361-
362-
logger.debug("Checking if all files listed in the manifest(s) exist");
363-
for(final Path file : files){
364-
executor.execute(new CheckIfFileExistsTask(file, missingFiles, latch));
365-
}
366-
367-
latch.await();
368-
executor.shutdown();
369-
370-
if(!missingFiles.isEmpty()){
371-
throw new FileNotInPayloadDirectoryException("Manifest(s) contains file(s) " + missingFiles + " but they don't exist!");
372-
}
373-
}
374-
375-
/*
376-
* Make sure all files in the directory are in at least 1 manifest
377-
*/
378-
private void checkAllFilesInPayloadDirAreListedInAtLeastOneAManifest(final Set<Path> filesListedInManifests, final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException{
379-
logger.debug("Checking if all payload files (files in {} dir) are listed in at least one manifest", payloadDir);
380-
if(Files.exists(payloadDir)){
381-
Files.walkFileTree(payloadDir, new PayloadFileExistsInAtLeastOneManifestVistor(filesListedInManifests, ignoreHiddenFiles));
382-
}
383-
}
384-
385-
/*
386-
* as per the bagit-spec 1.0+ all files have to be listed in all manifests
387-
*/
388-
private void CheckAllFilesInPayloadDirAreListedInAllManifests(final Set<Manifest> payLoadManifests, final Path payloadDir, final boolean ignoreHiddenFiles) throws IOException{
389-
logger.debug("Checking if all payload files (files in {} dir) are listed in all manifests", payloadDir);
390-
if(Files.exists(payloadDir)){
391-
Files.walkFileTree(payloadDir, new PayloadFileExistsInAllManifestsVistor(payLoadManifests, ignoreHiddenFiles));
392-
}
393-
}
394-
395-
public BagitAlgorithmNameToSupportedAlgorithmMapping getNameMapping() {
396-
return nameMapping;
397-
}
398-
399186
public ExecutorService getExecutor() {
400187
return executor;
401188
}
189+
190+
public PayloadVerifier getManifestVerifier() {
191+
return manifestVerifier;
192+
}
402193
}

0 commit comments

Comments
 (0)