Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/main/java/gov/loc/repository/bagit/domain/Bag.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ public final class Bag {
//the current location of the bag on the filesystem
private Path rootDir;

private Long payloadFileCount;

private Long payloadByteCount;

//TODO
// add payload file and byte count here and to reader. Use in verifier and writer

/**
* empty bag with an invalid version
*/
Expand Down Expand Up @@ -167,4 +174,20 @@ public void setRootDir(final Path rootDir) {
public void setVersion(final Version version) {
this.version = version;
}

/**
 * @return the stored payload file count, or null if none has been set on this bag
 */
public Long getPayloadFileCount() {
return payloadFileCount;
}

/**
 * @param payloadFileCount the payload file count to store on this bag, may be null
 */
public void setPayloadFileCount(final Long payloadFileCount) {
this.payloadFileCount = payloadFileCount;
}

/**
 * @return the stored payload byte count, or null if none has been set on this bag
 */
public Long getPayloadByteCount() {
return payloadByteCount;
}

/**
 * @param payloadByteCount the payload byte count to store on this bag, may be null
 */
public void setPayloadByteCount(final Long payloadByteCount) {
this.payloadByteCount = payloadByteCount;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
/**
* The {@link Bag} object should contain the Payload-Oxum metadata key value pair,
* this class represents the error when trying to calculate the payload-oxum and it doesn't exist on the bag object.
* Or if the payload-byte-count and payload-file-count don't exist for versions 1.0+
*/
public class PayloadOxumDoesNotExistException extends RuntimeException {
private static final long serialVersionUID = 1L;
Expand Down
8 changes: 1 addition & 7 deletions src/main/java/gov/loc/repository/bagit/reader/BagReader.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
package gov.loc.repository.bagit.reader;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.AbstractMap.SimpleImmutableEntry;

import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.exceptions.InvalidBagMetadataException;
import gov.loc.repository.bagit.exceptions.InvalidBagitFileFormatException;
import gov.loc.repository.bagit.exceptions.MaliciousPathException;
Expand Down Expand Up @@ -53,10 +50,7 @@ public Bag read(final Path rootDir) throws IOException, UnparsableVersionExcepti
}
bag.setRootDir(rootDir);

final Path bagitFile = bagitDir.resolve("bagit.txt");
final SimpleImmutableEntry<Version, Charset> bagitInfo = BagitTextFileReader.readBagitTextFile(bagitFile);
bag.setVersion(bagitInfo.getKey());
bag.setFileEncoding(bagitInfo.getValue());
BagitTextFileReader.readBagitTextFile(bag);

ManifestReader.readAllManifests(nameMapping, bagitDir, bag);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.exceptions.InvalidBagMetadataException;
import gov.loc.repository.bagit.exceptions.UnparsableVersionException;
Expand All @@ -32,23 +34,63 @@ private BagitTextFileReader(){
* @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec
*/
public static SimpleImmutableEntry<Version, Charset> readBagitTextFile(final Path bagitFile) throws IOException, UnparsableVersionException, InvalidBagMetadataException{
  //only the version and encoding are exposed by this overload; the parsed payload counts are not returned
  final BagitFileValues parsed = parseValues(bagitFile);
  final Version bagitVersion = parsed.version;
  final Charset tagFileEncoding = parsed.encoding;

  return new SimpleImmutableEntry<>(bagitVersion, tagFileEncoding);
}

/**
 * Read the bagit.txt file belonging to the bag and copy its values onto the bag:
 * the version, the tag file encoding, and (when present in the file) the
 * payload-byte-count and payload-file-count.
 * The file is looked for in the ".bagit" directory under the bag root when that
 * directory exists, otherwise directly in the bag root.
 *
 * @param bag the bag whose root directory contains the bagit.txt file; the parsed values are set on this bag
 *
 * @throws IOException if there is a problem reading a file
 * @throws UnparsableVersionException if there is a problem parsing the bagit version number
 * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec
 */
public static void readBagitTextFile(final Bag bag) throws IOException, UnparsableVersionException, InvalidBagMetadataException{
  final Path dotBagitDir = bag.getRootDir().resolve(".bagit");
  //fall back to the bag root when there is no .bagit directory
  final Path bagitDir = Files.exists(dotBagitDir) ? dotBagitDir : bag.getRootDir();
  final Path bagitFile = bagitDir.resolve("bagit.txt");

  final BagitFileValues parsed = parseValues(bagitFile);

  bag.setVersion(parsed.version);
  bag.setFileEncoding(parsed.encoding);
  bag.setPayloadByteCount(parsed.payloadByteCount);
  bag.setPayloadFileCount(parsed.payloadFileCount);
}

private static BagitFileValues parseValues(final Path bagitFile) throws UnparsableVersionException, IOException, InvalidBagMetadataException{
logger.debug("Reading [{}] file", bagitFile);
final List<SimpleImmutableEntry<String, String>> pairs = KeyValueReader.readKeyValuesFromFile(bagitFile, ":", StandardCharsets.UTF_8);
final BagitFileValues values = new BagitFileValues();

String version = "";
Charset encoding = StandardCharsets.UTF_8;
for(final SimpleImmutableEntry<String, String> pair : pairs){
if("BagIt-Version".equals(pair.getKey())){
version = pair.getValue();
logger.debug("BagIt-Version is [{}]", version);
values.version = parseVersion(version);
}
if("Tag-File-Character-Encoding".equals(pair.getKey())){
encoding = Charset.forName(pair.getValue());
logger.debug("Tag-File-Character-Encoding is [{}]", encoding);
values.encoding = encoding;
}
if("Payload-Byte-Count".equals(pair.getKey())){ //assume version is 1.0+
//log the parsed value; the key is already spelled out in the message text
logger.debug("Payload-Byte-Count is [{}]", pair.getValue());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be getValue?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(and possibly the whole thing should be something like "{} is {}", pair.getKey(), pair.getValue()?

values.payloadByteCount = Long.valueOf(pair.getValue());
}
if("Payload-File-Count".equals(pair.getKey())){ //assume version is 1.0+
//log the parsed value; the key is already spelled out in the message text
logger.debug("Payload-File-Count is [{}]", pair.getValue());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment for payload byte count

values.payloadFileCount = Long.valueOf(pair.getValue());
}
}

return new SimpleImmutableEntry<Version, Charset>(parseVersion(version), encoding);
return values;
}

/*
Expand All @@ -65,4 +107,12 @@ static Version parseVersion(final String version) throws UnparsableVersionExcept

return new Version(major, minor);
}

/*
 * Simple holder for the values parsed out of a bagit.txt file.
 * The payload counts have no initializer, so they stay null unless the
 * corresponding key is found while parsing.
 */
@SuppressWarnings({"PMD.BeanMembersShouldSerialize"})
private static class BagitFileValues{
//parsed from the "BagIt-Version" key
public Version version;
//parsed from the "Tag-File-Character-Encoding" key
public Charset encoding;
//parsed from the "Payload-Byte-Count" key
public Long payloadByteCount;
//parsed from the "Payload-File-Count" key
public Long payloadFileCount;
}
}
59 changes: 44 additions & 15 deletions src/main/java/gov/loc/repository/bagit/verify/QuickVerifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,21 @@ private QuickVerifier(){
* @return true if the bag can be quickly verified
*/
public static boolean canQuickVerify(final Bag bag){
boolean payloadInfoExists = false;

if(bag.getPayloadByteCount() != null && bag.getPayloadFileCount() != null){
logger.debug("Found payload byte and file count, using that instead of payload-oxum");
//TODO check if it matches payload-oxum, and if not issue warning?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 on a warning - I was just looking to see what would happen if both are present or if they are and do not match. I would use the bagit.txt fields in preference but issue a validation error or possibly just a linter warning if Payload-Oxum doesn't match the more controlled fields.

payloadInfoExists = true;
}

final String payloadOxum = getPayloadOxum(bag);
logger.debug("Found payload-oxum [{}] for bag [{}]", payloadOxum, bag.getRootDir());
return payloadOxum != null && payloadOxum.matches(PAYLOAD_OXUM_REGEX) && bag.getItemsToFetch().size() == 0;
if(payloadOxum != null && payloadOxum.matches(PAYLOAD_OXUM_REGEX)){
logger.debug("Found payload-oxum [{}] for bag [{}]", payloadOxum, bag.getRootDir());
payloadInfoExists = true;
}

return payloadInfoExists && bag.getItemsToFetch().size() == 0;
}

/*
Expand All @@ -53,7 +65,7 @@ private static String getPayloadOxum(final Bag bag){
/**
* Quickly verify by comparing the number of files and the total number of bytes expected
*
* @param bag the bag to verify by payload-oxum
* @param bag the bag to quickly verify
* @param ignoreHiddenFiles ignore hidden files found in payload directory
*
* @throws IOException if there is an error reading a file
Expand All @@ -63,9 +75,36 @@ private static String getPayloadOxum(final Bag bag){
* To check, run {@link BagVerifier#canQuickVerify}
*/
public static void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) throws IOException, InvalidPayloadOxumException{
  //look up the expected numbers first, so a bag that declares none fails before the payload directory is walked
  final SimpleImmutableEntry<Long, Long> expected = getByteAndFileCount(bag);
  final Long expectedByteCount = expected.getKey();
  final Long expectedFileCount = expected.getValue();

  //tally the files actually present in the payload directory
  final Path payloadDir = PathUtils.getDataDir(bag);
  final FileCountAndTotalSizeVistor visitor = new FileCountAndTotalSizeVistor(ignoreHiddenFiles);
  Files.walkFileTree(payloadDir, visitor);
  logger.info("supplied payload-oxum: [{}.{}], Calculated payload-oxum: [{}.{}], for payload directory [{}]",
      expectedByteCount, expectedFileCount, visitor.getTotalSize(), visitor.getCount(), payloadDir);

  if(expectedByteCount != visitor.getTotalSize()){
    throw new InvalidPayloadOxumException("Invalid total size. Expected " + expectedByteCount + " but calculated " + visitor.getTotalSize());
  }
  if(expectedFileCount != visitor.getCount()){
    throw new InvalidPayloadOxumException("Invalid file count. Expected " + expectedFileCount + " but found " + visitor.getCount() + " files");
  }
}

/**
* get either the payload-oxum values or the payload-byte-count and payload-file-count
*
* @param bag the bag to get the payload info from
* @return the byte count, the file count
*/
private static SimpleImmutableEntry<Long, Long> getByteAndFileCount(final Bag bag){
if(bag.getPayloadByteCount() != null && bag.getPayloadFileCount() != null){
return new SimpleImmutableEntry<Long, Long>(bag.getPayloadByteCount(), bag.getPayloadFileCount());
}

final String payloadOxum = getPayloadOxum(bag);
if(payloadOxum == null || !payloadOxum.matches(PAYLOAD_OXUM_REGEX)){
throw new PayloadOxumDoesNotExistException("Payload-Oxum does not exist in bag.");
throw new PayloadOxumDoesNotExistException("Payload-Oxum or payload-byte-count and payload-file-count does not exist in bag.");
}

final String[] parts = payloadOxum.split("\\.");
Expand All @@ -74,16 +113,6 @@ public static void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles)
logger.debug("Parsing [{}] for the number of files to find in the payload directory", parts[1]);
final long numberOfFiles = Long.parseLong(parts[1]);

final Path payloadDir = PathUtils.getDataDir(bag);
final FileCountAndTotalSizeVistor vistor = new FileCountAndTotalSizeVistor(ignoreHiddenFiles);
Files.walkFileTree(payloadDir, vistor);
logger.info("supplied payload-oxum: [{}], Calculated payload-oxum: [{}.{}], for payload directory [{}]", payloadOxum, vistor.getTotalSize(), vistor.getCount(), payloadDir);

if(totalSize != vistor.getTotalSize()){
throw new InvalidPayloadOxumException("Invalid total size. Expected " + totalSize + "but calculated " + vistor.getTotalSize());
}
if(numberOfFiles != vistor.getCount()){
throw new InvalidPayloadOxumException("Invalid file count. Expected " + numberOfFiles + "but found " + vistor.getCount() + " files");
}
return new SimpleImmutableEntry<>(totalSize, numberOfFiles);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public static void write(final Bag bag, final Path outputDir) throws IOException
final Path bagitDir = PayloadWriter.writeVersionDependentPayloadFiles(bag, outputDir);

logger.debug("writing the bagit.txt file");
BagitFileWriter.writeBagitFile(bag.getVersion(), bag.getFileEncoding(), bagitDir);
BagitFileWriter.writeBagitFile(bag.getVersion(), bag.getFileEncoding(), bag.getPayloadByteCount(), bag.getPayloadFileCount(), bagitDir);

logger.debug("writing the payload manifest(s)");
ManifestWriter.writePayloadManifests(bag.getPayLoadManifests(), bagitDir, bag.getRootDir(), bag.getFileEncoding());
Expand Down
31 changes: 31 additions & 0 deletions src/main/java/gov/loc/repository/bagit/writer/BagitFileWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
public final class BagitFileWriter {
private static final Logger logger = LoggerFactory.getLogger(BagitFileWriter.class);

private static final Version ONE_DOT_ZERO = new Version(1, 0);

private BagitFileWriter(){
//intentionally left empty
}
Expand All @@ -29,6 +31,27 @@ private BagitFileWriter(){
* @throws IOException if there was a problem writing the file
*/
public static void writeBagitFile(final Version version, final Charset encoding, final Path outputDir) throws IOException{
//this overload carries no payload counts, so null is passed for both of them
writeBagitFileInternal(version, encoding, null, null, outputDir);
}

/**
 * Write the bagit.txt file in required UTF-8 encoding, including the payload counts for versions 1.0+.
 * The Payload-Byte-Count and Payload-File-Count lines are only written when the version is
 * 1.0 or greater and both counts are non-null.
 *
 * @param version the version of the bag to write out
 * @param encoding the encoding of the tag files
 * @param payloadByteCount the total number of bytes for all files in the payload directory, may be null
 * @param payloadFileCount the total number of files in the payload directory, may be null
 * @param outputDir the directory the bagit.txt file is written into
 *
 * @throws IOException if there was a problem writing the file
 */
public static void writeBagitFile(final Version version, final Charset encoding, final Long payloadByteCount,
final Long payloadFileCount, final Path outputDir) throws IOException{
writeBagitFileInternal(version, encoding, payloadByteCount, payloadFileCount, outputDir);
}

private static void writeBagitFileInternal(final Version version, final Charset encoding, final Long payloadByteCount,
final Long payloadFileCount, final Path outputDir) throws IOException{
final Path bagitPath = outputDir.resolve("bagit.txt");
logger.debug("Writing bagit.txt file to [{}]", outputDir);

Expand All @@ -41,5 +64,13 @@ public static void writeBagitFile(final Version version, final Charset encoding,
final String secondLine = "Tag-File-Character-Encoding : " + encoding + System.lineSeparator();
logger.debug("Writing line [{}] to [{}]", secondLine, bagitPath);
Files.write(bagitPath, secondLine.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.APPEND);

if(version.compareTo(ONE_DOT_ZERO) >= 0 && payloadByteCount != null && payloadFileCount != null){ //if it is 1.0 or greater
final String thirdLine = "Payload-Byte-Count : " + payloadByteCount + System.lineSeparator();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering whether now is the right time to fix the naming convention here - firstLine, et al. are making me itch a bit.

Also, looking at the Files.write code, this is repeatedly opening the same file with a bunch of redundant options and the getBytes calls are being repeated on every line as well. Could all of this be collapsed into a single array with 2-4 lines for a single Files.write call using the form which accepts text and the target output encoding?

Files.write(bagitPath, thirdLine.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.APPEND);

final String fourthLine = "Payload-File-Count : " + payloadFileCount + System.lineSeparator();
Files.write(bagitPath, fourthLine.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.APPEND);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package gov.loc.repository.bagit.reader;

import java.io.File;
import java.lang.reflect.InvocationTargetException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
Expand All @@ -10,6 +11,7 @@
import org.junit.Test;

import gov.loc.repository.bagit.PrivateConstructorTest;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.exceptions.UnparsableVersionException;

Expand All @@ -32,4 +34,17 @@ public void testReadBagitFile()throws Exception{
assertEquals(new Version(0, 97), actualBagitInfo.getKey());
assertEquals(StandardCharsets.UTF_8, actualBagitInfo.getValue());
}

@Test
public void testReadingPayloadByteAndFileCount() throws Exception{
  //a version 1.0 bag fixture, read in through the regular bag reader
  final File bagFolder = new File("src/test/resources/bags/v1_0/bag");
  final Bag bag = new BagReader().read(Paths.get(bagFolder.toURI()));

  BagitTextFileReader.readBagitTextFile(bag);

  //every value taken from bagit.txt should have been populated on the bag
  assertNotNull(bag.getVersion());
  assertNotNull(bag.getFileEncoding());
  assertNotNull(bag.getPayloadByteCount());
  assertNotNull(bag.getPayloadFileCount());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,21 @@ public void testCanQuickVerify() throws Exception{
}

@Test
public void testQuickVerify() throws Exception{
public void testQuickVerifyUsingPayloadOxum() throws Exception{
Path passingRootDir = Paths.get(new File("src/test/resources/bags/v0_94/bag").toURI());
Bag bag = reader.read(passingRootDir);

QuickVerifier.quicklyVerify(bag, true);
}

@Test
public void testQuickVerifyUsingPayloadByteAndFileCount() throws Exception{
  //a version 1.0 bag fixture; the test passes when quicklyVerify does not throw
  final File bagFolder = new File("src/test/resources/bags/v1_0/bag");
  final Bag bag = reader.read(Paths.get(bagFolder.toURI()));

  QuickVerifier.quicklyVerify(bag, true);
}

@Test(expected=PayloadOxumDoesNotExistException.class)
public void testExceptionIsThrownWhenPayloadOxumDoesntExist() throws Exception{
Bag bag = reader.read(rootDir);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,29 @@ public void testWriteBagitFile() throws Exception{
Files.getLastModifiedTime(bagit).toMillis() >= originalModified);
assertEquals(size, Files.size(bagit));
}

@Test
public void testBagitFileWritesOptionalLines() throws Exception{
  final File rootDir = folder.newFolder();
  final Path rootDirPath = Paths.get(rootDir.toURI());
  final Path bagit = rootDirPath.resolve("bagit.txt");

  assertFalse(Files.exists(bagit));
  //distinct byte and file counts so that swapped arguments would be detected
  BagitFileWriter.writeBagitFile(new Version(1, 0), StandardCharsets.UTF_8, 5L, 3L, rootDirPath);
  assertTrue(Files.exists(bagit));

  final java.util.List<String> lines = Files.readAllLines(bagit);
  assertEquals(4, lines.size());
  //the two optional lines follow the version and encoding lines
  assertEquals("Payload-Byte-Count : 5", lines.get(2));
  assertEquals("Payload-File-Count : 3", lines.get(3));
}

@Test //should not write payload byte and file count lines for version older than 1.0
public void testBagitFileDoesntWritesOptionalLines() throws Exception{
  final File rootDir = folder.newFolder();
  final Path rootDirPath = Paths.get(rootDir.toURI());
  final Path bagit = rootDirPath.resolve("bagit.txt");

  assertFalse(Files.exists(bagit));
  //counts are supplied, but version 0.97 predates them, so only the first two lines are expected
  BagitFileWriter.writeBagitFile(new Version(0, 97), StandardCharsets.UTF_8, 5L, 5L, rootDirPath);
  assertTrue(Files.exists(bagit));
  assertEquals(2, Files.readAllLines(bagit).size());
}

}
Loading