-
Notifications
You must be signed in to change notification settings - Fork 50
as per the proposed spec, allow for payload-oxum to be in bagit.txt #80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,13 +3,15 @@ | |
| import java.io.IOException; | ||
| import java.nio.charset.Charset; | ||
| import java.nio.charset.StandardCharsets; | ||
| import java.nio.file.Files; | ||
| import java.nio.file.Path; | ||
| import java.util.List; | ||
| import java.util.AbstractMap.SimpleImmutableEntry; | ||
| import java.util.List; | ||
|
|
||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| import gov.loc.repository.bagit.domain.Bag; | ||
| import gov.loc.repository.bagit.domain.Version; | ||
| import gov.loc.repository.bagit.exceptions.InvalidBagMetadataException; | ||
| import gov.loc.repository.bagit.exceptions.UnparsableVersionException; | ||
|
|
@@ -32,23 +34,63 @@ private BagitTextFileReader(){ | |
| * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec | ||
| */ | ||
| public static SimpleImmutableEntry<Version, Charset> readBagitTextFile(final Path bagitFile) throws IOException, UnparsableVersionException, InvalidBagMetadataException{ | ||
| final BagitFileValues values = parseValues(bagitFile); | ||
|
|
||
| return new SimpleImmutableEntry<Version, Charset>(values.version, values.encoding); | ||
| } | ||
|
|
||
| /** | ||
| * Read the bagit.txt file and store its version and encoding on the bag. The | ||
| * payload-byte-count and payload-file-count (version 1.0+) are stored as well; | ||
| * they are null when bagit.txt does not contain them. | ||
| * | ||
| * @param bag the bag to read that contains the bagit.txt file; the parsed values are set on this bag | ||
| * | ||
| * @throws IOException if there is a problem reading a file | ||
| * @throws UnparsableVersionException if there is a problem parsing the bagit version number | ||
| * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec | ||
| */ | ||
| public static void readBagitTextFile(final Bag bag) throws IOException, UnparsableVersionException, InvalidBagMetadataException{ | ||
| //prefer the .bagit directory when it exists, otherwise fall back to the bag root | ||
| Path bagitDir = bag.getRootDir().resolve(".bagit"); | ||
| if(!Files.exists(bagitDir)){ | ||
| bagitDir = bag.getRootDir(); | ||
| } | ||
| final BagitFileValues values = parseValues(bagitDir.resolve("bagit.txt")); | ||
|
|
||
| //copy every parsed value onto the bag (the payload counts may be null) | ||
| bag.setVersion(values.version); | ||
| bag.setFileEncoding(values.encoding); | ||
| bag.setPayloadByteCount(values.payloadByteCount); | ||
| bag.setPayloadFileCount(values.payloadFileCount); | ||
| } | ||
|
|
||
| /** | ||
| * Parse the key/value pairs of a bagit.txt file into a {@link BagitFileValues}. | ||
| * Only the keys BagIt-Version, Tag-File-Character-Encoding, Payload-Byte-Count and | ||
| * Payload-File-Count are looked at; a field is left null when its key is not in the file. | ||
| * | ||
| * @param bagitFile the bagit.txt file to read | ||
| * @return the values that were found in the file | ||
| * | ||
| * @throws UnparsableVersionException if there is a problem parsing the bagit version number | ||
| * @throws IOException if there is a problem reading the file | ||
| * @throws InvalidBagMetadataException if the bagit.txt file does not conform to the bagit spec | ||
| */ | ||
| private static BagitFileValues parseValues(final Path bagitFile) throws UnparsableVersionException, IOException, InvalidBagMetadataException{ | ||
| logger.debug("Reading [{}] file", bagitFile); | ||
| final List<SimpleImmutableEntry<String, String>> pairs = KeyValueReader.readKeyValuesFromFile(bagitFile, ":", StandardCharsets.UTF_8); | ||
| final BagitFileValues values = new BagitFileValues(); | ||
|
|
||
| String version = ""; | ||
| Charset encoding = StandardCharsets.UTF_8; | ||
| for(final SimpleImmutableEntry<String, String> pair : pairs){ | ||
| if("BagIt-Version".equals(pair.getKey())){ | ||
| version = pair.getValue(); | ||
| logger.debug("BagIt-Version is [{}]", version); | ||
| values.version = parseVersion(version); | ||
| } | ||
| if("Tag-File-Character-Encoding".equals(pair.getKey())){ | ||
| encoding = Charset.forName(pair.getValue()); | ||
| logger.debug("Tag-File-Character-Encoding is [{}]", encoding); | ||
| values.encoding = encoding; | ||
| } | ||
| if("Payload-Byte-Count".equals(pair.getKey())){ //assume version is 1.0+ | ||
| //log the parsed value, not the key name | ||
| logger.debug("Payload-Byte-Count is [{}]", pair.getValue()); | ||
| values.payloadByteCount = Long.valueOf(pair.getValue()); | ||
| } | ||
| if("Payload-File-Count".equals(pair.getKey())){ //assume version is 1.0+ | ||
| //log the parsed value, not the key name | ||
| logger.debug("Payload-File-Count is [{}]", pair.getValue()); | ||
|
||
| values.payloadFileCount = Long.valueOf(pair.getValue()); | ||
| } | ||
| } | ||
|
|
||
| return new SimpleImmutableEntry<Version, Charset>(parseVersion(version), encoding); | ||
| return values; | ||
| } | ||
|
|
||
| /* | ||
|
|
@@ -65,4 +107,12 @@ static Version parseVersion(final String version) throws UnparsableVersionExcept | |
|
|
||
| return new Version(major, minor); | ||
| } | ||
|
|
||
| /* | ||
| * Simple holder for the values parsed out of bagit.txt by parseValues. | ||
| * A field stays null when the corresponding key was not found in the file. | ||
| */ | ||
| @SuppressWarnings({"PMD.BeanMembersShouldSerialize"}) | ||
| private static class BagitFileValues{ | ||
| //parsed from the BagIt-Version key | ||
| public Version version; | ||
| //parsed from the Tag-File-Character-Encoding key | ||
| public Charset encoding; | ||
| //parsed from the Payload-Byte-Count key | ||
| public Long payloadByteCount; | ||
| //parsed from the Payload-File-Count key | ||
| public Long payloadFileCount; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,9 +33,21 @@ private QuickVerifier(){ | |
| * @return true if the bag can be quickly verified | ||
| */ | ||
| public static boolean canQuickVerify(final Bag bag){ | ||
| //becomes true as soon as either source of payload totals is available | ||
| boolean payloadInfoExists = false; | ||
|
|
||
| //source 1: the payload byte and file count stored on the bag | ||
| if(bag.getPayloadByteCount() != null && bag.getPayloadFileCount() != null){ | ||
| logger.debug("Found payload byte and file count, using that instead of payload-oxum"); | ||
| //TODO check if it matches payload-oxum, and if not issue warning? | ||
|
||
| payloadInfoExists = true; | ||
| } | ||
|
|
||
| //source 2: a payload-oxum value that matches PAYLOAD_OXUM_REGEX | ||
| final String payloadOxum = getPayloadOxum(bag); | ||
| //NOTE(review): the next two statements look like the pre-change lines of this diff (the if-block below repeats the log and replaces the return) - confirm | ||
| logger.debug("Found payload-oxum [{}] for bag [{}]", payloadOxum, bag.getRootDir()); | ||
| return payloadOxum != null && payloadOxum.matches(PAYLOAD_OXUM_REGEX) && bag.getItemsToFetch().size() == 0; | ||
| if(payloadOxum != null && payloadOxum.matches(PAYLOAD_OXUM_REGEX)){ | ||
| logger.debug("Found payload-oxum [{}] for bag [{}]", payloadOxum, bag.getRootDir()); | ||
| payloadInfoExists = true; | ||
| } | ||
|
|
||
| //quick verification additionally requires that the bag has no items to fetch | ||
| return payloadInfoExists && bag.getItemsToFetch().size() == 0; | ||
| } | ||
|
|
||
| /* | ||
|
|
@@ -53,7 +65,7 @@ private static String getPayloadOxum(final Bag bag){ | |
| /** | ||
| * Quickly verify by comparing the number of files and the total number of bytes expected | ||
| * | ||
| * @param bag the bag to verify by payload-oxum | ||
| * @param bag the bag to quickly verify | ||
| * @param ignoreHiddenFiles ignore hidden files found in payload directory | ||
| * | ||
| * @throws IOException if there is an error reading a file | ||
|
|
@@ -63,9 +75,36 @@ private static String getPayloadOxum(final Bag bag){ | |
| * To check, run {@link BagVerifier#canQuickVerify} | ||
| */ | ||
| public static void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) throws IOException, InvalidPayloadOxumException{ | ||
| //expected totals from the bag: key is the byte count, value is the file count | ||
| final SimpleImmutableEntry<Long, Long> byteAndFileCount = getByteAndFileCount(bag); | ||
|
|
||
| //walk the payload directory with the visitor, then read its totals back via getTotalSize()/getCount() | ||
| final Path payloadDir = PathUtils.getDataDir(bag); | ||
| final FileCountAndTotalSizeVistor vistor = new FileCountAndTotalSizeVistor(ignoreHiddenFiles); | ||
| Files.walkFileTree(payloadDir, vistor); | ||
| logger.info("supplied payload-oxum: [{}.{}], Calculated payload-oxum: [{}.{}], for payload directory [{}]", | ||
| byteAndFileCount.getKey(), byteAndFileCount.getValue(), vistor.getTotalSize(), vistor.getCount(), payloadDir); | ||
|
|
||
| //NOTE(review): getKey()/getValue() are boxed Longs, so these != checks compare numerically only if getTotalSize()/getCount() return primitives - confirm against FileCountAndTotalSizeVistor | ||
| if(byteAndFileCount.getKey() != vistor.getTotalSize()){ | ||
| throw new InvalidPayloadOxumException("Invalid total size. Expected " + byteAndFileCount.getKey() + " but calculated " + vistor.getTotalSize()); | ||
| } | ||
| if(byteAndFileCount.getValue() != vistor.getCount()){ | ||
| throw new InvalidPayloadOxumException("Invalid file count. Expected " + byteAndFileCount.getValue() + " but found " + vistor.getCount() + " files"); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * get either the payload-oxum values or the payload-byte-count and payload-file-count | ||
| * | ||
| * @param bag the bag to get the payload info from | ||
| * @return the byte count, the file count | ||
| */ | ||
| private static SimpleImmutableEntry<Long, Long> getByteAndFileCount(final Bag bag){ | ||
| if(bag.getPayloadByteCount() != null && bag.getPayloadFileCount() != null){ | ||
| return new SimpleImmutableEntry<Long, Long>(bag.getPayloadByteCount(), bag.getPayloadFileCount()); | ||
| } | ||
|
|
||
| final String payloadOxum = getPayloadOxum(bag); | ||
| if(payloadOxum == null || !payloadOxum.matches(PAYLOAD_OXUM_REGEX)){ | ||
| throw new PayloadOxumDoesNotExistException("Payload-Oxum does not exist in bag."); | ||
| throw new PayloadOxumDoesNotExistException("Payload-Oxum or payload-byte-count and payload-file-count does not exist in bag."); | ||
| } | ||
|
|
||
| final String[] parts = payloadOxum.split("\\."); | ||
|
|
@@ -74,16 +113,6 @@ public static void quicklyVerify(final Bag bag, final boolean ignoreHiddenFiles) | |
| logger.debug("Parsing [{}] for the number of files to find in the payload directory", parts[1]); | ||
| final long numberOfFiles = Long.parseLong(parts[1]); | ||
|
|
||
| final Path payloadDir = PathUtils.getDataDir(bag); | ||
| final FileCountAndTotalSizeVistor vistor = new FileCountAndTotalSizeVistor(ignoreHiddenFiles); | ||
| Files.walkFileTree(payloadDir, vistor); | ||
| logger.info("supplied payload-oxum: [{}], Calculated payload-oxum: [{}.{}], for payload directory [{}]", payloadOxum, vistor.getTotalSize(), vistor.getCount(), payloadDir); | ||
|
|
||
| if(totalSize != vistor.getTotalSize()){ | ||
| throw new InvalidPayloadOxumException("Invalid total size. Expected " + totalSize + "but calculated " + vistor.getTotalSize()); | ||
| } | ||
| if(numberOfFiles != vistor.getCount()){ | ||
| throw new InvalidPayloadOxumException("Invalid file count. Expected " + numberOfFiles + "but found " + vistor.getCount() + " files"); | ||
| } | ||
| return new SimpleImmutableEntry<>(totalSize, numberOfFiles); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,8 @@ | |
| public final class BagitFileWriter { | ||
| private static final Logger logger = LoggerFactory.getLogger(BagitFileWriter.class); | ||
|
|
||
| private static final Version ONE_DOT_ZERO = new Version(1, 0); | ||
|
|
||
| private BagitFileWriter(){ | ||
| //intentionally left empty | ||
| } | ||
|
|
@@ -29,6 +31,27 @@ private BagitFileWriter(){ | |
| * @throws IOException if there was a problem writing the file | ||
| */ | ||
| public static void writeBagitFile(final Version version, final Charset encoding, final Path outputDir) throws IOException{ | ||
| writeBagitFileInternal(version, encoding, null, null, outputDir); | ||
| } | ||
|
|
||
| /** | ||
| * Write the bagit.txt file in required UTF-8 encoding for versions 1.0+. | ||
| * The Payload-Byte-Count and Payload-File-Count lines are only written when the | ||
| * version is 1.0 or greater and both counts are non-null. | ||
| * | ||
| * @param version the version of the bag to write out | ||
| * @param encoding the encoding of the tag files | ||
| * @param payloadByteCount the total number of bytes for all files in the payload directory, may be null | ||
| * @param payloadFileCount the total number of files in the payload directory, may be null | ||
| * @param outputDir the root of the bag | ||
| * | ||
| * @throws IOException if there was a problem writing the file | ||
| */ | ||
| public static void writeBagitFile(final Version version, final Charset encoding, final Long payloadByteCount, | ||
| final Long payloadFileCount, final Path outputDir) throws IOException{ | ||
| writeBagitFileInternal(version, encoding, payloadByteCount, payloadFileCount, outputDir); | ||
| } | ||
|
|
||
| private static void writeBagitFileInternal(final Version version, final Charset encoding, final Long payloadByteCount, | ||
| final Long payloadFileCount, final Path outputDir) throws IOException{ | ||
| final Path bagitPath = outputDir.resolve("bagit.txt"); | ||
| logger.debug("Writing bagit.txt file to [{}]", outputDir); | ||
|
|
||
|
|
@@ -41,5 +64,13 @@ public static void writeBagitFile(final Version version, final Charset encoding, | |
| final String secondLine = "Tag-File-Character-Encoding : " + encoding + System.lineSeparator(); | ||
| logger.debug("Writing line [{}] to [{}]", secondLine, bagitPath); | ||
| Files.write(bagitPath, secondLine.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.APPEND); | ||
|
|
||
| if(version.compareTo(ONE_DOT_ZERO) >= 0 && payloadByteCount != null && payloadFileCount != null){ //if it is 1.0 or greater | ||
| final String thirdLine = "Payload-Byte-Count : " + payloadByteCount + System.lineSeparator(); | ||
|
||
| Files.write(bagitPath, thirdLine.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.APPEND); | ||
|
|
||
| final String fourthLine = "Payload-File-Count : " + payloadFileCount + System.lineSeparator(); | ||
| Files.write(bagitPath, fourthLine.getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE, StandardOpenOption.APPEND); | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this be getValue?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(and possibly the whole thing should be something like
"{} is {}", pair.getKey(), pair.getValue()?)