Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@
<excludes>
<exclude>synanon/video/synanon.mp4</exclude>
<exclude>soul/audio/uclapasc.wav</exclude>
<exclude>soul/audio/uclapasc.dat</exclude>
<exclude>soul/audio/uclapasc.dat.gz</exclude>
</excludes>
</testResource>
</testResources>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
package edu.ucla.library.avpairtree.verticles;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.zip.GZIPOutputStream;

import info.freelibrary.util.Logger;
import info.freelibrary.util.LoggerFactory;
Expand Down Expand Up @@ -148,8 +149,9 @@ public void start(final Promise<Void> aPromise) {
}

/**
* Transforms the source audio file at the given path into audiowaveform data, uploads that data to S3, and replies
* to the message with the URL for the data. If either the transformation or upload fails, sends back error details.
* Transforms the source audio file at the given path into audiowaveform data, compresses and uploads that data to
* S3, and replies to the message with the URL for the compressed data. If the transformation, compression, or
* upload fails, sends back error details.
*
* @param aMessage A message with the file path of the audio file to transform
*/
Expand All @@ -158,30 +160,37 @@ private void handle(final Message<CsvItem> aMessage) {
final CsvItem csvItem = aMessage.body();
final Path audioFilePath = AvPtUtils.getInputFilePath(csvItem, mySourceDir);

audiowaveform(audioFilePath).onSuccess(s3ObjectData -> {
audiowaveform(audioFilePath).onSuccess(data -> {
final String ark = csvItem.getItemARK();
final String s3ObjectKey = StringUtils.format(S3_OBJECT_KEY_TEMPLATE, ark);
final PutObjectRequest req = PutObjectRequest.builder().bucket(myS3Bucket).key(s3ObjectKey).build();
final AsyncRequestBody body = AsyncRequestBody.fromByteBuffer(s3ObjectData);

// Store the audiowaveform data on S3
myS3Client.putObject(req, body).whenComplete((resp, err) -> {
if (resp != null) {
// Success!
final String audiowaveformURL = StringUtils.format(myS3ObjectUrlTemplate,
URLEncoder.encode(s3ObjectKey, StandardCharsets.UTF_8));

// Reply with a JsonObject associating the item ARK with the URL for the audiowaveform data
aMessage.reply(new JsonObject().put(csvItem.getItemARK(), audiowaveformURL));
} else {
final String s3ErrorMsg =
LOGGER.getMessage(MessageCodes.AVPT_022, s3ObjectKey, err.getMessage());

// Since the sender (WatcherVerticle) just logs all errors, should be okay to use a single
// failureCode for all errors
aMessage.fail(Op.ERROR_CODE, s3ErrorMsg);
}
});
final PutObjectRequest req =
PutObjectRequest.builder().bucket(myS3Bucket).key(s3ObjectKey).contentEncoding("gzip").build();

try {
final byte[] compressedData = gzip(data);
final AsyncRequestBody body = AsyncRequestBody.fromBytes(compressedData);

// Store the compressed audiowaveform data on S3
myS3Client.putObject(req, body).whenComplete((resp, err) -> {
if (resp != null) {
// Success!
final String audiowaveformURL = StringUtils.format(myS3ObjectUrlTemplate,
URLEncoder.encode(s3ObjectKey, StandardCharsets.UTF_8));

// Reply with a JsonObject associating the item ARK with the URL for the audiowaveform data
aMessage.reply(new JsonObject().put(csvItem.getItemARK(), audiowaveformURL));
} else {
final String s3ErrorMsg =
LOGGER.getMessage(MessageCodes.AVPT_022, s3ObjectKey, err.getMessage());

// Since the sender (WatcherVerticle) just logs all errors, should be okay to use a single
// failureCode for all errors
aMessage.fail(Op.ERROR_CODE, s3ErrorMsg);
}
});
} catch (final IOException details) {
aMessage.fail(Op.ERROR_CODE, details.getMessage());
}
}).onFailure(details -> {
aMessage.fail(Op.ERROR_CODE, details.getMessage());
});
Expand All @@ -194,11 +203,11 @@ private void handle(final Message<CsvItem> aMessage) {
* Transforms the source audio file at the given path into binary audiowaveform data.
*
* @param anAudioFilePath The path to the audio file to transform
* @return A Future that is completed with a ByteBuffer containing the audiowaveform data
* @return A Future that is completed with a byte array containing the audiowaveform data
* @throws IOException if an I/O error occurs during the execution of the audiowaveform program
*/
private Future<ByteBuffer> audiowaveform(final Path anAudioFilePath) throws IOException {
final Promise<ByteBuffer> asyncResult = Promise.promise();
private Future<byte[]> audiowaveform(final Path anAudioFilePath) throws IOException {
final Promise<byte[]> asyncResult = Promise.promise();
final String[] cmd = { AUDIOWAVEFORM, "--input-filename", anAudioFilePath.toString(), "--output-format", "dat",
"--bits", "8" };
final String cmdline = String.join(SPACE, cmd);
Expand All @@ -221,7 +230,7 @@ private Future<ByteBuffer> audiowaveform(final Path anAudioFilePath) throws IOEx
// Redact the binary audiowaveform data for logging
LOGGER.debug(MessageCodes.AVPT_015, cmdline, exitValue, "[binary audiowaveform data]");

asyncResult.complete(ByteBuffer.wrap(stdout));
asyncResult.complete(stdout);
} else {
asyncResult.fail(LOGGER.getMessage(MessageCodes.AVPT_015, cmdline, exitValue, stderr));
}
Expand All @@ -232,4 +241,24 @@ private Future<ByteBuffer> audiowaveform(final Path anAudioFilePath) throws IOEx

return asyncResult.future();
}

/**
 * Compresses the data in the given byte array to GZIP format.
 *
 * @param aByteArray The uncompressed data
 * @return The compressed data
 * @throws IOException if an I/O error occurs during the data compression; the underlying exception is attached as
 *         the cause
 */
private byte[] gzip(final byte[] aByteArray) throws IOException {
    final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

    try (GZIPOutputStream gz = new GZIPOutputStream(outputStream)) {
        gz.write(aByteArray);
        // Write out any remaining compressed data and the GZIP trailer before reading the backing buffer
        gz.finish();

        return outputStream.toByteArray();
    } catch (final IOException details) {
        // Chain the original exception as the cause so its stack trace is not lost when rethrowing
        throw new IOException(LOGGER.getMessage(MessageCodes.AVPT_023, details), details);
    }
}
}
1 change: 1 addition & 0 deletions src/main/resources/av-pairtree_messages.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@
<entry key="AVPT_020">The environment variable AUDIOWAVEFORM_S3_BUCKET must be set</entry>
<entry key="AVPT_021">The environment variable AUDIOWAVEFORM_S3_OBJECT_URL_TEMPLATE must be set</entry>
<entry key="AVPT_022">Unable to upload audiowaveform for item '{}' to S3: {}</entry>
<entry key="AVPT_023">Unable to compress data: {}</entry>

</properties>
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,28 @@ public void testWaveformGenerationAndS3Storage(final TestContext aContext) {

WebClient.create(vertx).getAbs(audiowaveformURL).send().onSuccess(resp -> {
final Buffer expected =
vertx.fileSystem().readFileBlocking("src/test/resources/soul/audio/uclapasc.dat");
vertx.fileSystem().readFileBlocking("src/test/resources/soul/audio/uclapasc.dat.gz");
final Buffer actual = resp.body();

// Partition the GZIP data into the header, body, and footer (according to RFC 1952)
final Buffer expectedHeader = expected.getBuffer(0, 10);
final Buffer actualHeader = actual.getBuffer(0, 10);

final Buffer expectedBody = expected.getBuffer(10, expected.length() - 8);
final Buffer actualBody = actual.getBuffer(10, actual.length() - 8);

final Buffer expectedFooter = expected.getBuffer(expected.length() - 8, expected.length());
final Buffer actualFooter = actual.getBuffer(actual.length() - 8, actual.length());

try {
assertEquals(expected, actual);
// Apparently JDK 11 doesn't implement RFC 1952 correctly (i.e., it always sets the OS field (the
// last byte in the header) to "0"), so only compare the first nine bytes
assertEquals(expectedHeader.getBuffer(0, expectedHeader.length() - 1),
actualHeader.getBuffer(0, actualHeader.length() - 1));
assertEquals(expectedBody, actualBody);
assertEquals(expectedFooter, actualFooter);
} catch (final AssertionError details) {
LOGGER.error(details, details.getMessage());
aContext.fail();
aContext.fail(details);
} finally {
// TODO: clean up the S3 bucket
asyncTask.complete();
Expand Down
5 changes: 5 additions & 0 deletions src/test/resources/soul/audio/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
The file `uclapasc.dat.gz` was generated with the command:

```bash
audiowaveform --input-filename uclapasc.wav --output-format dat --bits 8 | gzip -n - > uclapasc.dat.gz
```
Binary file removed src/test/resources/soul/audio/uclapasc.dat
Binary file not shown.
Binary file added src/test/resources/soul/audio/uclapasc.dat.gz
Binary file not shown.