From e50d08b14d8ad3ef49d01bbf0df4ce0d5f373914 Mon Sep 17 00:00:00 2001 From: "Mark A. Matney, Jr" Date: Thu, 1 Jul 2021 14:27:52 -0700 Subject: [PATCH 1/2] Compress audiowaveform data --- pom.xml | 2 +- .../verticles/WaveformVerticle.java | 87 ++++++++++++------ src/main/resources/av-pairtree_messages.xml | 1 + .../verticles/WaveformVerticleTest.java | 22 ++++- src/test/resources/soul/audio/uclapasc.dat | Bin 5188 -> 0 bytes src/test/resources/soul/audio/uclapasc.dat.gz | Bin 0 -> 2755 bytes 6 files changed, 78 insertions(+), 34 deletions(-) delete mode 100644 src/test/resources/soul/audio/uclapasc.dat create mode 100644 src/test/resources/soul/audio/uclapasc.dat.gz diff --git a/pom.xml b/pom.xml index ea7135f..91e13c0 100644 --- a/pom.xml +++ b/pom.xml @@ -162,7 +162,7 @@ synanon/video/synanon.mp4 soul/audio/uclapasc.wav - soul/audio/uclapasc.dat + soul/audio/uclapasc.dat.gz diff --git a/src/main/java/edu/ucla/library/avpairtree/verticles/WaveformVerticle.java b/src/main/java/edu/ucla/library/avpairtree/verticles/WaveformVerticle.java index 610fc71..5ba56df 100644 --- a/src/main/java/edu/ucla/library/avpairtree/verticles/WaveformVerticle.java +++ b/src/main/java/edu/ucla/library/avpairtree/verticles/WaveformVerticle.java @@ -1,11 +1,12 @@ package edu.ucla.library.avpairtree.verticles; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.URLEncoder; -import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.nio.file.Path; +import java.util.zip.GZIPOutputStream; import info.freelibrary.util.Logger; import info.freelibrary.util.LoggerFactory; @@ -148,8 +149,9 @@ public void start(final Promise aPromise) { } /** - * Transforms the source audio file at the given path into audiowaveform data, uploads that data to S3, and replies - * to the message with the URL for the data. If either the transformation or upload fails, sends back error details. + * Transforms the source audio file at the given path into audiowaveform data, compresses and uploads that data to + * S3, and replies to the message with the URL for the compressed data. If either the transformation, compression, + * or upload fails, sends back error details. * * @param aMessage A message with the file path of the audio file to transform */ @@ -158,30 +160,37 @@ private void handle(final Message aMessage) { final CsvItem csvItem = aMessage.body(); final Path audioFilePath = AvPtUtils.getInputFilePath(csvItem, mySourceDir); - audiowaveform(audioFilePath).onSuccess(s3ObjectData -> { + audiowaveform(audioFilePath).onSuccess(data -> { final String ark = csvItem.getItemARK(); final String s3ObjectKey = StringUtils.format(S3_OBJECT_KEY_TEMPLATE, ark); - final PutObjectRequest req = PutObjectRequest.builder().bucket(myS3Bucket).key(s3ObjectKey).build(); - final AsyncRequestBody body = AsyncRequestBody.fromByteBuffer(s3ObjectData); - - // Store the audiowaveform data on S3 - myS3Client.putObject(req, body).whenComplete((resp, err) -> { - if (resp != null) { - // Success! - final String audiowaveformURL = StringUtils.format(myS3ObjectUrlTemplate, - URLEncoder.encode(s3ObjectKey, StandardCharsets.UTF_8)); - - // Reply with a JsonObject associating the item ARK with the URL for the audiowaveform data - aMessage.reply(new JsonObject().put(csvItem.getItemARK(), audiowaveformURL)); - } else { - final String s3ErrorMsg = - LOGGER.getMessage(MessageCodes.AVPT_022, s3ObjectKey, err.getMessage()); - - // Since the sender (WatcherVerticle) just logs all errors, should be okay to use a single - // failureCode for all errors - aMessage.fail(Op.ERROR_CODE, s3ErrorMsg); - } - }); + final PutObjectRequest req = + PutObjectRequest.builder().bucket(myS3Bucket).key(s3ObjectKey).contentEncoding("gzip").build(); + + try { + final byte[] compressedData = gzip(data); + final AsyncRequestBody body = AsyncRequestBody.fromBytes(compressedData); + + // Store the compressed audiowaveform data on S3 + myS3Client.putObject(req, body).whenComplete((resp, err) -> { + if (resp != null) { + // Success! + final String audiowaveformURL = StringUtils.format(myS3ObjectUrlTemplate, + URLEncoder.encode(s3ObjectKey, StandardCharsets.UTF_8)); + + // Reply with a JsonObject associating the item ARK with the URL for the audiowaveform data + aMessage.reply(new JsonObject().put(csvItem.getItemARK(), audiowaveformURL)); + } else { + final String s3ErrorMsg = + LOGGER.getMessage(MessageCodes.AVPT_022, s3ObjectKey, err.getMessage()); + + // Since the sender (WatcherVerticle) just logs all errors, should be okay to use a single + // failureCode for all errors + aMessage.fail(Op.ERROR_CODE, s3ErrorMsg); + } + }); + } catch (final IOException details) { + aMessage.fail(Op.ERROR_CODE, details.getMessage()); + } }).onFailure(details -> { aMessage.fail(Op.ERROR_CODE, details.getMessage()); }); @@ -194,11 +203,11 @@ private void handle(final Message aMessage) { * Transforms the source audio file at the given path into binary audiowaveform data. * * @param anAudioFilePath The path to the audio file to transform - * @return A Future that is completed with a ByteBuffer containing the audiowaveform data + * @return A Future that is completed with a byte array containing the audiowaveform data * @throws IOException if an I/O error occurs during the execution of the audiowaveform program */ - private Future audiowaveform(final Path anAudioFilePath) throws IOException { - final Promise asyncResult = Promise.promise(); + private Future audiowaveform(final Path anAudioFilePath) throws IOException { + final Promise asyncResult = Promise.promise(); final String[] cmd = { AUDIOWAVEFORM, "--input-filename", anAudioFilePath.toString(), "--output-format", "dat", "--bits", "8" }; final String cmdline = String.join(SPACE, cmd); @@ -221,7 +230,7 @@ private Future audiowaveform(final Path anAudioFilePath) throws IOEx // Redact the binary audiowaveform data for logging LOGGER.debug(MessageCodes.AVPT_015, cmdline, exitValue, "[binary audiowaveform data]"); - asyncResult.complete(ByteBuffer.wrap(stdout)); + asyncResult.complete(stdout); } else { asyncResult.fail(LOGGER.getMessage(MessageCodes.AVPT_015, cmdline, exitValue, stderr)); } @@ -232,4 +241,24 @@ private Future audiowaveform(final Path anAudioFilePath) throws IOEx return asyncResult.future(); } + + /** + * Compresses the data in the given byte array to GZIP format. + * + * @param aByteArray The uncompressed data + * @return The compressed data + * @throws IOException if an I/O error occurs during the data compression + */ + private byte[] gzip(final byte[] aByteArray) throws IOException { + final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + + try (GZIPOutputStream gz = new GZIPOutputStream(outputStream)) { + gz.write(aByteArray); + gz.finish(); + + return outputStream.toByteArray(); + } catch (final IOException details) { + throw new IOException(LOGGER.getMessage(MessageCodes.AVPT_023, details)); + } + } } diff --git a/src/main/resources/av-pairtree_messages.xml b/src/main/resources/av-pairtree_messages.xml index 5792725..83c6d15 100644 --- a/src/main/resources/av-pairtree_messages.xml +++ b/src/main/resources/av-pairtree_messages.xml @@ -28,5 +28,6 @@ The environment variable AUDIOWAVEFORM_S3_BUCKET must be set The environment variable AUDIOWAVEFORM_S3_OBJECT_URL_TEMPLATE must be set Unable to upload audiowaveform for item '{}' to S3: {} + Unable to compress data: {} diff --git a/src/test/java/edu/ucla/library/avpairtree/verticles/WaveformVerticleTest.java b/src/test/java/edu/ucla/library/avpairtree/verticles/WaveformVerticleTest.java index a7e1b25..ee370eb 100644 --- a/src/test/java/edu/ucla/library/avpairtree/verticles/WaveformVerticleTest.java +++ b/src/test/java/edu/ucla/library/avpairtree/verticles/WaveformVerticleTest.java @@ -52,14 +52,28 @@ public void testWaveformGenerationAndS3Storage(final TestContext aContext) { WebClient.create(vertx).getAbs(audiowaveformURL).send().onSuccess(resp -> { final Buffer expected = - vertx.fileSystem().readFileBlocking("src/test/resources/soul/audio/uclapasc.dat"); + vertx.fileSystem().readFileBlocking("src/test/resources/soul/audio/uclapasc.dat.gz"); final Buffer actual = resp.body(); + // Partition the GZIP data into the header, body, and footer (according to RFC 1952) + final Buffer expectedHeader = expected.getBuffer(0, 10); + final Buffer actualHeader = actual.getBuffer(0, 10); + + final Buffer expectedBody = expected.getBuffer(10, expected.length() - 8); + final Buffer actualBody = actual.getBuffer(10, actual.length() - 8); + + final Buffer expectedFooter = expected.getBuffer(expected.length() - 8, expected.length()); + final Buffer actualFooter = actual.getBuffer(actual.length() - 8, actual.length()); + try { - assertEquals(expected, actual); + // Apparently JDK 11 doesn't implement RFC 1952 correctly (i.e., it always sets the OS field (the + // last byte in the header) to "0"), so only compare the first nine bytes + assertEquals(expectedHeader.getBuffer(0, expectedHeader.length() - 1), + actualHeader.getBuffer(0, actualHeader.length() - 1)); + assertEquals(expectedBody, actualBody); + assertEquals(expectedFooter, actualFooter); } catch (final AssertionError details) { - LOGGER.error(details, details.getMessage()); - aContext.fail(); + aContext.fail(details); } finally { // TODO: clean up the S3 bucket asyncTask.complete(); diff --git a/src/test/resources/soul/audio/uclapasc.dat b/src/test/resources/soul/audio/uclapasc.dat deleted file mode 100644 index 8a4ed8bbbc04a4c3ffe72d6c62287fbcb94293a4..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5188 zcmdUz=W^pn5`(JfmWA!zQQ6E;WzJ5H|>*L4j^FiNlt6B5cNLc^6@^pV(V}TbawuFFDJ2t)_MF>sk8S5%$+wqA`+g*Vd@7Re8Ti=ljRwyq;IH zGk?vscXkc?2CMi*_RsIgb7YgX@t3Y>`Q@d>S6m--NIy8LyvZ}t&o`koG@;?m6pS9fzVYY4_+vDMqiY=A`NkUqM*6JCoDplxnzwwNoWYxY z=B$~I7TkGbI3#F=hG^zZ?joV{`e2X*=JU8k}5*0-Xw1Gk)RM`ajL8${4 zgb#ai52`KtZ4NLN%nHXs^RH8*HFqVcBj^Up{vuh@ClEa`Lch%VwUX9=C%6*P$PmFH z!qRxecf@0Z=6I7MF7X$5^LxJy@Of0m+?>s~HzWkUg>i=7BwKvQ6tcb1-5x z@ysQ^;i<{+tk3{9gp-EI2thsZW=smAz!q=(&Knp>o)JYb5P=iM;cu-dWCx07OB|J* zh6oza&v>DR@2xvJ#MbLw11iCSa>W^V5{k)cXC}vHXN2w1AK&36PrgMuqZC<2aL0UQ zrtaYp<~yA*olf!So7gygq6{IvFv8%Xbz~Bd>?j{hj4Y5nq2uTX6BC&sM6_6Dgb;N6 z(u(|+WqiTovKwCqebRbkhW&U_7&OShM}dh9xr;Z%3itpIC?D~hK{|N$41Sb%(8KTW z(4^6@(m*aNPgxC%#S?H1_Sq~K1Bipxvp@ScqI`3)3{MLQ$`|A#4~BplNhp>m9suuJ&_ z|5;X&HTd>tY-M3|85YgSDthy0qF)rA@a!YA_A@^dsB7%& z4ss%_@_SZ>tzttQkeHB5aT6;_bufs*m1Cab&~Lh|Nv_)hsc>~tXIuK5F&k zHF=jdVZ-`$()xB(RKd8G?ei+b-$;l5L9+ah)=8IDn;~g~3YpBz>ZFDxTX4fzbY`@2 zU3CMl0c+?kT~(PTaAnAcJ^=^90@y+>qjYbp7Z#AqWV~|RRxa=NCds)zN)vR!7$iprK*<5a@hzoC=y^|`DQZ(JA^?pRoA6=5z0X$k`uHEC-EOx zq6VgeSQKWW+7Nd!&432j0SChNVtmyJx*Kv_enI1DeHXUXIZ}7T5O`MjB7>>7#GrRz z6R%H*Y1o1?@Rq>s%0)cY2MzcU_PJM&CGAuZ)DOu|l8(3FS?-Af{7anZS7tmcD@Q8m z=n`=vX}ceorN;|kj5UaZ9#YaMhTQ%gBOh<00mXrjyqt zqucv77HF(~4FrX%Cq!=1T9t;Xji0?K#2pT)wahXrO!cKWXc8&7J5>=0Pu!$s~A06Zr&(TfSSlX&LwTLCILKnWXo>Vk%$_ucd zZiGI;`Op)4gqghqxq~%myE5SCCZ4oxE`Gd`Uy}JVi+|px?aOjxyDZO6Uz;CM43;PblMR@|sJa#Ttrgj-< z`fz)+E7*O<=L0cC=Yi^gpMxg2U`=Q>z zVOhDaShq;)yh+s!!G@{jPVSW*=tx&XMP6`5U4{wZCUQfaEOit8S&te3dIoWd*aC#f zugVd$vYoc=FhI5X68X?_mNH#FVQ0i$V3LVdL4|59^`Q&k*hKnW4%hUOU9xifzG-4~ zPC8HgNX@77nS%kX2JaSjw*GePV8lICIw6C-(_MV1j&#kNz^ui9>UhAHa%I^AY3HS$ z)VbTe?)`z7f&g}3%0Jt;e(0C%0X^i=9ely}^o&JXW@UEG>a0lM{e)-yVsS}S;1K*q zrmSzEz_5y04ahZglaq2$zw7C7Vm(muD8`a5HcRkFMO+4INE?;vx`K}C-h}kAR51KR zcOfPMmzoDUzYbwh7O|>WZ`onbevub>8CO|_#_A5LjOV<{iw!k#qx%9&s;rKP4OZHS z*>2{xw%E3NXx{Tq+NhhNuSQQQ+_p@+h+WL)k{9r5k=0?jKjnYzPut>f*}reg!!@s{ zj2GCKIrNTXH0!7fvtCt3VhI}CET{Tq!>0J-yuH4je}DV!^mN^D?yh^#{d|2Yh3!$p1(bvt}Yq2f%m+g6XIh2Rfq1v^Z>(l%0@`8Oo&b#B&@%iodw`Vfq<+Q(U zPtWIA`RAS- z+a7msyE4CI*L{&3pRU_;jz71@r_14Ts1x>UX_Z!q)jGNI=CZ4j;!r+c^S8rwS7)b} zKYu&F9QVi9zn-r9YhLc!?c1i@y**v?>*ly2qxVIAdbvIyUysj4Qbnwl7u)NW6?f;& z8P8ldZAzu}?7kIjz_|45dP66a&|gy7$ZrrshZ_&XY78rCG1&soki&Eb+Bw7g^t|ep zY_IR%)X%FMXV*wwQU6P4t!yUK==6wu-1(E_debATfqivbChIeGwbT{utko58Oe^+R znhj=D7R%|Ggta#8=E5|*{YQ=QRC&hSOdNapIhWnqhW n%gHdZMtq1f7hV=Ob1d9!@AUlr^E)}M-r672F>U-ht>b?KZ@0m< diff --git a/src/test/resources/soul/audio/uclapasc.dat.gz b/src/test/resources/soul/audio/uclapasc.dat.gz new file mode 100644 index 0000000000000000000000000000000000000000..b44fdc968042fe098f10f0ca634469a3dc185800 GIT binary patch literal 2755 zcmV;!3Ow~6iwFP!000001JzjBlH<4$oayEP@TRUl#v2>4ALajl*cf%|77qX8P+$=xqq>4*RF@)M8Adqh_Jn~i>zP~=3^{2_n*rMdT* z*L(J-jNWG(1~o)OJjOU7VTeXKW_?4BIK0DKg>P{3OaIj%2S&m%p5}RSgnmBX4VhDY zY7&$5Nw2!p;l~cU*VTt_gR;9d6%}C%i5$(O=|OAHAS1*v`hNTX07VVu7np;lR4dGmZNE_ z+M+Ew-9aV~3tijHlx=>kuO;tYea)_Umo-(BH<`&=9=c7TQ+h5;)-3wGfj&cG7&XZ3 zvp%C`U1kuuUO}6rN5p!GC?jihmvnibw$!+W%(Tm!vd0r<7uWP!Nc0ox-X>P}Den*` zc&mp$M|wGFuvPPBh)%X{(k%M!#&PX4L(3ohQ zB)8`?cw`BsTA&+P)?Tt84~QP{#URN3YbBq_j&0p+9^F|0wzu(qmdhqyzt)&&=d~#HS(Zi0N|3t#-MK z`hppP_<|8k46QR)0Lc;a!R3$zWDn5MM#RLRiE`)%%N!A+$Qz;+vXhqK4LnY};p?O? zT4gKPpApSHvL7~KJ(oG;E_s3&!xi%2J$TMRI`FI<{MZX)z`O9!z^}nd2DxnfYOq+= zRkTmCoD4t?3P(Na2~nP6Se7wItj_@LAP)=?)8~dBW(=7-nb!bbKo*cAk?{p0iJ2YQ zB_yAKZ|F&lm15{f?#SSUL^4yD20kQ{$Xa9z$W57JhAb@kG|+yYuts$DvC+gl4%j(n z!ef{V+|^fb5Aa;Iz~H4aHL#X7MOR)Hm#WJVNoHu=`$#C8(Kq1a&`4A$k(GeeMQR~A z-Scxch*vS6dV|w2CaeI@WUw9iVscBC$iWx6jKV*Rvem+^DI*6Aq$8fO4%pZcImwU; zlG6_Ijod>5WOT?xW&{|JoSMK6tlOlLQ<|}Io}a-vWK>wC`<(nX89Af#gIvLjg~RbRbDecVGc({H(UqPYfGek&3BUod z0Bk|7Far_6S_>?Im2AWum&z5hHVOyE?!kwl3#WJpI5r4B`l`=aE$cuAFKNjrqMH>q zVa-H@i8`srk{_|;4P*>*W$k73MvoJ$=ld908;mPROqeMHt2d=7sk|bUwS<<_Mlb^- zk-B+B$&^^IMt0}b_b$GhVFyq^n{*)m0ZXWXr6G&v>_;FkC)1dadRuTH*q)5fI)Umj zguc^(tnfx`vvXwk8;fU&FJv(4En?6|U=!6B9qR&62D}yEcHc7~H=9)a1M7O_HI$0L zekh(ZC!%-gb;oMQd_})9!o#%EqJrAElSoSU17_*r1z-$oAP&OC)Do=ZJ2u`2Q|vpd z8|9-8F(1)W1w;co7if}TO^ibSl(I+Zje2IElGvd?a~g62-+Q8pp-c0fHo7$A`w)!k z25lgPC;;o&0=W!XVA|Z|PWrz=b%l+4EwRAKehmmpSx*o-uQe-;^xueP3V$T@z=SJM zl}n$;=mQ6a9Y!MDtxVJtN94B1NS(LjXLhp*>J{vy7igD_r90<33S){Dj{PDf5BZo) zyyn!Fy{RA;aTSKsqj-|hoR}BEEu8lg(Vb7&A&lr!oK6C@TGiC!;l zz;Dvy2>Y6VQD*NkDeO8#CJ$)(G(CSkKSsWu5L4)EG_m87COOpSN zm)rI3AN%$0W%qimJ~yA6>jE5Bo!1wvdn&JWQ?eTZ&y~WRxMy}iM|3rPGUE(&8E8tG z3oSd@SoFtwy1u z_}x+{`N3}Kv2TrTve&&m-MR1LK9cz-ed;}u{ouUox&dF{d-RN_@?4#(V|A@g1$e)} zbHRGaypg&FzghYPC~#QC1t#Pk{DzffMWAVT9s){+FCy7n2a+Z5#|3dQs3C23X4jQ; zsP0XQJ{A=WenNLaCIT)wFTVZSX#op;!yKGmoCoL{O7G-h{jsqkE1>eB)> zal!i&EV)$I46%Wg5;5s!rckggduWWup3T`!p|6IXsBp`3*=5+pEDrSvyn3px>3O}Y zf3A1S)8?@LT%I?_`hv=M0Q+K2&MG!3cG8lQJ|kfXG?pw!^{dR{^v8aAeBb~6@!Rg@ zxWKtOu7U2?NusT;iSMBm+abA7A9P8s^yFf;-PxbEY z__}@HzMhIphPCR`^0>r`tNmgR&m0$RiAt+v-wJGianY}H1f5XPv)|UpZy?4IH;9!@ zFQ~=bBU|P$x&!W<4ZHb_-IDbBdSXA%ZXCPDp4~Bf63J#{n&R#ge5MD=Rl~FF$GPjy zT-8_XYEf6Xvu0O7W)F;#|5yOCCcY_4t_XxP0_G|4MppKF#Cst1FSz^v>G1zk#4>ky z!TkTBQ2w_=W_%*%g^Qo$E6>SK{y$`Qa;E3y`S9E Date: Thu, 1 Jul 2021 14:45:54 -0700 Subject: [PATCH 2/2] Explain how to generate the test fixture --- src/test/resources/soul/audio/README.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 src/test/resources/soul/audio/README.md diff --git a/src/test/resources/soul/audio/README.md b/src/test/resources/soul/audio/README.md new file mode 100644 index 0000000..b479344 --- /dev/null +++ b/src/test/resources/soul/audio/README.md @@ -0,0 +1,5 @@ +The file `uclapasc.dat.gz` was generated with the command: + +```bash +audiowaveform --input-filename uclapasc.wav --output-format dat --bits 8 | gzip -n - > uclapasc.dat.gz +``` \ No newline at end of file