From 6817f659b812249cc01a47044949196eebb81207 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 19:14:45 +0900 Subject: [PATCH 01/20] Increase size of direct buffer --- .../org/apache/hadoop/io/compress/snappy/SnappyCompressor.java | 2 +- .../apache/hadoop/io/compress/snappy/SnappyDecompressor.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 3d386800e4d87..56310a9c4839f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -35,7 +35,7 @@ public class SnappyCompressor implements Compressor { private static final Logger LOG = LoggerFactory.getLogger(SnappyCompressor.class.getName()); - private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024; + private static final int DEFAULT_DIRECT_BUFFER_SIZE = 256 * 1024; private int directBufferSize; private Buffer compressedDirectBuf = null; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index f31b76c347c5c..5cbae77fb2b9e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -35,7 +35,7 @@ public class SnappyDecompressor implements Decompressor { private static final Logger LOG = LoggerFactory.getLogger(SnappyDecompressor.class.getName()); - private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024; + private static final int DEFAULT_DIRECT_BUFFER_SIZE = 256 * 1024; private int directBufferSize; private Buffer compressedDirectBuf = null; From 0e357a7a0a2d8e0a3bd62ac4605f51473a9b4009 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 19:23:08 +0900 Subject: [PATCH 02/20] Add logging --- .../compress/snappy/TestSnappyCompressorDecompressor.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index cc986c7e0aea4..655f58f4da402 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.lang.reflect.Array; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.Random; import org.apache.hadoop.io.DataInputBuffer; @@ -44,11 +45,16 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import static org.junit.Assume.*; public class TestSnappyCompressorDecompressor { + public static final Logger LOG = + LoggerFactory.getLogger(TestSnappyCompressorDecompressor.class); + @Before public void before() { assumeTrue(SnappyCodec.isNativeCodeLoaded()); @@ -170,6 +176,7 @@ public void testSnappyDecompressorCompressAIOBException() { public void testSnappyCompressDecompress() { int BYTE_SIZE = 1024 * 54; byte[] bytes = BytesGenerator.get(BYTE_SIZE); + LOG.info(Arrays.toString(bytes)); SnappyCompressor compressor = new SnappyCompressor(); try { compressor.setInput(bytes, 0, bytes.length); @@ -181,6 +188,7 @@ public void testSnappyCompressDecompress() { byte[] compressed = new byte[BYTE_SIZE]; int cSize = compressor.compress(compressed, 0, compressed.length); + LOG.info(Arrays.toString(compressed)); assertTrue( "SnappyCompressDecompress getBytesWritten after compress error !!!", compressor.getBytesWritten() > 0); From 44448e0ab77f0addb5779a3f713b111f3d6e691c Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 19:35:41 +0900 Subject: [PATCH 03/20] Output compressed size --- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 655f58f4da402..71f33dcf06d46 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -176,7 +176,7 @@ public void testSnappyDecompressorCompressAIOBException() { public void testSnappyCompressDecompress() { int BYTE_SIZE = 1024 * 54; byte[] bytes = BytesGenerator.get(BYTE_SIZE); - LOG.info(Arrays.toString(bytes)); + // LOG.info(Arrays.toString(bytes)); SnappyCompressor compressor = new SnappyCompressor(); try { compressor.setInput(bytes, 0, bytes.length); @@ -188,7 +188,8 @@ public void testSnappyCompressDecompress() { byte[] compressed = new byte[BYTE_SIZE]; int cSize = compressor.compress(compressed, 0, compressed.length); - LOG.info(Arrays.toString(compressed)); + LOG.info("cSize: {}", cSize); + // LOG.info(Arrays.toString(compressed)); assertTrue( "SnappyCompressDecompress getBytesWritten after compress error !!!", compressor.getBytesWritten() > 0); From e9cabdd159249b6c11ed75b6746dfb57ee319250 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 19:51:09 +0900 Subject: [PATCH 04/20] Revert the change of buffer size. --- .../org/apache/hadoop/io/compress/snappy/SnappyCompressor.java | 2 +- .../apache/hadoop/io/compress/snappy/SnappyDecompressor.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 56310a9c4839f..3d386800e4d87 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -35,7 +35,7 @@ public class SnappyCompressor implements Compressor { private static final Logger LOG = LoggerFactory.getLogger(SnappyCompressor.class.getName()); - private static final int DEFAULT_DIRECT_BUFFER_SIZE = 256 * 1024; + private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024; private int directBufferSize; private Buffer compressedDirectBuf = null; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index 5cbae77fb2b9e..f31b76c347c5c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -35,7 +35,7 @@ public class SnappyDecompressor implements Decompressor { private static final Logger LOG = LoggerFactory.getLogger(SnappyDecompressor.class.getName()); - private static final int DEFAULT_DIRECT_BUFFER_SIZE = 256 * 1024; + private static final int DEFAULT_DIRECT_BUFFER_SIZE = 64 * 1024; private int directBufferSize; private Buffer compressedDirectBuf = null; From d6df06e0c1300ac2a76c308c55fcef379587f162 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 20:01:11 +0900 Subject: [PATCH 05/20] More debug messages --- .../org/apache/hadoop/io/compress/snappy/SnappyCompressor.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 3d386800e4d87..12b8cfc96a8ef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -226,6 +226,8 @@ public int compress(byte[] b, int off, int len) // Compress data n = compressBytesDirect(); + LOG.info("n: {}", n); + LOG.info("uncompressedDirectBufLen: {}", uncompressedDirectBufLen); compressedDirectBuf.limit(n); uncompressedDirectBuf.clear(); // snappy consumes all buffer input From 34f86164dbeb32933fe40bee462364c073875c98 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 20:10:35 +0900 Subject: [PATCH 06/20] Use compressed size as possible --- .../apache/hadoop/io/compress/snappy/SnappyCompressor.java | 4 ---- .../compress/snappy/TestSnappyCompressorDecompressor.java | 7 +++---- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 12b8cfc96a8ef..e5e523fe7cbb1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -226,8 +226,6 @@ public int compress(byte[] b, int off, int len) // Compress data n = compressBytesDirect(); - LOG.info("n: {}", n); - LOG.info("uncompressedDirectBufLen: {}", uncompressedDirectBufLen); compressedDirectBuf.limit(n); uncompressedDirectBuf.clear(); // snappy consumes all buffer input @@ -236,8 +234,6 @@ public int compress(byte[] b, int off, int len) finished = true; } - // Get atmost 'len' bytes - n = Math.min(n, len); bytesWritten += n; ((ByteBuffer) compressedDirectBuf).get(b, off, n); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 71f33dcf06d46..76de0435993fc 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -176,7 +176,6 @@ public void testSnappyDecompressorCompressAIOBException() { public void testSnappyCompressDecompress() { int BYTE_SIZE = 1024 * 54; byte[] bytes = BytesGenerator.get(BYTE_SIZE); - // LOG.info(Arrays.toString(bytes)); SnappyCompressor compressor = new SnappyCompressor(); try { compressor.setInput(bytes, 0, bytes.length); @@ -188,13 +187,13 @@ public void testSnappyCompressDecompress() { byte[] compressed = new byte[BYTE_SIZE]; int cSize = compressor.compress(compressed, 0, compressed.length); - LOG.info("cSize: {}", cSize); - // LOG.info(Arrays.toString(compressed)); + LOG.info("input size: {}", BYTE_SIZE); + LOG.info("compressed size: {}", cSize); assertTrue( "SnappyCompressDecompress getBytesWritten after compress error !!!", compressor.getBytesWritten() > 0); - SnappyDecompressor decompressor = new SnappyDecompressor(BYTE_SIZE); + SnappyDecompressor decompressor = new SnappyDecompressor(Math.max(cSize, BYTE_SIZE)); // set as input for decompressor only compressed data indicated with cSize decompressor.setInput(compressed, 0, cSize); byte[] decompressed = new byte[BYTE_SIZE]; From 918abd00fb8a0964ecc15df103ad3abc2992be17 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 20:14:35 +0900 Subject: [PATCH 07/20] Throw exception in test --- .../TestSnappyCompressorDecompressor.java | 62 +++++++++---------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 76de0435993fc..aa3ff0fd519fd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -173,42 +173,38 @@ public void testSnappyDecompressorCompressAIOBException() { } @Test - public void testSnappyCompressDecompress() { + public void testSnappyCompressDecompress() throws Exception { int BYTE_SIZE = 1024 * 54; byte[] bytes = BytesGenerator.get(BYTE_SIZE); SnappyCompressor compressor = new SnappyCompressor(); - try { - compressor.setInput(bytes, 0, bytes.length); - assertTrue("SnappyCompressDecompress getBytesRead error !!!", - compressor.getBytesRead() > 0); - assertTrue( - "SnappyCompressDecompress getBytesWritten before compress error !!!", - compressor.getBytesWritten() == 0); - - byte[] compressed = new byte[BYTE_SIZE]; - int cSize = compressor.compress(compressed, 0, compressed.length); - LOG.info("input size: {}", BYTE_SIZE); - LOG.info("compressed size: {}", cSize); - assertTrue( - "SnappyCompressDecompress getBytesWritten after compress error !!!", - compressor.getBytesWritten() > 0); - - SnappyDecompressor decompressor = new SnappyDecompressor(Math.max(cSize, BYTE_SIZE)); - // set as input for decompressor only compressed data indicated with cSize - decompressor.setInput(compressed, 0, cSize); - byte[] decompressed = new byte[BYTE_SIZE]; - decompressor.decompress(decompressed, 0, decompressed.length); - - assertTrue("testSnappyCompressDecompress finished error !!!", - decompressor.finished()); - Assert.assertArrayEquals(bytes, decompressed); - compressor.reset(); - decompressor.reset(); - assertTrue("decompressor getRemaining error !!!", - decompressor.getRemaining() == 0); - } catch (Exception e) { - fail("testSnappyCompressDecompress ex error!!!"); - } + compressor.setInput(bytes, 0, bytes.length); + assertTrue("SnappyCompressDecompress getBytesRead error !!!", + compressor.getBytesRead() > 0); + assertTrue( + "SnappyCompressDecompress getBytesWritten before compress error !!!", + compressor.getBytesWritten() == 0); + + byte[] compressed = new byte[BYTE_SIZE]; + int cSize = compressor.compress(compressed, 0, compressed.length); + LOG.info("input size: {}", BYTE_SIZE); + LOG.info("compressed size: {}", cSize); + assertTrue( + "SnappyCompressDecompress getBytesWritten after compress error !!!", + compressor.getBytesWritten() > 0); + + SnappyDecompressor decompressor = new SnappyDecompressor(Math.max(cSize, BYTE_SIZE)); + // set as input for decompressor only compressed data indicated with cSize + decompressor.setInput(compressed, 0, cSize); + byte[] decompressed = new byte[BYTE_SIZE]; + decompressor.decompress(decompressed, 0, decompressed.length); + + assertTrue("testSnappyCompressDecompress finished error !!!", + decompressor.finished()); + Assert.assertArrayEquals(bytes, decompressed); + compressor.reset(); + decompressor.reset(); + assertTrue("decompressor getRemaining error !!!", + decompressor.getRemaining() == 0); } @Test From 2d7ea2d380026f236edc5666e22f94ede997033e Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 20:22:12 +0900 Subject: [PATCH 08/20] Increase compressed buffer --- .../compress/snappy/TestSnappyCompressorDecompressor.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index aa3ff0fd519fd..c23d6c87ad42d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -184,7 +184,10 @@ public void testSnappyCompressDecompress() throws Exception { "SnappyCompressDecompress getBytesWritten before compress error !!!", compressor.getBytesWritten() == 0); - byte[] compressed = new byte[BYTE_SIZE]; + // snappy compression may increase data. + // This calculation comes from "Snappy::MaxCompressedLength(size_t)" + int max_bytes = 32 + BYTE_SIZE + BYTE_SIZE / 6; + byte[] compressed = new byte[max_bytes]; int cSize = compressor.compress(compressed, 0, compressed.length); LOG.info("input size: {}", BYTE_SIZE); LOG.info("compressed size: {}", cSize); @@ -192,7 +195,7 @@ public void testSnappyCompressDecompress() throws Exception { "SnappyCompressDecompress getBytesWritten after compress error !!!", compressor.getBytesWritten() > 0); - SnappyDecompressor decompressor = new SnappyDecompressor(Math.max(cSize, BYTE_SIZE)); + SnappyDecompressor decompressor = new SnappyDecompressor(); // set as input for decompressor only compressed data indicated with cSize decompressor.setInput(compressed, 0, cSize); byte[] decompressed = new byte[BYTE_SIZE]; From 5fbbb51350afce00f2db4771ff3d201175b4ccd7 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 20:24:29 +0900 Subject: [PATCH 09/20] Tweak doc --- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index c23d6c87ad42d..fa22923f67a2b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -184,7 +184,7 @@ public void testSnappyCompressDecompress() throws Exception { "SnappyCompressDecompress getBytesWritten before compress error !!!", compressor.getBytesWritten() == 0); - // snappy compression may increase data. + // snappy compression may increase data size. // This calculation comes from "Snappy::MaxCompressedLength(size_t)" int max_bytes = 32 + BYTE_SIZE + BYTE_SIZE / 6; byte[] compressed = new byte[max_bytes]; From 86233882c5cf2ac24ead3331c4ba1e53339f9e58 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 20:37:51 +0900 Subject: [PATCH 10/20] Remove unused import --- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index fa22923f67a2b..b6a7ae2c50be3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -30,7 +30,6 @@ import java.io.IOException; import java.lang.reflect.Array; import java.nio.ByteBuffer; -import java.util.Arrays; import java.util.Random; import org.apache.hadoop.io.DataInputBuffer; From b188a47fffb46a01f8d17f299178af29028c7a67 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Fri, 8 May 2020 20:39:35 +0900 Subject: [PATCH 11/20] Use assertEquals instead of assertTrue --- .../compress/snappy/TestSnappyCompressorDecompressor.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index b6a7ae2c50be3..3a0f4465de502 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -179,9 +179,9 @@ public void testSnappyCompressDecompress() throws Exception { compressor.setInput(bytes, 0, bytes.length); assertTrue("SnappyCompressDecompress getBytesRead error !!!", compressor.getBytesRead() > 0); - assertTrue( + assertEquals( "SnappyCompressDecompress getBytesWritten before compress error !!!", - compressor.getBytesWritten() == 0); + 0, compressor.getBytesWritten()); // snappy compression may increase data size. // This calculation comes from "Snappy::MaxCompressedLength(size_t)" @@ -205,8 +205,8 @@ public void testSnappyCompressDecompress() throws Exception { Assert.assertArrayEquals(bytes, decompressed); compressor.reset(); decompressor.reset(); - assertTrue("decompressor getRemaining error !!!", - decompressor.getRemaining() == 0); + assertEquals("decompressor getRemaining error !!!", + 0, decompressor.getRemaining()); } @Test From dac152bb70a37d1c2517cf67c487fa52eb58f719 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Sat, 9 May 2020 02:11:18 +0900 Subject: [PATCH 12/20] Refactor test --- .../io/compress/CompressDecompressTester.java | 68 +++++++++---------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 35f84b950e427..13ba356aa9756 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -126,7 +126,7 @@ private void addPair(T compressor, E decompressor, String name) { builder.add(new TesterPair(name, compressor, decompressor)); } - public void test() throws InstantiationException, IllegalAccessException { + public void test() throws Exception { pairs = builder.build(); pairs = assertionDelegate.filterOnAssumeWhat(pairs); @@ -287,47 +287,43 @@ private boolean checkSetInputArrayIndexOutOfBoundsException( @Override public void assertCompression(String name, Compressor compressor, - Decompressor decompressor, byte[] rawData) { + Decompressor decompressor, byte[] rawData) throws Exception { int cSize = 0; int decompressedSize = 0; byte[] compressedResult = new byte[rawData.length]; byte[] decompressedBytes = new byte[rawData.length]; - try { - assertTrue( - joiner.join(name, "compressor.needsInput before error !!!"), - compressor.needsInput()); - assertTrue( + assertTrue( + joiner.join(name, "compressor.needsInput before error !!!"), + compressor.needsInput()); + assertEquals( joiner.join(name, "compressor.getBytesWritten before error !!!"), - compressor.getBytesWritten() == 0); - compressor.setInput(rawData, 0, rawData.length); - compressor.finish(); - while (!compressor.finished()) { - cSize += compressor.compress(compressedResult, 0, - compressedResult.length); - } - compressor.reset(); - - assertTrue( - joiner.join(name, "decompressor.needsInput() before error !!!"), - decompressor.needsInput()); - decompressor.setInput(compressedResult, 0, cSize); - assertFalse( - joiner.join(name, "decompressor.needsInput() after error !!!"), - decompressor.needsInput()); - while (!decompressor.finished()) { - decompressedSize = decompressor.decompress(decompressedBytes, 0, - decompressedBytes.length); - } - decompressor.reset(); - assertTrue(joiner.join(name, " byte size not equals error !!!"), - decompressedSize == rawData.length); - assertArrayEquals( - joiner.join(name, " byte arrays not equals error !!!"), rawData, - decompressedBytes); - } catch (Exception ex) { - fail(joiner.join(name, ex.getMessage())); + 0, compressor.getBytesWritten()); + compressor.setInput(rawData, 0, rawData.length); + compressor.finish(); + while (!compressor.finished()) { + cSize += compressor.compress(compressedResult, 0, + compressedResult.length); + } + compressor.reset(); + + assertTrue( + joiner.join(name, "decompressor.needsInput() before error !!!"), + decompressor.needsInput()); + decompressor.setInput(compressedResult, 0, cSize); + assertFalse( + joiner.join(name, "decompressor.needsInput() after error !!!"), + decompressor.needsInput()); + while (!decompressor.finished()) { + decompressedSize = decompressor.decompress(decompressedBytes, 0, + decompressedBytes.length); } + decompressor.reset(); + assertEquals(joiner.join(name, " byte size not equals error !!!"), + rawData.length, decompressedSize); + assertArrayEquals( + joiner.join(name, " byte arrays not equals error !!!"), rawData, + decompressedBytes); } }), @@ -519,6 +515,6 @@ abstract static class TesterCompressionStrategy { protected final Logger logger = Logger.getLogger(getClass()); abstract void assertCompression(String name, Compressor compressor, - Decompressor decompressor, byte[] originalRawData); + Decompressor decompressor, byte[] originalRawData) throws Exception; } } From d9f0d9fc3f69aba0b1063f35c24ccc16252d5963 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Sat, 9 May 2020 02:15:01 +0900 Subject: [PATCH 13/20] Fix TestCompressorDecompressor --- .../apache/hadoop/io/compress/CompressDecompressTester.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 13ba356aa9756..8be2dce06d1fe 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -291,7 +291,9 @@ public void assertCompression(String name, Compressor compressor, int cSize = 0; int decompressedSize = 0; - byte[] compressedResult = new byte[rawData.length]; + // Snappy compression can increase data size + int maxCompressedLength = 32 + rawData.length + rawData.length/6; + byte[] compressedResult = new byte[maxCompressedLength]; byte[] decompressedBytes = new byte[rawData.length]; assertTrue( joiner.join(name, "compressor.needsInput before error !!!"), From 6febbfa0618e9b3c09d0caf28d60d86c7a1a74fa Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Sun, 10 May 2020 02:20:16 +0900 Subject: [PATCH 14/20] Fix checkstyle warning --- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 3a0f4465de502..465fb4a0d24a2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -185,8 +185,8 @@ public void testSnappyCompressDecompress() throws Exception { // snappy compression may increase data size. // This calculation comes from "Snappy::MaxCompressedLength(size_t)" - int max_bytes = 32 + BYTE_SIZE + BYTE_SIZE / 6; - byte[] compressed = new byte[max_bytes]; + int maxSize = 32 + BYTE_SIZE + BYTE_SIZE / 6; + byte[] compressed = new byte[maxSize]; int cSize = compressor.compress(compressed, 0, compressed.length); LOG.info("input size: {}", BYTE_SIZE); LOG.info("compressed size: {}", cSize); From 0650432f439a27579204d71fd8b680dad73236bb Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 11 May 2020 08:37:00 +0900 Subject: [PATCH 15/20] Limit the compressed data within the buffer size for each compress call. --- .../org/apache/hadoop/io/compress/snappy/SnappyCompressor.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index e5e523fe7cbb1..3d386800e4d87 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -234,6 +234,8 @@ public int compress(byte[] b, int off, int len) finished = true; } + // Get atmost 'len' bytes + n = Math.min(n, len); bytesWritten += n; ((ByteBuffer) compressedDirectBuf).get(b, off, n); From 4a7befba1514eb764aa88478119e0a2463f6767b Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 11 May 2020 09:22:42 +0900 Subject: [PATCH 16/20] Add a test case for small buffer. --- .../TestSnappyCompressorDecompressor.java | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 465fb4a0d24a2..3956c7b6f956c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.io.compress.snappy; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -284,7 +285,37 @@ public void testSnappyBlockCompression() { fail("testSnappyBlockCompression ex error !!!"); } } - + + @Test + // Data size is greater than the buffer size. + public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { + int BYTE_SIZE = 1024 * 50; + int BUFFER_SIZE = 512; + ByteArrayOutputStream out = new ByteArrayOutputStream(); + byte[] buffer = new byte[BUFFER_SIZE]; + byte[] bytes = BytesGenerator.get(BYTE_SIZE); + + SnappyCompressor compressor = new SnappyCompressor(); + compressor.setInput(bytes, 0, BYTE_SIZE); + compressor.finish(); + while (!compressor.finished()) { + int len = compressor.compress(buffer, 0, buffer.length); + out.write(buffer, 0, len); + } + out.reset(); + byte[] compressed = out.toByteArray(); + + SnappyDecompressor decompressor = new SnappyDecompressor(); + decompressor.setInput(compressed, 0, compressed.length); + while (!decompressor.finished()) { + int len = decompressor.decompress(buffer, 0, buffer.length); + out.write(buffer, 0, len); + } + byte[] decompressed = out.toByteArray(); + + assertThat(decompressed).isEqualTo(bytes); + } + private void compressDecompressLoop(int rawDataSize) throws IOException { byte[] rawData = BytesGenerator.get(rawDataSize); byte[] compressedResult = new byte[rawDataSize+20]; From b487be0fd407ab232bda91dc37de4cf6f9fb1c59 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 11 May 2020 09:26:51 +0900 Subject: [PATCH 17/20] Fix infinite-loop --- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 3956c7b6f956c..13d6239d4de58 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -302,8 +302,8 @@ public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { int len = compressor.compress(buffer, 0, buffer.length); out.write(buffer, 0, len); } - out.reset(); byte[] compressed = out.toByteArray(); + out.reset(); SnappyDecompressor decompressor = new SnappyDecompressor(); decompressor.setInput(compressed, 0, compressed.length); From 175ddaa0800d5991a68da58b10f6a723ff082f01 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 11 May 2020 09:29:36 +0900 Subject: [PATCH 18/20] Add assertion for the compressed data --- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 13d6239d4de58..ff864189e4c64 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -303,6 +303,7 @@ public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { out.write(buffer, 0, len); } byte[] compressed = out.toByteArray(); + assertThat(compressed).hasSizeGreaterThan(0); out.reset(); SnappyDecompressor decompressor = new SnappyDecompressor(); From 5ef9ba5907a22c2a8f267664438f4380e69be3af Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 11 May 2020 09:34:17 +0900 Subject: [PATCH 19/20] Tweak the doc --- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index ff864189e4c64..b7c0a87fb8534 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -287,7 +287,7 @@ public void testSnappyBlockCompression() { } @Test - // Data size is greater than the buffer size. + // The buffer size is smaller than the input. public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { int BYTE_SIZE = 1024 * 50; int BUFFER_SIZE = 512; From ff47f779ac7e6d2a93e84207ec3bf12b62e88219 Mon Sep 17 00:00:00 2001 From: Akira Ajisaka Date: Mon, 11 May 2020 12:50:03 +0900 Subject: [PATCH 20/20] Rename local variables to fix check style warnings --- .../snappy/TestSnappyCompressorDecompressor.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index b7c0a87fb8534..c8900bad1df56 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -289,14 +289,14 @@ public void testSnappyBlockCompression() { @Test // The buffer size is smaller than the input. public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { - int BYTE_SIZE = 1024 * 50; - int BUFFER_SIZE = 512; + int inputSize = 1024 * 50; + int bufferSize = 512; ByteArrayOutputStream out = new ByteArrayOutputStream(); - byte[] buffer = new byte[BUFFER_SIZE]; - byte[] bytes = BytesGenerator.get(BYTE_SIZE); + byte[] buffer = new byte[bufferSize]; + byte[] input = BytesGenerator.get(inputSize); SnappyCompressor compressor = new SnappyCompressor(); - compressor.setInput(bytes, 0, BYTE_SIZE); + compressor.setInput(input, 0, inputSize); compressor.finish(); while (!compressor.finished()) { int len = compressor.compress(buffer, 0, buffer.length); @@ -314,7 +314,7 @@ public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { } byte[] decompressed = out.toByteArray(); - assertThat(decompressed).isEqualTo(bytes); + assertThat(decompressed).isEqualTo(input); } private void compressDecompressLoop(int rawDataSize) throws IOException {