From 23da5134a4a14853491d6674b1bfe848efc12b63 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Mon, 6 Jul 2020 16:13:34 -0700 Subject: [PATCH 01/23] SnappyCodec with java-snappy --- .../hadoop/io/compress/Decompressor.java | 2 +- .../hadoop/io/compress/SnappyCodec.java | 40 +--------------- .../io/compress/snappy/SnappyCompressor.java | 35 +++++--------- .../compress/snappy/SnappyDecompressor.java | 48 +++++++++---------- .../apache/hadoop/util/NativeCodeLoader.java | 5 -- .../hadoop/util/NativeLibraryChecker.java | 11 +---- .../io/compress/CompressDecompressTester.java | 48 ++++++------------- .../apache/hadoop/io/compress/TestCodec.java | 15 +++--- .../compress/TestCompressorDecompressor.java | 4 +- .../TestSnappyCompressorDecompressor.java | 23 ++++----- .../hadoop/util/TestNativeCodeLoader.java | 4 -- hadoop-project/pom.xml | 6 +++ 12 files changed, 76 insertions(+), 165 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java index 3808003de291d..e9558fab87325 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/Decompressor.java @@ -92,7 +92,7 @@ public interface Decompressor { * {@link #needsInput()} should be called in order to determine if more * input data is required. * - * @param b Buffer for the compressed data + * @param b Buffer for the uncompressed data * @param off Start offset of the data * @param len Size of the buffer * @return The actual number of bytes of uncompressed data. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java index 686f30c9f89a2..77cf36a339b34 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/SnappyCodec.java @@ -28,7 +28,6 @@ import org.apache.hadoop.io.compress.snappy.SnappyDecompressor; import org.apache.hadoop.io.compress.snappy.SnappyDecompressor.SnappyDirectDecompressor; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.util.NativeCodeLoader; /** * This class creates snappy compressors/decompressors. @@ -56,37 +55,6 @@ public Configuration getConf() { return conf; } - /** - * Are the native snappy libraries loaded & initialized? - */ - public static void checkNativeCodeLoaded() { - if (!NativeCodeLoader.buildSupportsSnappy()) { - throw new RuntimeException("native snappy library not available: " + - "this version of libhadoop was built without " + - "snappy support."); - } - if (!NativeCodeLoader.isNativeCodeLoaded()) { - throw new RuntimeException("Failed to load libhadoop."); - } - if (!SnappyCompressor.isNativeCodeLoaded()) { - throw new RuntimeException("native snappy library not available: " + - "SnappyCompressor has not been loaded."); - } - if (!SnappyDecompressor.isNativeCodeLoaded()) { - throw new RuntimeException("native snappy library not available: " + - "SnappyDecompressor has not been loaded."); - } - } - - public static boolean isNativeCodeLoaded() { - return SnappyCompressor.isNativeCodeLoaded() && - SnappyDecompressor.isNativeCodeLoaded(); - } - - public static String getLibraryName() { - return SnappyCompressor.getLibraryName(); - } - /** * Create a {@link CompressionOutputStream} that will write to the given * {@link OutputStream}. @@ -115,7 +83,6 @@ public CompressionOutputStream createOutputStream(OutputStream out) public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) throws IOException { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -133,7 +100,6 @@ public CompressionOutputStream createOutputStream(OutputStream out, */ @Override public Class getCompressorType() { - checkNativeCodeLoaded(); return SnappyCompressor.class; } @@ -144,7 +110,6 @@ public Class getCompressorType() { */ @Override public Compressor createCompressor() { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -179,7 +144,6 @@ public CompressionInputStream createInputStream(InputStream in) public CompressionInputStream createInputStream(InputStream in, Decompressor decompressor) throws IOException { - checkNativeCodeLoaded(); return new BlockDecompressorStream(in, decompressor, conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT)); @@ -192,7 +156,6 @@ public CompressionInputStream createInputStream(InputStream in, */ @Override public Class getDecompressorType() { - checkNativeCodeLoaded(); return SnappyDecompressor.class; } @@ -203,7 +166,6 @@ public Class getDecompressorType() { */ @Override public Decompressor createDecompressor() { - checkNativeCodeLoaded(); int bufferSize = conf.getInt( CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_KEY, CommonConfigurationKeys.IO_COMPRESSION_CODEC_SNAPPY_BUFFERSIZE_DEFAULT); @@ -215,7 +177,7 @@ public Decompressor createDecompressor() { */ @Override public DirectDecompressor createDirectDecompressor() { - return isNativeCodeLoaded() ? new SnappyDirectDecompressor() : null; + return new SnappyDirectDecompressor(); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 3d386800e4d87..5cf0a4aab620e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -24,9 +24,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.Compressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xerial.snappy.Snappy; /** * A {@link Compressor} based on the snappy compression algorithm. @@ -48,24 +48,6 @@ public class SnappyCompressor implements Compressor { private long bytesRead = 0L; private long bytesWritten = 0L; - private static boolean nativeSnappyLoaded = false; - - static { - if (NativeCodeLoader.isNativeCodeLoaded() && - NativeCodeLoader.buildSupportsSnappy()) { - try { - initIDs(); - nativeSnappyLoaded = true; - } catch (Throwable t) { - LOG.error("failed to load SnappyCompressor", t); - } - } - } - - public static boolean isNativeCodeLoaded() { - return nativeSnappyLoaded; - } - /** * Creates a new compressor. * @@ -291,9 +273,14 @@ public long getBytesWritten() { public void end() { } - private native static void initIDs(); - - private native int compressBytesDirect(); - - public native static String getLibraryName(); + private int compressBytesDirect() throws IOException { + if (uncompressedDirectBufLen == 0) { + return 0; + } else { + // Set the position and limit of `uncompressedDirectBuf` for reading + uncompressedDirectBuf.position(0).limit(uncompressedDirectBufLen); + return Snappy.compress((ByteBuffer) uncompressedDirectBuf, + (ByteBuffer) compressedDirectBuf); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index f31b76c347c5c..ec8c662f47d17 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -24,9 +24,9 @@ import org.apache.hadoop.io.compress.Decompressor; import org.apache.hadoop.io.compress.DirectDecompressor; -import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xerial.snappy.Snappy; /** * A {@link Decompressor} based on the snappy compression algorithm. @@ -45,24 +45,6 @@ public class SnappyDecompressor implements Decompressor { private int userBufOff = 0, userBufLen = 0; private boolean finished; - private static boolean nativeSnappyLoaded = false; - - static { - if (NativeCodeLoader.isNativeCodeLoaded() && - NativeCodeLoader.buildSupportsSnappy()) { - try { - initIDs(); - nativeSnappyLoaded = true; - } catch (Throwable t) { - LOG.error("failed to load SnappyDecompressor", t); - } - } - } - - public static boolean isNativeCodeLoaded() { - return nativeSnappyLoaded; - } - /** * Creates a new compressor. * @@ -201,7 +183,7 @@ public boolean finished() { * {@link #needsInput()} should be called in order to determine if more * input data is required. * - * @param b Buffer for the compressed data + * @param b Buffer for the uncompressed data * @param off Start offset of the data * @param len Size of the buffer * @return The actual number of bytes of compressed data. @@ -276,13 +258,27 @@ public void end() { // do nothing } - private native static void initIDs(); + private int decompressBytesDirect() throws IOException { + if (compressedDirectBufLen == 0) { + return 0; + } else { + // Set the position and limit of `compressedDirectBuf` for reading + compressedDirectBuf.position(0).limit(compressedDirectBufLen); + // There is compressed input, decompress it now. + int size = Snappy.uncompressedLength((ByteBuffer) compressedDirectBuf); + if (size > uncompressedDirectBuf.capacity()) { + throw new IOException("Could not decompress data. " + + "uncompressedDirectBuf length is too small."); + } + size = Snappy.uncompress((ByteBuffer) compressedDirectBuf, + (ByteBuffer) uncompressedDirectBuf); + return size; + } + } - private native int decompressBytesDirect(); - int decompressDirect(ByteBuffer src, ByteBuffer dst) throws IOException { assert (this instanceof SnappyDirectDecompressor); - + ByteBuffer presliced = dst; if (dst.position() > 0) { presliced = dst; @@ -311,10 +307,10 @@ int decompressDirect(ByteBuffer src, ByteBuffer dst) throws IOException { } return n; } - + public static class SnappyDirectDecompressor extends SnappyDecompressor implements DirectDecompressor { - + @Override public boolean finished() { return (endOfInput && super.finished()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java index a8a380ed070d1..11d1176f92a59 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeCodeLoader.java @@ -74,11 +74,6 @@ public static boolean isNativeCodeLoaded() { return nativeCodeLoaded; } - /** - * Returns true only if this build was compiled with support for snappy. - */ - public static native boolean buildSupportsSnappy(); - /** * Returns true only if this build was compiled with support for ISA-L. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java index 23388248575ac..e40f01195ba07 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/NativeLibraryChecker.java @@ -23,7 +23,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.OpensslCipher; import org.apache.hadoop.io.compress.Lz4Codec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.io.compress.bzip2.Bzip2Factory; import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -67,7 +66,6 @@ public static void main(String[] args) { Configuration conf = new Configuration(); boolean nativeHadoopLoaded = NativeCodeLoader.isNativeCodeLoaded(); boolean zlibLoaded = false; - boolean snappyLoaded = false; boolean isalLoaded = false; boolean zStdLoaded = false; boolean pmdkLoaded = false; @@ -80,7 +78,6 @@ public static void main(String[] args) { String openSslDetail = ""; String hadoopLibraryName = ""; String zlibLibraryName = ""; - String snappyLibraryName = ""; String isalDetail = ""; String pmdkDetail = ""; String zstdLibraryName = ""; @@ -99,11 +96,6 @@ public static void main(String[] args) { if (zStdLoaded && NativeCodeLoader.buildSupportsZstd()) { zstdLibraryName = ZStandardCodec.getLibraryName(); } - snappyLoaded = NativeCodeLoader.buildSupportsSnappy() && - SnappyCodec.isNativeCodeLoaded(); - if (snappyLoaded && NativeCodeLoader.buildSupportsSnappy()) { - snappyLibraryName = SnappyCodec.getLibraryName(); - } isalDetail = ErasureCodeNative.getLoadingFailureReason(); if (isalDetail != null) { @@ -152,7 +144,6 @@ public static void main(String[] args) { System.out.printf("hadoop: %b %s%n", nativeHadoopLoaded, hadoopLibraryName); System.out.printf("zlib: %b %s%n", zlibLoaded, zlibLibraryName); System.out.printf("zstd : %b %s%n", zStdLoaded, zstdLibraryName); - System.out.printf("snappy: %b %s%n", snappyLoaded, snappyLibraryName); System.out.printf("lz4: %b %s%n", lz4Loaded, lz4LibraryName); System.out.printf("bzip2: %b %s%n", bzip2Loaded, bzip2LibraryName); System.out.printf("openssl: %b %s%n", openSslLoaded, openSslDetail); @@ -164,7 +155,7 @@ public static void main(String[] args) { } if ((!nativeHadoopLoaded) || (Shell.WINDOWS && (!winutilsExists)) || - (checkAll && !(zlibLoaded && snappyLoaded && lz4Loaded + (checkAll && !(zlibLoaded && lz4Loaded && bzip2Loaded && isalLoaded && zStdLoaded))) { // return 1 to indicated check failed ExitUtil.terminate(1); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 8be2dce06d1fe..52831c7fde6e1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -79,27 +79,6 @@ public ImmutableList> filterOnAssumeWhat( }; } - private static boolean isNativeSnappyLoadable() { - boolean snappyAvailable = false; - boolean loaded = false; - try { - System.loadLibrary("snappy"); - logger.warn("Snappy native library is available"); - snappyAvailable = true; - boolean hadoopNativeAvailable = NativeCodeLoader.isNativeCodeLoaded(); - loaded = snappyAvailable && hadoopNativeAvailable; - if (loaded) { - logger.info("Snappy native library loaded"); - } else { - logger.warn("Snappy native library not loaded"); - } - } catch (Throwable t) { - logger.warn("Failed to load snappy: ", t); - return false; - } - return loaded; - } - public static CompressDecompressTester of( byte[] rawData) { return new CompressDecompressTester(rawData); @@ -421,13 +400,19 @@ public void assertCompression(String name, Compressor compressor, for (Integer step : blockLabels) { decompressor.setInput(compressedBytes, off, step); while (!decompressor.finished()) { - int dSize = decompressor.decompress(operationBlock, 0, - operationBlock.length); - decompressOut.write(operationBlock, 0, dSize); + try { + int dSize = decompressor.decompress(operationBlock, 0, + operationBlock.length); + decompressOut.write(operationBlock, 0, dSize); + } catch (NullPointerException ex) { + int b = 10; + + } } decompressor.reset(); off = off + step; } + int a = 10; assertArrayEquals( joiner.join(name, "byte arrays not equals error !!!"), originalRawData, decompressOut.toByteArray()); @@ -495,19 +480,16 @@ private static boolean isAvailabl Compressor compressor = pair.compressor; if (compressor.getClass().isAssignableFrom(Lz4Compressor.class) - && (NativeCodeLoader.isNativeCodeLoaded())) + && (NativeCodeLoader.isNativeCodeLoaded())) { return true; - - else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) - && NativeCodeLoader.isNativeCodeLoaded()) + } else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) + && NativeCodeLoader.isNativeCodeLoaded()) { return true; - - else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) { + } else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) { return ZlibFactory.isNativeZlibLoaded(new Configuration()); - } - else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class) - && isNativeSnappyLoadable()) + } else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)) { return true; + } return false; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java index 94ff7a88493c7..cfb94bd65794f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java @@ -77,7 +77,6 @@ import org.apache.hadoop.util.ReflectionUtils; import org.junit.After; import org.junit.Assert; -import org.junit.Assume; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -135,10 +134,8 @@ public void testBZip2NativeCodec() throws IOException { @Test public void testSnappyCodec() throws IOException { - if (SnappyCodec.isNativeCodeLoaded()) { - codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.SnappyCodec"); - codecTest(conf, seed, count, "org.apache.hadoop.io.compress.SnappyCodec"); - } + codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.SnappyCodec"); + codecTest(conf, seed, count, "org.apache.hadoop.io.compress.SnappyCodec"); } @Test @@ -247,8 +244,13 @@ private static void codecTest(Configuration conf, int seed, int count, RandomDatum v2 = new RandomDatum(); k2.readFields(inflateIn); v2.readFields(inflateIn); + + boolean debug = k1.equals(k2) && v1.equals(v2); + if(!debug) { + int a = 10; + } assertTrue("original and compressed-then-decompressed-output not equal", - k1.equals(k2) && v1.equals(v2)); + debug); // original and compressed-then-decompressed-output have the same // hashCode @@ -614,7 +616,6 @@ private static void sequenceFileCodecTest(Configuration conf, int lines, */ @Test public void testSnappyMapFile() throws Exception { - Assume.assumeTrue(SnappyCodec.isNativeCodeLoaded()); codecTestMapFile(SnappyCodec.class, CompressionType.BLOCK, 100); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java index 1f035974883cf..afc7740d9e216 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java @@ -60,7 +60,7 @@ public void testCompressorDecompressor() { .withCompressDecompressPair(new Lz4Compressor(), new Lz4Decompressor()) .withCompressDecompressPair(new BuiltInZlibDeflater(), new BuiltInZlibInflater()) .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, - CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + // CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) .test(); @@ -83,7 +83,7 @@ public void testCompressorDecompressorWithExeedBufferLimit() { .withCompressDecompressPair(new Lz4Compressor(BYTE_SIZE), new Lz4Decompressor(BYTE_SIZE)) .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, - CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, +// CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) .test(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index c8900bad1df56..53472a779a16e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -39,7 +39,6 @@ import org.apache.hadoop.io.compress.BlockDecompressorStream; import org.apache.hadoop.io.compress.CompressionInputStream; import org.apache.hadoop.io.compress.CompressionOutputStream; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.io.compress.snappy.SnappyDecompressor.SnappyDirectDecompressor; import org.apache.hadoop.test.MultithreadedTestUtil; import org.junit.Assert; @@ -48,8 +47,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.Assume.*; - public class TestSnappyCompressorDecompressor { public static final Logger LOG = @@ -57,7 +54,6 @@ public class TestSnappyCompressorDecompressor { @Before public void before() { - assumeTrue(SnappyCodec.isNativeCodeLoaded()); } @Test @@ -318,46 +314,45 @@ public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { } private void compressDecompressLoop(int rawDataSize) throws IOException { - byte[] rawData = BytesGenerator.get(rawDataSize); + byte[] rawData = BytesGenerator.get(rawDataSize); byte[] compressedResult = new byte[rawDataSize+20]; - int directBufferSize = Math.max(rawDataSize*2, 64*1024); + int directBufferSize = Math.max(rawDataSize*2, 64*1024); SnappyCompressor compressor = new SnappyCompressor(directBufferSize); compressor.setInput(rawData, 0, rawDataSize); int compressedSize = compressor.compress(compressedResult, 0, compressedResult.length); SnappyDirectDecompressor decompressor = new SnappyDirectDecompressor(); - + ByteBuffer inBuf = ByteBuffer.allocateDirect(compressedSize); ByteBuffer outBuf = ByteBuffer.allocateDirect(rawDataSize); inBuf.put(compressedResult, 0, compressedSize); - inBuf.flip(); + inBuf.flip(); ByteBuffer expected = ByteBuffer.wrap(rawData); - + outBuf.clear(); while(!decompressor.finished()) { decompressor.decompress(inBuf, outBuf); if (outBuf.remaining() == 0) { outBuf.flip(); - while (outBuf.remaining() > 0) { + while (outBuf.remaining() > 0) { assertEquals(expected.get(), outBuf.get()); } outBuf.clear(); } } outBuf.flip(); - while (outBuf.remaining() > 0) { + while (outBuf.remaining() > 0) { assertEquals(expected.get(), outBuf.get()); } outBuf.clear(); - + assertEquals(0, expected.remaining()); } @Test public void testSnappyDirectBlockCompression() { - int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 }; - assumeTrue(SnappyCodec.isNativeCodeLoaded()); + int[] size = {4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024}; try { for (int i = 0; i < size.length; i++) { compressDecompressLoop(size[i]); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java index 58874fdcdfba6..d3da6c191071d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestNativeCodeLoader.java @@ -22,7 +22,6 @@ import org.apache.hadoop.crypto.OpensslCipher; import org.apache.hadoop.io.compress.Lz4Codec; -import org.apache.hadoop.io.compress.SnappyCodec; import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.util.NativeCodeLoader; import org.slf4j.Logger; @@ -52,9 +51,6 @@ public void testNativeCodeLoaded() { // library names are depended on platform and build envs // so just check names are available assertFalse(ZlibFactory.getLibraryName().isEmpty()); - if (NativeCodeLoader.buildSupportsSnappy()) { - assertFalse(SnappyCodec.getLibraryName().isEmpty()); - } if (NativeCodeLoader.buildSupportsOpenssl()) { assertFalse(OpensslCipher.getLibraryName().isEmpty()); } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 4b7585ea5e6e4..811947f2af8ce 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -141,6 +141,7 @@ 3.2.4 3.10.6.Final 4.1.50.Final + 1.1.7.7 0.5.1 @@ -1777,6 +1778,11 @@ + + org.xerial.snappy + snappy-java + ${snappy-java.version} + From 3aa3f55a770dc1cf736142786dd791ea78960453 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Sun, 30 Aug 2020 20:46:33 -0700 Subject: [PATCH 02/23] rebase master --- hadoop-common-project/hadoop-common/pom.xml | 4 ++++ .../hadoop/io/compress/TestCompressorDecompressor.java | 5 +++-- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index fb4193a074a33..aa04ada65ec8d 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -363,6 +363,10 @@ wildfly-openssl-java provided + + org.xerial.snappy + snappy-java + diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java index afc7740d9e216..02476056d02ea 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java @@ -59,8 +59,9 @@ public void testCompressorDecompressor() { .withCompressDecompressPair(new SnappyCompressor(), new SnappyDecompressor()) .withCompressDecompressPair(new Lz4Compressor(), new Lz4Decompressor()) .withCompressDecompressPair(new BuiltInZlibDeflater(), new BuiltInZlibInflater()) - .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, - // CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + .withTestCases(ImmutableSet.of( + // CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) .test(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 53472a779a16e..6f869679a5818 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -352,7 +352,7 @@ private void compressDecompressLoop(int rawDataSize) throws IOException { @Test public void testSnappyDirectBlockCompression() { - int[] size = {4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024}; + int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 }; try { for (int i = 0; i < size.length; i++) { compressDecompressLoop(size[i]); From 65fd9f4f2e40da5f64e46afb072fe70ad2ece6ae Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 10 Sep 2020 12:35:10 -0700 Subject: [PATCH 03/23] Reset compressedDirectBuf and uncompressedDirectBuf. --- .../apache/hadoop/io/compress/snappy/SnappyCompressor.java | 7 +++++-- .../hadoop/io/compress/snappy/SnappyDecompressor.java | 6 ++++-- .../hadoop/io/compress/CompressDecompressTester.java | 7 ++++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 5cf0a4aab620e..d496373a48712 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -278,9 +278,12 @@ private int compressBytesDirect() throws IOException { return 0; } else { // Set the position and limit of `uncompressedDirectBuf` for reading - uncompressedDirectBuf.position(0).limit(uncompressedDirectBufLen); - return Snappy.compress((ByteBuffer) uncompressedDirectBuf, + uncompressedDirectBuf.limit(uncompressedDirectBufLen).position(0); + int size = Snappy.compress((ByteBuffer) uncompressedDirectBuf, (ByteBuffer) compressedDirectBuf); + uncompressedDirectBufLen = 0; + uncompressedDirectBuf.limit(directBufferSize).position(0); + return size; } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index ec8c662f47d17..8b8c590c24854 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -263,15 +263,17 @@ private int decompressBytesDirect() throws IOException { return 0; } else { // Set the position and limit of `compressedDirectBuf` for reading - compressedDirectBuf.position(0).limit(compressedDirectBufLen); + compressedDirectBuf.limit(compressedDirectBufLen).position(0); // There is compressed input, decompress it now. int size = Snappy.uncompressedLength((ByteBuffer) compressedDirectBuf); - if (size > uncompressedDirectBuf.capacity()) { + if (size > uncompressedDirectBuf.remaining()) { throw new IOException("Could not decompress data. " + "uncompressedDirectBuf length is too small."); } size = Snappy.uncompress((ByteBuffer) compressedDirectBuf, (ByteBuffer) uncompressedDirectBuf); + compressedDirectBufLen = 0; + compressedDirectBuf.limit(directBufferSize).position(0); return size; } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 52831c7fde6e1..356c5ef986f4f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -29,6 +29,7 @@ import java.util.Arrays; import java.util.List; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.lz4.Lz4Compressor; import org.apache.hadoop.io.compress.snappy.SnappyCompressor; @@ -417,7 +418,11 @@ public void assertCompression(String name, Compressor compressor, joiner.join(name, "byte arrays not equals error !!!"), originalRawData, decompressOut.toByteArray()); } catch (Exception ex) { - fail(joiner.join(name, ex.getMessage())); + if (ex.getMessage() != null) { + fail(joiner.join(name, ex.getMessage())); + } else { + fail(joiner.join(name, ExceptionUtils.getStackTrace(ex))); + } } finally { try { compressedOut.close(); From 9c0f08b274261b00436c50f7fe67f7d46486015c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 10 Sep 2020 12:55:02 -0700 Subject: [PATCH 04/23] Revert some debugging code. --- .../hadoop/io/compress/CompressDecompressTester.java | 12 +++--------- .../org/apache/hadoop/io/compress/TestCodec.java | 9 ++------- .../io/compress/TestCompressorDecompressor.java | 5 ++--- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 356c5ef986f4f..afa9e0c03c429 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -401,19 +401,13 @@ public void assertCompression(String name, Compressor compressor, for (Integer step : blockLabels) { decompressor.setInput(compressedBytes, off, step); while (!decompressor.finished()) { - try { - int dSize = decompressor.decompress(operationBlock, 0, - operationBlock.length); - decompressOut.write(operationBlock, 0, dSize); - } catch (NullPointerException ex) { - int b = 10; - - } + int dSize = decompressor.decompress(operationBlock, 0, + operationBlock.length); + decompressOut.write(operationBlock, 0, dSize); } decompressor.reset(); off = off + step; } - int a = 10; assertArrayEquals( joiner.join(name, "byte arrays not equals error !!!"), originalRawData, decompressOut.toByteArray()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java index cfb94bd65794f..1fa4e13773912 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java @@ -244,14 +244,9 @@ private static void codecTest(Configuration conf, int seed, int count, RandomDatum v2 = new RandomDatum(); k2.readFields(inflateIn); v2.readFields(inflateIn); - - boolean debug = k1.equals(k2) && v1.equals(v2); - if(!debug) { - int a = 10; - } assertTrue("original and compressed-then-decompressed-output not equal", - debug); - + k1.equals(k2) && v1.equals(v2)); + // original and compressed-then-decompressed-output have the same // hashCode Map m = new HashMap(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java index 02476056d02ea..1f035974883cf 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCompressorDecompressor.java @@ -59,8 +59,7 @@ public void testCompressorDecompressor() { .withCompressDecompressPair(new SnappyCompressor(), new SnappyDecompressor()) .withCompressDecompressPair(new Lz4Compressor(), new Lz4Decompressor()) .withCompressDecompressPair(new BuiltInZlibDeflater(), new BuiltInZlibInflater()) - .withTestCases(ImmutableSet.of( - // CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, + .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) @@ -84,7 +83,7 @@ public void testCompressorDecompressorWithExeedBufferLimit() { .withCompressDecompressPair(new Lz4Compressor(BYTE_SIZE), new Lz4Decompressor(BYTE_SIZE)) .withTestCases(ImmutableSet.of(CompressionTestStrategy.COMPRESS_DECOMPRESS_SINGLE_BLOCK, -// CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, + CompressionTestStrategy.COMPRESS_DECOMPRESS_BLOCK, CompressionTestStrategy.COMPRESS_DECOMPRESS_ERRORS, CompressionTestStrategy.COMPRESS_DECOMPRESS_WITH_EMPTY_STREAM)) .test(); From f52dd205707259064247b23f7443f5840bace62f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 10 Sep 2020 15:52:49 -0700 Subject: [PATCH 05/23] Remove snappy native code. --- .../io/compress/snappy/SnappyCompressor.c | 166 ------------------ .../io/compress/snappy/SnappyDecompressor.c | 133 -------------- .../org_apache_hadoop_io_compress_snappy.h | 33 ---- 3 files changed, 332 deletions(-) delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c delete mode 100644 hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c deleted file mode 100644 index 9a09f078d8260..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyCompressor.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include "org_apache_hadoop_io_compress_snappy.h" - -#if defined HADOOP_SNAPPY_LIBRARY - -#include -#include -#include - -#ifdef UNIX -#include -#include "config.h" -#endif // UNIX - -#ifdef WINDOWS -#include "winutils.h" -#endif - -#include "org_apache_hadoop_io_compress_snappy_SnappyCompressor.h" - -#define JINT_MAX 0x7fffffff - -static jfieldID SnappyCompressor_uncompressedDirectBuf; -static jfieldID SnappyCompressor_uncompressedDirectBufLen; -static jfieldID SnappyCompressor_compressedDirectBuf; -static jfieldID SnappyCompressor_directBufferSize; - -#ifdef UNIX -static snappy_status (*dlsym_snappy_compress)(const char*, size_t, char*, size_t*); -#endif - -#ifdef WINDOWS -typedef snappy_status (__cdecl *__dlsym_snappy_compress)(const char*, size_t, char*, size_t*); -static __dlsym_snappy_compress dlsym_snappy_compress; -#endif - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_initIDs -(JNIEnv *env, jclass clazz){ -#ifdef UNIX - // Load libsnappy.so - void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); - if (!libsnappy) { - char msg[1000]; - snprintf(msg, 1000, "%s (%s)!", "Cannot load " HADOOP_SNAPPY_LIBRARY, dlerror()); - THROW(env, "java/lang/UnsatisfiedLinkError", msg); - return; - } -#endif - -#ifdef WINDOWS - HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY); - if (!libsnappy) { - THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll"); - return; - } -#endif - - // Locate the requisite symbols from libsnappy.so -#ifdef UNIX - dlerror(); // Clear any existing error - LOAD_DYNAMIC_SYMBOL(dlsym_snappy_compress, env, libsnappy, "snappy_compress"); -#endif - -#ifdef WINDOWS - LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_compress, dlsym_snappy_compress, env, libsnappy, "snappy_compress"); -#endif - - SnappyCompressor_uncompressedDirectBuf = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyCompressor_uncompressedDirectBufLen = (*env)->GetFieldID(env, clazz, - "uncompressedDirectBufLen", "I"); - SnappyCompressor_compressedDirectBuf = (*env)->GetFieldID(env, clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyCompressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_compressBytesDirect -(JNIEnv *env, jobject thisj){ - const char* uncompressed_bytes; - char* compressed_bytes; - snappy_status ret; - // Get members of SnappyCompressor - jobject uncompressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_uncompressedDirectBuf); - jint uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyCompressor_uncompressedDirectBufLen); - jobject compressed_direct_buf = (*env)->GetObjectField(env, thisj, SnappyCompressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyCompressor_directBufferSize); - size_t buf_len; - - // Get the input direct buffer - uncompressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - compressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - /* size_t should always be 4 bytes or larger. */ - buf_len = (size_t)compressed_direct_buf_len; - ret = dlsym_snappy_compress(uncompressed_bytes, uncompressed_direct_buf_len, - compressed_bytes, &buf_len); - if (ret != SNAPPY_OK){ - THROW(env, "java/lang/InternalError", "Could not compress data. Buffer length is too small."); - return 0; - } - if (buf_len > JINT_MAX) { - THROW(env, "java/lang/InternalError", "Invalid return buffer length."); - return 0; - } - - (*env)->SetIntField(env, thisj, SnappyCompressor_uncompressedDirectBufLen, 0); - return (jint)buf_len; -} - -JNIEXPORT jstring JNICALL -Java_org_apache_hadoop_io_compress_snappy_SnappyCompressor_getLibraryName(JNIEnv *env, jclass class) { -#ifdef UNIX - if (dlsym_snappy_compress) { - Dl_info dl_info; - if(dladdr( - dlsym_snappy_compress, - &dl_info)) { - return (*env)->NewStringUTF(env, dl_info.dli_fname); - } - } - - return (*env)->NewStringUTF(env, HADOOP_SNAPPY_LIBRARY); -#endif - -#ifdef WINDOWS - LPWSTR filename = NULL; - GetLibraryName(dlsym_snappy_compress, &filename); - if (filename != NULL) { - return (*env)->NewString(env, filename, (jsize) wcslen(filename)); - } else { - return (*env)->NewStringUTF(env, "Unavailable"); - } -#endif -} -#endif //define HADOOP_SNAPPY_LIBRARY diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c deleted file mode 100644 index 69ec1017526fd..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "org_apache_hadoop_io_compress_snappy.h" - -#if defined HADOOP_SNAPPY_LIBRARY - -#include -#include -#include - -#ifdef UNIX -#include -#include "config.h" -#endif - -#include "org_apache_hadoop_io_compress_snappy_SnappyDecompressor.h" - -static jfieldID SnappyDecompressor_compressedDirectBuf; -static jfieldID SnappyDecompressor_compressedDirectBufLen; -static jfieldID SnappyDecompressor_uncompressedDirectBuf; -static jfieldID SnappyDecompressor_directBufferSize; - -#ifdef UNIX -static snappy_status (*dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); -#endif - -#ifdef WINDOWS -typedef snappy_status (__cdecl *__dlsym_snappy_uncompress)(const char*, size_t, char*, size_t*); -static __dlsym_snappy_uncompress dlsym_snappy_uncompress; -#endif - -JNIEXPORT void JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_initIDs -(JNIEnv *env, jclass clazz){ - - // Load libsnappy.so -#ifdef UNIX - void *libsnappy = dlopen(HADOOP_SNAPPY_LIBRARY, RTLD_LAZY | RTLD_GLOBAL); - if (!libsnappy) { - char* msg = (char*)malloc(1000); - snprintf(msg, 1000, "%s (%s)!", "Cannot load " HADOOP_SNAPPY_LIBRARY, dlerror()); - THROW(env, "java/lang/UnsatisfiedLinkError", msg); - return; - } -#endif - -#ifdef WINDOWS - HMODULE libsnappy = LoadLibrary(HADOOP_SNAPPY_LIBRARY); - if (!libsnappy) { - THROW(env, "java/lang/UnsatisfiedLinkError", "Cannot load snappy.dll"); - return; - } -#endif - - // Locate the requisite symbols from libsnappy.so -#ifdef UNIX - dlerror(); // Clear any existing error - LOAD_DYNAMIC_SYMBOL(dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress"); - -#endif - -#ifdef WINDOWS - LOAD_DYNAMIC_SYMBOL(__dlsym_snappy_uncompress, dlsym_snappy_uncompress, env, libsnappy, "snappy_uncompress"); -#endif - - SnappyDecompressor_compressedDirectBuf = (*env)->GetFieldID(env,clazz, - "compressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyDecompressor_compressedDirectBufLen = (*env)->GetFieldID(env,clazz, - "compressedDirectBufLen", "I"); - SnappyDecompressor_uncompressedDirectBuf = (*env)->GetFieldID(env,clazz, - "uncompressedDirectBuf", - "Ljava/nio/Buffer;"); - SnappyDecompressor_directBufferSize = (*env)->GetFieldID(env, clazz, - "directBufferSize", "I"); -} - -JNIEXPORT jint JNICALL Java_org_apache_hadoop_io_compress_snappy_SnappyDecompressor_decompressBytesDirect -(JNIEnv *env, jobject thisj){ - const char* compressed_bytes = NULL; - char* uncompressed_bytes = NULL; - snappy_status ret; - // Get members of SnappyDecompressor - jobject compressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_compressedDirectBuf); - jint compressed_direct_buf_len = (*env)->GetIntField(env,thisj, SnappyDecompressor_compressedDirectBufLen); - jobject uncompressed_direct_buf = (*env)->GetObjectField(env,thisj, SnappyDecompressor_uncompressedDirectBuf); - size_t uncompressed_direct_buf_len = (*env)->GetIntField(env, thisj, SnappyDecompressor_directBufferSize); - - // Get the input direct buffer - compressed_bytes = (const char*)(*env)->GetDirectBufferAddress(env, compressed_direct_buf); - - if (compressed_bytes == 0) { - return (jint)0; - } - - // Get the output direct buffer - uncompressed_bytes = (char *)(*env)->GetDirectBufferAddress(env, uncompressed_direct_buf); - - if (uncompressed_bytes == 0) { - return (jint)0; - } - - ret = dlsym_snappy_uncompress(compressed_bytes, compressed_direct_buf_len, - uncompressed_bytes, &uncompressed_direct_buf_len); - if (ret == SNAPPY_BUFFER_TOO_SMALL){ - THROW(env, "java/lang/InternalError", "Could not decompress data. Buffer length is too small."); - } else if (ret == SNAPPY_INVALID_INPUT){ - THROW(env, "java/lang/InternalError", "Could not decompress data. Input is invalid."); - } else if (ret != SNAPPY_OK){ - THROW(env, "java/lang/InternalError", "Could not decompress data."); - } - - (*env)->SetIntField(env, thisj, SnappyDecompressor_compressedDirectBufLen, 0); - - return (jint)uncompressed_direct_buf_len; -} - -#endif //define HADOOP_SNAPPY_LIBRARY diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h deleted file mode 100644 index 8394efe477462..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/compress/snappy/org_apache_hadoop_io_compress_snappy.h +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#ifndef ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H -#define ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H - -#include "org_apache_hadoop.h" - -#ifdef UNIX -#include -#endif - -#include -#include -#include - -#endif //ORG_APACHE_HADOOP_IO_COMPRESS_SNAPPY_SNAPPY_H From 87903a929342688cc39a512d931441b438b74e72 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 10 Sep 2020 18:28:11 -0700 Subject: [PATCH 06/23] Remove snappy compilation. --- .../hadoop-common/src/CMakeLists.txt | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/CMakeLists.txt b/hadoop-common-project/hadoop-common/src/CMakeLists.txt index 10591f6ce2aa8..71c950b7f12ca 100644 --- a/hadoop-common-project/hadoop-common/src/CMakeLists.txt +++ b/hadoop-common-project/hadoop-common/src/CMakeLists.txt @@ -67,33 +67,6 @@ else() endif() set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) -# Require snappy. -set(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) -hadoop_set_find_shared_library_version("1") -find_library(SNAPPY_LIBRARY - NAMES snappy - PATHS ${CUSTOM_SNAPPY_PREFIX} ${CUSTOM_SNAPPY_PREFIX}/lib - ${CUSTOM_SNAPPY_PREFIX}/lib64 ${CUSTOM_SNAPPY_LIB}) -set(CMAKE_FIND_LIBRARY_SUFFIXES ${STORED_CMAKE_FIND_LIBRARY_SUFFIXES}) -find_path(SNAPPY_INCLUDE_DIR - NAMES snappy.h - PATHS ${CUSTOM_SNAPPY_PREFIX} ${CUSTOM_SNAPPY_PREFIX}/include - ${CUSTOM_SNAPPY_INCLUDE}) -if(SNAPPY_LIBRARY AND SNAPPY_INCLUDE_DIR) - get_filename_component(HADOOP_SNAPPY_LIBRARY ${SNAPPY_LIBRARY} NAME) - set(SNAPPY_SOURCE_FILES - "${SRC}/io/compress/snappy/SnappyCompressor.c" - "${SRC}/io/compress/snappy/SnappyDecompressor.c") - set(REQUIRE_SNAPPY ${REQUIRE_SNAPPY}) # Stop warning about unused variable. - message(STATUS "Found Snappy: ${SNAPPY_LIBRARY}") -else() - set(SNAPPY_INCLUDE_DIR "") - set(SNAPPY_SOURCE_FILES "") - if(REQUIRE_SNAPPY) - message(FATAL_ERROR "Required snappy library could not be found. SNAPPY_LIBRARY=${SNAPPY_LIBRARY}, SNAPPY_INCLUDE_DIR=${SNAPPY_INCLUDE_DIR}, CUSTOM_SNAPPY_INCLUDE_DIR=${CUSTOM_SNAPPY_INCLUDE_DIR}, CUSTOM_SNAPPY_PREFIX=${CUSTOM_SNAPPY_PREFIX}, CUSTOM_SNAPPY_INCLUDE=${CUSTOM_SNAPPY_INCLUDE}") - endif() -endif() - # Require zstandard SET(STORED_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) hadoop_set_find_shared_library_version("1") @@ -253,7 +226,6 @@ include_directories( ${JNI_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS} ${BZIP2_INCLUDE_DIR} - ${SNAPPY_INCLUDE_DIR} ${ISAL_INCLUDE_DIR} ${ZSTD_INCLUDE_DIR} ${OPENSSL_INCLUDE_DIR} @@ -269,7 +241,6 @@ hadoop_add_dual_library(hadoop ${SRC}/io/compress/lz4/lz4.c ${SRC}/io/compress/lz4/lz4hc.c ${ISAL_SOURCE_FILES} - ${SNAPPY_SOURCE_FILES} ${ZSTD_SOURCE_FILES} ${OPENSSL_SOURCE_FILES} ${SRC}/io/compress/zlib/ZlibCompressor.c From 5adcf53117d0338910376146766365aa25a13e19 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 11 Sep 2020 10:13:16 -0700 Subject: [PATCH 07/23] Fix limit parameter. --- .../apache/hadoop/io/compress/snappy/SnappyDecompressor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index 8b8c590c24854..ea09ea1e4b01c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -273,7 +273,7 @@ private int decompressBytesDirect() throws IOException { size = Snappy.uncompress((ByteBuffer) compressedDirectBuf, (ByteBuffer) uncompressedDirectBuf); compressedDirectBufLen = 0; - compressedDirectBuf.limit(directBufferSize).position(0); + compressedDirectBuf.limit(compressedDirectBuf.capacity()).position(0); return size; } } From 40cc18a897efd3bbcfbf14bf77babf77890103e8 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 13 Sep 2020 15:20:17 -0700 Subject: [PATCH 08/23] Remove require.snappy. --- hadoop-common-project/hadoop-common/pom.xml | 17 ------------ .../src/main/native/native.vcxproj | 27 +------------------ hadoop-project-dist/pom.xml | 2 -- hadoop-project/pom.xml | 2 -- 4 files changed, 1 insertion(+), 47 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index aa04ada65ec8d..3f66a2c422fbf 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -646,10 +646,6 @@ false - - - - false @@ -703,11 +699,7 @@ ${project.build.directory}/native/javah ${sun.arch.data.model} ${require.bzip2} - ${require.snappy} ${require.zstd} - ${snappy.prefix} - ${snappy.lib} - ${snappy.include} ${zstd.prefix} ${zstd.lib} ${zstd.include} @@ -762,14 +754,9 @@ - - - false - false - true @@ -869,10 +856,6 @@ /nologo /p:Configuration=Release /p:OutDir=${project.build.directory}/bin/ - /p:CustomSnappyPrefix=${snappy.prefix} - /p:CustomSnappyLib=${snappy.lib} - /p:CustomSnappyInclude=${snappy.include} - /p:RequireSnappy=${require.snappy} /p:CustomZstdPrefix=${zstd.prefix} /p:CustomZstdLib=${zstd.lib} /p:CustomZstdInclude=${zstd.include} diff --git a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj index ac3767b276882..dd884fc650149 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj +++ b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj @@ -17,7 +17,7 @@ limitations under the License. --> - + Release @@ -68,18 +68,6 @@ ..\..\..\target\native\$(Configuration)\ hadoop - - $(CustomSnappyPrefix) - $(CustomSnappyPrefix)\lib - $(CustomSnappyPrefix)\bin - $(CustomSnappyLib) - $(CustomSnappyPrefix) - $(CustomSnappyPrefix)\include - $(CustomSnappyInclude) - true - $(SnappyInclude);$(IncludePath) - $(ZLIB_HOME);$(IncludePath) - $(CustomIsalPrefix) $(CustomIsalPrefix)\lib @@ -87,11 +75,6 @@ $(CustomIsalLib) true - - - - - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" - - - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" - @@ -157,7 +134,6 @@ - /D HADOOP_SNAPPY_LIBRARY=L\"snappy.dll\" /D HADOOP_ISAL_LIBRARY=L\"isa-l.dll\" @@ -181,7 +157,6 @@ - diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 7cf084bee0eaa..9b3c86e573bc0 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -40,7 +40,6 @@ UNDEF false - false false false @@ -341,7 +340,6 @@ --openssllib=${openssl.lib} --opensslbinbundle=${bundle.openssl.in.bin} --openssllibbundle=${bundle.openssl} - --snappybinbundle=${bundle.snappy.in.bin} --snappylib=${snappy.lib} --snappylibbundle=${bundle.snappy} --zstdbinbundle=${bundle.zstd.in.bin} diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 811947f2af8ce..bb86ad6de568d 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -2266,7 +2266,6 @@ file:/dev/urandom - true true true @@ -2278,7 +2277,6 @@ - ${env.PATH};${hadoop.common.build.dir}/bin;${snappy.lib} ${env.PATH};${hadoop.common.build.dir}/bin;${zstd.lib} ${env.PATH};${hadoop.common.build.dir}/bin;${openssl.lib} ${env.PATH};${hadoop.common.build.dir}/bin;${isal.lib} From 0e44d4b810fa94253c6eb9b0ad5ea30bc62175ba Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 15 Sep 2020 10:55:23 -0700 Subject: [PATCH 09/23] trigger CI From 666a37bc56d6512fe953e438ad4224e935ea6cd2 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 15 Sep 2020 14:28:22 -0700 Subject: [PATCH 10/23] Add compatibility test. --- .../TestSnappyCompressorDecompressor.java | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 6f869679a5818..9c5442867fc40 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -33,6 +33,7 @@ import java.nio.ByteBuffer; import java.util.Random; +import org.apache.commons.codec.binary.Hex; import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.compress.BlockCompressorStream; @@ -441,4 +442,43 @@ public void doWork() throws Exception { ctx.waitFor(60000); } + + @Test + public void testSnappyCompatibility() throws Exception { + // HADOOP-17125. Using snappy-java in SnappyCodec. These strings are raw data and compressed data + // using previous native Snappy codec. We use updated Snappy codec to decode it and check if it + // matches. + String rawData = "010a06030a040a0c0109020c0a010204020d02000b010701080605080b090902060a080502060a0d06070908080a0c0105030904090d05090800040c090c0d0d0804000d00040b0b0d010d060907020a030a0c0900040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b06050b060e030e0a07050d06050d"; + String compressed = "8001f07f010a06030a040a0c0109020c0a010204020d02000b010701080605080b090902060a080502060a0d06070908080a0c0105030904090d05090800040c090c0d0d0804000d00040b0b0d010d060907020a030a0c0900040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b06050b060e030e0a07050d06050d"; + + byte[] rawDataBytes = Hex.decodeHex(rawData); + byte[] compressedBytes = Hex.decodeHex(compressed); + + ByteBuffer inBuf = ByteBuffer.allocateDirect(compressedBytes.length); + inBuf.put(compressedBytes, 0, compressedBytes.length); + inBuf.flip(); + + ByteBuffer outBuf = ByteBuffer.allocateDirect(rawDataBytes.length); + ByteBuffer expected = ByteBuffer.wrap(rawDataBytes); + + SnappyDecompressor.SnappyDirectDecompressor decompressor = new SnappyDecompressor.SnappyDirectDecompressor(); + + outBuf.clear(); + while(!decompressor.finished()) { + decompressor.decompress(inBuf, outBuf); + if (outBuf.remaining() == 0) { + outBuf.flip(); + while (outBuf.remaining() > 0) { + assertEquals(expected.get(), outBuf.get()); + } + outBuf.clear(); + } + } + outBuf.flip(); + while (outBuf.remaining() > 0) { + assertEquals(expected.get(), outBuf.get()); + } + outBuf.clear(); + assertEquals(0, expected.remaining()); + } } From 9de4712af35830e483d37c50261c36812ed047fa Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 16 Sep 2020 18:23:24 -0700 Subject: [PATCH 11/23] Check snappy library and remove useless code. --- hadoop-common-project/hadoop-common/pom.xml | 1 + .../io/compress/snappy/SnappyDecompressor.java | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 3f66a2c422fbf..d4d4ac919d046 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -366,6 +366,7 @@ org.xerial.snappy snappy-java + provided diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index ea09ea1e4b01c..6cffa5cb2ed3c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -27,6 +27,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xerial.snappy.Snappy; +import org.xerial.snappy.SnappyLoader; /** * A {@link Decompressor} based on the snappy compression algorithm. @@ -51,6 +52,13 @@ public class SnappyDecompressor implements Decompressor { * @param directBufferSize size of the direct buffer to be used. */ public SnappyDecompressor(int directBufferSize) { + // `snappy-java` is provided scope. We need to check if its availability. + try { + SnappyLoader.getVersion(); + } catch (Throwable t) { + LOG.warn("Error loading snappy libraries: " + t); + } + this.directBufferSize = directBufferSize; compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); @@ -264,13 +272,7 @@ private int decompressBytesDirect() throws IOException { } else { // Set the position and limit of `compressedDirectBuf` for reading compressedDirectBuf.limit(compressedDirectBufLen).position(0); - // There is compressed input, decompress it now. - int size = Snappy.uncompressedLength((ByteBuffer) compressedDirectBuf); - if (size > uncompressedDirectBuf.remaining()) { - throw new IOException("Could not decompress data. " + - "uncompressedDirectBuf length is too small."); - } - size = Snappy.uncompress((ByteBuffer) compressedDirectBuf, + int size = Snappy.uncompress((ByteBuffer) compressedDirectBuf, (ByteBuffer) uncompressedDirectBuf); compressedDirectBufLen = 0; compressedDirectBuf.limit(compressedDirectBuf.capacity()).position(0); From 0ed518d238adfee81c821b74434785afb5684710 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 16 Sep 2020 18:31:19 -0700 Subject: [PATCH 12/23] Revert trailing whitespace. --- .../io/compress/snappy/SnappyDecompressor.java | 6 +++--- .../org/apache/hadoop/io/compress/TestCodec.java | 2 +- .../snappy/TestSnappyCompressorDecompressor.java | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index 6cffa5cb2ed3c..be2f2ce0bd75d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -282,7 +282,7 @@ private int decompressBytesDirect() throws IOException { int decompressDirect(ByteBuffer src, ByteBuffer dst) throws IOException { assert (this instanceof SnappyDirectDecompressor); - + ByteBuffer presliced = dst; if (dst.position() > 0) { presliced = dst; @@ -311,10 +311,10 @@ int decompressDirect(ByteBuffer src, ByteBuffer dst) throws IOException { } return n; } - + public static class SnappyDirectDecompressor extends SnappyDecompressor implements DirectDecompressor { - + @Override public boolean finished() { return (endOfInput && super.finished()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java index 1fa4e13773912..462225cebfb67 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/TestCodec.java @@ -246,7 +246,7 @@ private static void codecTest(Configuration conf, int seed, int count, v2.readFields(inflateIn); assertTrue("original and compressed-then-decompressed-output not equal", k1.equals(k2) && v1.equals(v2)); - + // original and compressed-then-decompressed-output have the same // hashCode Map m = new HashMap(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 9c5442867fc40..70d739fe2d9a0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -315,39 +315,39 @@ public void testSnappyCompressDecompressWithSmallBuffer() throws Exception { } private void compressDecompressLoop(int rawDataSize) throws IOException { - byte[] rawData = BytesGenerator.get(rawDataSize); + byte[] rawData = BytesGenerator.get(rawDataSize); byte[] compressedResult = new byte[rawDataSize+20]; - int directBufferSize = Math.max(rawDataSize*2, 64*1024); + int directBufferSize = Math.max(rawDataSize*2, 64*1024); SnappyCompressor compressor = new SnappyCompressor(directBufferSize); compressor.setInput(rawData, 0, rawDataSize); int compressedSize = compressor.compress(compressedResult, 0, compressedResult.length); SnappyDirectDecompressor decompressor = new SnappyDirectDecompressor(); - + ByteBuffer inBuf = ByteBuffer.allocateDirect(compressedSize); ByteBuffer outBuf = ByteBuffer.allocateDirect(rawDataSize); inBuf.put(compressedResult, 0, compressedSize); - inBuf.flip(); + inBuf.flip(); ByteBuffer expected = ByteBuffer.wrap(rawData); - + outBuf.clear(); while(!decompressor.finished()) { decompressor.decompress(inBuf, outBuf); if (outBuf.remaining() == 0) { outBuf.flip(); - while (outBuf.remaining() > 0) { + while (outBuf.remaining() > 0) { assertEquals(expected.get(), outBuf.get()); } outBuf.clear(); } } outBuf.flip(); - while (outBuf.remaining() > 0) { + while (outBuf.remaining() > 0) { assertEquals(expected.get(), outBuf.get()); } outBuf.clear(); - + assertEquals(0, expected.remaining()); } From 712749c041c012bb8eef41f826d1abc8da937a36 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 17 Sep 2020 15:22:39 -0700 Subject: [PATCH 13/23] For review comment. --- .../hadoop/io/compress/snappy/SnappyCompressor.java | 9 +++++++++ .../hadoop/io/compress/snappy/SnappyDecompressor.java | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index d496373a48712..2ff37e5d9e0d7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -27,6 +27,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xerial.snappy.Snappy; +import org.xerial.snappy.SnappyLoader; /** * A {@link Compressor} based on the snappy compression algorithm. @@ -54,6 +55,14 @@ public class SnappyCompressor implements Compressor { * @param directBufferSize size of the direct buffer to be used. */ public SnappyCompressor(int directBufferSize) { + // `snappy-java` is provided scope. We need to check if its availability. + try { + SnappyLoader.getVersion(); + } catch (Throwable t) { + throw new RuntimeException("native snappy library not available: " + + "SnappyCompressor has not been loaded.", t); + } + this.directBufferSize = directBufferSize; uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index be2f2ce0bd75d..2b79a5d21deef 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -56,7 +56,8 @@ public SnappyDecompressor(int directBufferSize) { try { SnappyLoader.getVersion(); } catch (Throwable t) { - LOG.warn("Error loading snappy libraries: " + t); + throw new RuntimeException("native snappy library not available: " + + "SnappyDecompressor has not been loaded.", t); } this.directBufferSize = directBufferSize; From 2b5a2122d597c06373d242b0cbc3eed8d3fb7aa4 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 24 Sep 2020 11:34:48 -0700 Subject: [PATCH 14/23] Address review comments. --- .../hadoop/io/compress/snappy/SnappyCompressor.java | 11 ++++++----- .../io/compress/snappy/SnappyDecompressor.java | 13 +++++++------ .../snappy/TestSnappyCompressorDecompressor.java | 10 ++++++++-- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 2ff37e5d9e0d7..b46410a6e495f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -55,12 +55,13 @@ public class SnappyCompressor implements Compressor { * @param directBufferSize size of the direct buffer to be used. */ public SnappyCompressor(int directBufferSize) { - // `snappy-java` is provided scope. We need to check if its availability. + // `snappy-java` is provided scope. We need to check if it is available. try { SnappyLoader.getVersion(); } catch (Throwable t) { - throw new RuntimeException("native snappy library not available: " + - "SnappyCompressor has not been loaded.", t); + throw new RuntimeException("snappy-java library is not available: " + + "SnappyCompressor has not been loaded. You need to add " + + "snappy-java.jar to your CLASSPATH", t); } this.directBufferSize = directBufferSize; @@ -216,7 +217,7 @@ public int compress(byte[] b, int off, int len) } // Compress data - n = compressBytesDirect(); + n = compressDirectBuf(); compressedDirectBuf.limit(n); uncompressedDirectBuf.clear(); // snappy consumes all buffer input @@ -282,7 +283,7 @@ public long getBytesWritten() { public void end() { } - private int compressBytesDirect() throws IOException { + private int compressDirectBuf() throws IOException { if (uncompressedDirectBufLen == 0) { return 0; } else { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index 2b79a5d21deef..9450ae05b1615 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -52,12 +52,13 @@ public class SnappyDecompressor implements Decompressor { * @param directBufferSize size of the direct buffer to be used. */ public SnappyDecompressor(int directBufferSize) { - // `snappy-java` is provided scope. We need to check if its availability. + // `snappy-java` is provided scope. We need to check if it is available. try { SnappyLoader.getVersion(); } catch (Throwable t) { - throw new RuntimeException("native snappy library not available: " + - "SnappyDecompressor has not been loaded.", t); + throw new RuntimeException("snappy-java library is not available: " + + "SnappyDecompressor has not been loaded. You need to add " + + "snappy-java.jar to your CLASSPATH", t); } this.directBufferSize = directBufferSize; @@ -223,7 +224,7 @@ public int decompress(byte[] b, int off, int len) uncompressedDirectBuf.limit(directBufferSize); // Decompress data - n = decompressBytesDirect(); + n = decompressDirectBuf(); uncompressedDirectBuf.limit(n); if (userBufLen <= 0) { @@ -267,7 +268,7 @@ public void end() { // do nothing } - private int decompressBytesDirect() throws IOException { + private int decompressDirectBuf() throws IOException { if (compressedDirectBufLen == 0) { return 0; } else { @@ -299,7 +300,7 @@ int decompressDirect(ByteBuffer src, ByteBuffer dst) throws IOException { directBufferSize = dst.remaining(); int n = 0; try { - n = decompressBytesDirect(); + n = decompressDirectBuf(); presliced.position(presliced.position() + n); // SNAPPY always consumes the whole buffer or throws an exception src.position(src.limit()); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 70d739fe2d9a0..d648b86a54083 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -448,8 +448,14 @@ public void testSnappyCompatibility() throws Exception { // HADOOP-17125. Using snappy-java in SnappyCodec. These strings are raw data and compressed data // using previous native Snappy codec. We use updated Snappy codec to decode it and check if it // matches. - String rawData = "010a06030a040a0c0109020c0a010204020d02000b010701080605080b090902060a080502060a0d06070908080a0c0105030904090d05090800040c090c0d0d0804000d00040b0b0d010d060907020a030a0c0900040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b06050b060e030e0a07050d06050d"; - String compressed = "8001f07f010a06030a040a0c0109020c0a010204020d02000b010701080605080b090902060a080502060a0d06070908080a0c0105030904090d05090800040c090c0d0d0804000d00040b0b0d010d060907020a030a0c0900040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b06050b060e030e0a07050d06050d"; + String rawData = "010a06030a040a0c0109020c0a010204020d02000b010701080605080b090902060a08050206" + + "0a0d06070908080a0c0105030904090d05090800040c090c0d0d0804000d00040b0b0d010d060907020a0" + + "30a0c0900040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b06050b060e030e0a07" + + "050d06050d"; + String compressed = "8001f07f010a06030a040a0c0109020c0a010204020d02000b010701080605080b0909020" + + "60a080502060a0d06070908080a0c0105030904090d05090800040c090c0d0d0804000d00040b0b0d010d" + + "060907020a030a0c0900040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b06050b0" + + "60e030e0a07050d06050d"; byte[] rawDataBytes = Hex.decodeHex(rawData); byte[] compressedBytes = Hex.decodeHex(compressed); From 1cb398bbbaf702501f558ce32cda07d1ca7917ca Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 24 Sep 2020 12:09:33 -0700 Subject: [PATCH 15/23] Take safer approach. --- .../org/apache/hadoop/io/compress/snappy/SnappyCompressor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index b46410a6e495f..882b1321a74cc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -292,7 +292,7 @@ private int compressDirectBuf() throws IOException { int size = Snappy.compress((ByteBuffer) uncompressedDirectBuf, (ByteBuffer) compressedDirectBuf); uncompressedDirectBufLen = 0; - uncompressedDirectBuf.limit(directBufferSize).position(0); + uncompressedDirectBuf.limit(uncompressedDirectBuf.capacity()).position(0); return size; } } From aa6a6d550d6c154f072622371263287dc1b34f11 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 25 Sep 2020 12:25:02 -0700 Subject: [PATCH 16/23] For review comments. --- .../io/compress/snappy/SnappyCompressor.java | 1 - .../compress/snappy/SnappyDecompressor.java | 2 +- .../io/compress/CompressDecompressTester.java | 16 ++++++----- .../TestSnappyCompressorDecompressor.java | 27 ++++++++++--------- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 882b1321a74cc..0cdb596c912fb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -292,7 +292,6 @@ private int compressDirectBuf() throws IOException { int size = Snappy.compress((ByteBuffer) uncompressedDirectBuf, (ByteBuffer) compressedDirectBuf); uncompressedDirectBufLen = 0; - uncompressedDirectBuf.limit(uncompressedDirectBuf.capacity()).position(0); return size; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index 9450ae05b1615..49ae03514d1ce 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -277,7 +277,7 @@ private int decompressDirectBuf() throws IOException { int size = Snappy.uncompress((ByteBuffer) compressedDirectBuf, (ByteBuffer) uncompressedDirectBuf); compressedDirectBufLen = 0; - compressedDirectBuf.limit(compressedDirectBuf.capacity()).position(0); + compressedDirectBuf.clear(); return size; } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index afa9e0c03c429..7b21bb08bd39c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -479,17 +479,19 @@ private static boolean isAvailabl Compressor compressor = pair.compressor; if (compressor.getClass().isAssignableFrom(Lz4Compressor.class) - && (NativeCodeLoader.isNativeCodeLoaded())) { + && (NativeCodeLoader.isNativeCodeLoaded())) return true; - } else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) - && NativeCodeLoader.isNativeCodeLoaded()) { + + else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) + && NativeCodeLoader.isNativeCodeLoaded()) return true; - } else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) { + + else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) { return ZlibFactory.isNativeZlibLoaded(new Configuration()); - } else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)) { - return true; } - + else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)) + return true; + return false; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index d648b86a54083..7d7c694eaec4c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -445,17 +445,19 @@ public void doWork() throws Exception { @Test public void testSnappyCompatibility() throws Exception { - // HADOOP-17125. Using snappy-java in SnappyCodec. These strings are raw data and compressed data - // using previous native Snappy codec. We use updated Snappy codec to decode it and check if it - // matches. - String rawData = "010a06030a040a0c0109020c0a010204020d02000b010701080605080b090902060a08050206" + - "0a0d06070908080a0c0105030904090d05090800040c090c0d0d0804000d00040b0b0d010d060907020a0" + - "30a0c0900040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b06050b060e030e0a07" + - "050d06050d"; - String compressed = "8001f07f010a06030a040a0c0109020c0a010204020d02000b010701080605080b0909020" + - "60a080502060a0d06070908080a0c0105030904090d05090800040c090c0d0d0804000d00040b0b0d010d" + - "060907020a030a0c0900040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b06050b0" + - "60e030e0a07050d06050d"; + // HADOOP-17125. Using snappy-java in SnappyCodec. These strings are raw + // data and compressed data using previous native Snappy codec. We use + // updated Snappy codec to decode it and check if it matches. + String rawData = "010a06030a040a0c0109020c0a010204020d02000b010701080605" + + "080b090902060a080502060a0d06070908080a0c0105030904090d050908000" + + "40c090c0d0d0804000d00040b0b0d010d060907020a030a0c09000409050801" + + "07040d0c01060a0b09070a04000b01040b09000e0e00020b06050b060e030e0" + + "a07050d06050d"; + String compressed = "8001f07f010a06030a040a0c0109020c0a010204020d02000b0" + + "10701080605080b090902060a080502060a0d06070908080a0c010503090409" + + "0d05090800040c090c0d0d0804000d00040b0b0d010d060907020a030a0c090" + + "0040905080107040d0c01060a0b09070a04000b01040b09000e0e00020b0605" + + "0b060e030e0a07050d06050d"; byte[] rawDataBytes = Hex.decodeHex(rawData); byte[] compressedBytes = Hex.decodeHex(compressed); @@ -467,7 +469,8 @@ public void testSnappyCompatibility() throws Exception { ByteBuffer outBuf = ByteBuffer.allocateDirect(rawDataBytes.length); ByteBuffer expected = ByteBuffer.wrap(rawDataBytes); - SnappyDecompressor.SnappyDirectDecompressor decompressor = new SnappyDecompressor.SnappyDirectDecompressor(); + SnappyDecompressor.SnappyDirectDecompressor decompressor = + new SnappyDecompressor.SnappyDirectDecompressor(); outBuf.clear(); while(!decompressor.finished()) { From 19edba29fa0e6949f11926ada6d606bee39dad67 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 25 Sep 2020 12:33:35 -0700 Subject: [PATCH 17/23] Update BUILDING and NativeLibraries. --- BUILDING.txt | 29 ------------------- .../src/site/markdown/NativeLibraries.md.vm | 3 +- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/BUILDING.txt b/BUILDING.txt index 9fc68e09740a2..0141570bc2468 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -82,8 +82,6 @@ Installing required packages for clean install of Ubuntu 14.04 LTS Desktop: Optional packages: -* Snappy compression - $ sudo apt-get install snappy libsnappy-dev * Intel ISA-L library for erasure coding Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version (OR https://github.com/01org/isa-l) @@ -164,30 +162,6 @@ Maven build goals: on running Timeline Service v2 with HBase 2.0. - Snappy build options: - - Snappy is a compression library that can be utilized by the native code. - It is currently an optional component, meaning that Hadoop can be built with - or without this dependency. - - * Use -Drequire.snappy to fail the build if libsnappy.so is not found. - If this option is not specified and the snappy library is missing, - we silently build a version of libhadoop.so that cannot make use of snappy. - This option is recommended if you plan on making use of snappy and want - to get more repeatable builds. - - * Use -Dsnappy.prefix to specify a nonstandard location for the libsnappy - header files and library files. You do not need this option if you have - installed snappy using a package manager. - * Use -Dsnappy.lib to specify a nonstandard location for the libsnappy library - files. Similarly to snappy.prefix, you do not need this option if you have - installed snappy using a package manager. - * Use -Dbundle.snappy to copy the contents of the snappy.lib directory into - the final tar file. This option requires that -Dsnappy.lib is also given, - and it ignores the -Dsnappy.prefix option. If -Dsnappy.lib isn't given, the - bundling and building will fail. - - ZStandard build options: ZStandard is a compression library that can be utilized by the native code. @@ -460,9 +434,6 @@ Building on CentOS 8 $ ./b2 $ sudo ./b2 install -* Install optional dependencies (snappy-devel). - $ sudo dnf --enablerepo=PowerTools snappy-devel - * Install optional dependencies (libzstd-devel). $ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm $ sudo dnf --enablerepo=epel install libzstd-devel diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm b/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm index e4f720cee8ce1..1e62e94394f91 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm +++ b/hadoop-common-project/hadoop-common/src/site/markdown/NativeLibraries.md.vm @@ -47,7 +47,7 @@ Components The native hadoop library includes various components: -* Compression Codecs (bzip2, lz4, snappy, zlib) +* Compression Codecs (bzip2, lz4, zlib) * Native IO utilities for [HDFS Short-Circuit Local Reads](../hadoop-hdfs/ShortCircuitLocalReads.html) and [Centralized Cache Management in HDFS](../hadoop-hdfs/CentralizedCacheManagement.html) * CRC32 checksum implementation @@ -117,7 +117,6 @@ NativeLibraryChecker is a tool to check whether native libraries are loaded corr Native library checking: hadoop: true /home/ozawa/hadoop/lib/native/libhadoop.so.1.0.0 zlib: true /lib/x86_64-linux-gnu/libz.so.1 - snappy: true /usr/lib/libsnappy.so.1 zstd: true /usr/lib/libzstd.so.1 lz4: true revision:99 bzip2: false From fc525faf79b68662f452fc0cd8233194ef9f4555 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Sep 2020 10:53:36 -0700 Subject: [PATCH 18/23] Make snappy-java as compile scope. --- hadoop-common-project/hadoop-common/pom.xml | 2 +- .../hadoop/io/compress/snappy/SnappyCompressor.java | 10 ---------- .../hadoop/io/compress/snappy/SnappyDecompressor.java | 10 ---------- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index d4d4ac919d046..3111cb7b83e05 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -366,7 +366,7 @@ org.xerial.snappy snappy-java - provided + compile diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java index 0cdb596c912fb..2d514705d1e42 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyCompressor.java @@ -27,7 +27,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xerial.snappy.Snappy; -import org.xerial.snappy.SnappyLoader; /** * A {@link Compressor} based on the snappy compression algorithm. @@ -55,15 +54,6 @@ public class SnappyCompressor implements Compressor { * @param directBufferSize size of the direct buffer to be used. */ public SnappyCompressor(int directBufferSize) { - // `snappy-java` is provided scope. We need to check if it is available. - try { - SnappyLoader.getVersion(); - } catch (Throwable t) { - throw new RuntimeException("snappy-java library is not available: " + - "SnappyCompressor has not been loaded. You need to add " + - "snappy-java.jar to your CLASSPATH", t); - } - this.directBufferSize = directBufferSize; uncompressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java index 49ae03514d1ce..d3775e286e895 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/snappy/SnappyDecompressor.java @@ -27,7 +27,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xerial.snappy.Snappy; -import org.xerial.snappy.SnappyLoader; /** * A {@link Decompressor} based on the snappy compression algorithm. @@ -52,15 +51,6 @@ public class SnappyDecompressor implements Decompressor { * @param directBufferSize size of the direct buffer to be used. */ public SnappyDecompressor(int directBufferSize) { - // `snappy-java` is provided scope. We need to check if it is available. - try { - SnappyLoader.getVersion(); - } catch (Throwable t) { - throw new RuntimeException("snappy-java library is not available: " + - "SnappyDecompressor has not been loaded. You need to add " + - "snappy-java.jar to your CLASSPATH", t); - } - this.directBufferSize = directBufferSize; compressedDirectBuf = ByteBuffer.allocateDirect(directBufferSize); From 562b80d036e43dfe4412219b3887f2e557b2d783 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Sep 2020 16:06:44 -0700 Subject: [PATCH 19/23] For review comment. --- dev-support/bin/dist-copynativelibs | 5 ----- .../hadoop-common/src/main/native/native.vcxproj | 3 +++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/dev-support/bin/dist-copynativelibs b/dev-support/bin/dist-copynativelibs index ffc82b8fb1b05..7f2b6ad1f5649 100755 --- a/dev-support/bin/dist-copynativelibs +++ b/dev-support/bin/dist-copynativelibs @@ -111,9 +111,6 @@ for i in "$@"; do --openssllibbundle=*) OPENSSLLIBBUNDLE=${i#*=} ;; - --snappybinbundle=*) - SNAPPYBINBUNDLE=${i#*=} - ;; --snappylib=*) SNAPPYLIB=${i#*=} ;; @@ -176,8 +173,6 @@ if [[ -d "${BIN_DIR}" ]] ; then exit 1 fi - bundle_native_bin "${SNAPPYBINBUNDLE}" "${SNAPPYLIBBUNDLE}" "snappy.lib" "snappy" "${SNAPPYLIB}" - bundle_native_bin "${ZSTDBINBUNDLE}" "${ZSTDLIBBUNDLE}" "zstd.lib" "zstd" "${ZSTDLIB}" bundle_native_bin "${OPENSSLBINBUNDLE}" "${OPENSSLLIBBUNDLE}" "openssl.lib" "crypto" "${OPENSSLLIB}" diff --git a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj index dd884fc650149..19b4d95e43622 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj +++ b/hadoop-common-project/hadoop-common/src/main/native/native.vcxproj @@ -68,6 +68,9 @@ ..\..\..\target\native\$(Configuration)\ hadoop + + $(ZLIB_HOME);$(IncludePath) + $(CustomIsalPrefix) $(CustomIsalPrefix)\lib From 0600169d3fa5d082c6a28defc59872b2f485873f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Sep 2020 16:11:40 -0700 Subject: [PATCH 20/23] Revert Snappy description in BUILDING.txt. --- BUILDING.txt | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/BUILDING.txt b/BUILDING.txt index 0141570bc2468..c34946aa993b7 100644 --- a/BUILDING.txt +++ b/BUILDING.txt @@ -82,6 +82,8 @@ Installing required packages for clean install of Ubuntu 14.04 LTS Desktop: Optional packages: +* Snappy compression (only used for hadoop-mapreduce-client-nativetask) + $ sudo apt-get install snappy libsnappy-dev * Intel ISA-L library for erasure coding Please refer to https://01.org/intel%C2%AE-storage-acceleration-library-open-source-version (OR https://github.com/01org/isa-l) @@ -162,6 +164,31 @@ Maven build goals: on running Timeline Service v2 with HBase 2.0. + Snappy build options: + + Snappy is a compression library that can be utilized by the native code. + It is currently an optional component, meaning that Hadoop can be built with + or without this dependency. Snappy library as optional dependency is only + used for hadoop-mapreduce-client-nativetask. + + * Use -Drequire.snappy to fail the build if libsnappy.so is not found. + If this option is not specified and the snappy library is missing, + we silently build a version of libhadoop.so that cannot make use of snappy. + This option is recommended if you plan on making use of snappy and want + to get more repeatable builds. + + * Use -Dsnappy.prefix to specify a nonstandard location for the libsnappy + header files and library files. You do not need this option if you have + installed snappy using a package manager. + * Use -Dsnappy.lib to specify a nonstandard location for the libsnappy library + files. Similarly to snappy.prefix, you do not need this option if you have + installed snappy using a package manager. + * Use -Dbundle.snappy to copy the contents of the snappy.lib directory into + the final tar file. This option requires that -Dsnappy.lib is also given, + and it ignores the -Dsnappy.prefix option. If -Dsnappy.lib isn't given, the + bundling and building will fail. + + ZStandard build options: ZStandard is a compression library that can be utilized by the native code. @@ -434,6 +461,9 @@ Building on CentOS 8 $ ./b2 $ sudo ./b2 install +* Install optional dependencies (snappy-devel). + $ sudo dnf --enablerepo=PowerTools install snappy-devel + * Install optional dependencies (libzstd-devel). $ sudo dnf install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm $ sudo dnf --enablerepo=epel install libzstd-devel From fd71a20df19543a66f23f384c58ad985f0ee2e67 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 1 Oct 2020 10:10:56 -0700 Subject: [PATCH 21/23] Fix style issue. --- .../apache/hadoop/io/compress/CompressDecompressTester.java | 4 ++-- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 7b21bb08bd39c..008e4c7b9397f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -488,9 +488,9 @@ else if (compressor.getClass().isAssignableFrom(BuiltInZlibDeflater.class) else if (compressor.getClass().isAssignableFrom(ZlibCompressor.class)) { return ZlibFactory.isNativeZlibLoaded(new Configuration()); - } - else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)) + } else if (compressor.getClass().isAssignableFrom(SnappyCompressor.class)) { return true; + } return false; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index 7d7c694eaec4c..dd12573d1babb 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -353,7 +353,9 @@ private void compressDecompressLoop(int rawDataSize) throws IOException { @Test public void testSnappyDirectBlockCompression() { - int[] size = { 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 }; + int[] size = new int[] { + 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 + }; try { for (int i = 0; i < size.length; i++) { compressDecompressLoop(size[i]); From 7dc320ddddeaeac5a4eee36c703fededbccbf6de Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 2 Oct 2020 11:57:22 -0700 Subject: [PATCH 22/23] Fix another style... --- .../io/compress/snappy/TestSnappyCompressorDecompressor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java index dd12573d1babb..93c24835f2206 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/snappy/TestSnappyCompressorDecompressor.java @@ -354,7 +354,7 @@ private void compressDecompressLoop(int rawDataSize) throws IOException { @Test public void testSnappyDirectBlockCompression() { int[] size = new int[] { - 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 + 4 * 1024, 64 * 1024, 128 * 1024, 1024 * 1024 }; try { for (int i = 0; i < size.length; i++) { From 5685b0b7902e7a6c350d3378dea1159cb968cb24 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 5 Oct 2020 11:07:50 -0700 Subject: [PATCH 23/23] Address comments: throwing AssertionError and exclude jobTokenPassword for license check. --- .../hadoop/io/compress/CompressDecompressTester.java | 7 +------ .../hadoop-mapreduce-client-jobclient/pom.xml | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java index 008e4c7b9397f..8082e3ab0b5bd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/compress/CompressDecompressTester.java @@ -29,7 +29,6 @@ import java.util.Arrays; import java.util.List; -import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.compress.lz4.Lz4Compressor; import org.apache.hadoop.io.compress.snappy.SnappyCompressor; @@ -412,11 +411,7 @@ public void assertCompression(String name, Compressor compressor, joiner.join(name, "byte arrays not equals error !!!"), originalRawData, decompressOut.toByteArray()); } catch (Exception ex) { - if (ex.getMessage() != null) { - fail(joiner.join(name, ex.getMessage())); - } else { - fail(joiner.join(name, ExceptionUtils.getStackTrace(ex))); - } + throw new AssertionError(name + ex, ex); } finally { try { compressedOut.close(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml index b8864fc3efd55..e98b574d1fe38 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml @@ -183,6 +183,7 @@ src/test/java/org/apache/hadoop/cli/data60bytes src/test/resources/job_1329348432655_0001-10.jhist + **/jobTokenPassword