From 47b3848bf54520c5e42ff545da30d5438af60c88 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Sat, 9 Aug 2014 23:28:11 -0700
Subject: [PATCH 1/3] [SPARK-2953] Allow using short names for io compression
 codecs

Instead of requiring "org.apache.spark.io.LZ4CompressionCodec", it is
easier for users if Spark just accepts "lz4", "lzf", "snappy".
---
 .../org/apache/spark/io/CompressionCodec.scala | 11 +++++++++--
 .../spark/io/CompressionCodecSuite.scala       | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
index 1b66218d86dd9..ef9c43ecf14f6 100644
--- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
+++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
@@ -46,17 +46,24 @@ trait CompressionCodec {
 
 
 private[spark] object CompressionCodec {
+
+  private val shortCompressionCodecNames = Map(
+    "lz4" -> classOf[LZ4CompressionCodec].getName,
+    "lzf" -> classOf[LZFCompressionCodec].getName,
+    "snappy" -> classOf[SnappyCompressionCodec].getName)
+
   def createCodec(conf: SparkConf): CompressionCodec = {
     createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC))
   }
 
   def createCodec(conf: SparkConf, codecName: String): CompressionCodec = {
-    val ctor = Class.forName(codecName, true, Utils.getContextOrSparkClassLoader)
+    val codecClass = shortCompressionCodecNames.getOrElse(codecName.toLowerCase, codecName)
+    val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader)
       .getConstructor(classOf[SparkConf])
     ctor.newInstance(conf).asInstanceOf[CompressionCodec]
   }
 
-  val DEFAULT_COMPRESSION_CODEC = classOf[SnappyCompressionCodec].getName
+  val DEFAULT_COMPRESSION_CODEC = "snappy"
 }

diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
index 3f882a724b047..25be7f25c21bb 100644
--- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
+++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala
@@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite {
     testCodec(codec)
   }
 
+  test("lz4 compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "lz4")
+    assert(codec.getClass === classOf[LZ4CompressionCodec])
+    testCodec(codec)
+  }
+
   test("lzf compression codec") {
     val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)
     assert(codec.getClass === classOf[LZFCompressionCodec])
     testCodec(codec)
   }
 
+  test("lzf compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "lzf")
+    assert(codec.getClass === classOf[LZFCompressionCodec])
+    testCodec(codec)
+  }
+
   test("snappy compression codec") {
     val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName)
     assert(codec.getClass === classOf[SnappyCompressionCodec])
     testCodec(codec)
   }
+
+  test("snappy compression codec short form") {
+    val codec = CompressionCodec.createCodec(conf, "snappy")
+    assert(codec.getClass === classOf[SnappyCompressionCodec])
+    testCodec(codec)
+  }
 }
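For context, a minimal usage sketch (not part of the patch) showing the short form through the public SparkConf API; the application name is illustrative:

    import org.apache.spark.SparkConf

    // After this change, the short name "lz4" resolves to
    // org.apache.spark.io.LZ4CompressionCodec; fully qualified class
    // names keep working as before.
    val conf = new SparkConf()
      .setAppName("CodecExample") // illustrative name only
      .set("spark.io.compression.codec", "lz4")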
From 63f78eec8f7e6a757e9a364a1cc0dcb93dd4bdda Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Sat, 9 Aug 2014 23:31:00 -0700
Subject: [PATCH 2/3] Updated configuration documentation.

---
 docs/configuration.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 617a72a021f6e..9716512442275 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -373,12 +373,12 @@ Apart from these, the following properties are also available, and may be useful
   </td>
 </tr>
 <tr>
   <td><code>spark.io.compression.codec</code></td>
-  <td>org.apache.spark.io.<br />SnappyCompressionCodec</td>
+  <td>snappy</td>
   <td>
     The codec used to compress internal data such as RDD partitions and shuffle outputs.
     By default, Spark provides three codecs: <code>org.apache.spark.io.LZ4CompressionCodec</code>,
     <code>org.apache.spark.io.LZFCompressionCodec</code>,
-    and <code>org.apache.spark.io.SnappyCompressionCodec</code>.
+    and <code>org.apache.spark.io.SnappyCompressionCodec</code>. You can also use the short form: <code>lz4</code>, <code>lzf</code>, and <code>snappy</code>.
   </td>
 </tr>

From 9f50962366901f81a9c7a83a3742e37e3762c3f4 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Tue, 12 Aug 2014 21:47:52 -0700
Subject: [PATCH 3/3] Specify short-form compression codec names first.

---
 docs/configuration.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 9716512442275..8136bd62ab6af 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -375,10 +375,12 @@ Apart from these, the following properties are also available, and may be useful
 <tr>
   <td><code>spark.io.compression.codec</code></td>
   <td>snappy</td>
   <td>
-    The codec used to compress internal data such as RDD partitions and shuffle outputs.
-    By default, Spark provides three codecs: <code>org.apache.spark.io.LZ4CompressionCodec</code>,
+    The codec used to compress internal data such as RDD partitions and shuffle outputs. By default,
+    Spark provides three codecs: <code>lz4</code>, <code>lzf</code>, and <code>snappy</code>. You
+    can also use fully qualified class names to specify the codec, e.g.
+    <code>org.apache.spark.io.LZ4CompressionCodec</code>,
     <code>org.apache.spark.io.LZFCompressionCodec</code>,
-    and <code>org.apache.spark.io.SnappyCompressionCodec</code>. You can also use the short form: <code>lz4</code>, <code>lzf</code>, and <code>snappy</code>.
+    and <code>org.apache.spark.io.SnappyCompressionCodec</code>.
   </td>
 </tr>
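A note on case handling: createCodec lowercases the codec name before the short-name lookup, so mixed-case forms such as "LZF" resolve as well. A minimal sketch in the spirit of the new suite tests; since the CompressionCodec object is private[spark], this only compiles inside Spark's own source tree, and the object name here is hypothetical:

    package org.apache.spark.io

    import org.apache.spark.SparkConf

    // Hypothetical check, not part of the patch: "LZF" lowercases to
    // "lzf", hits the short-name map, and yields LZFCompressionCodec.
    object ShortNameCaseCheck {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
        val codec = CompressionCodec.createCodec(conf, "LZF")
        assert(codec.getClass == classOf[LZFCompressionCodec])
      }
    }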