From b364990239d2ad784b1ec064b6ec08e812ac9180 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 4 Sep 2021 14:53:17 -0700 Subject: [PATCH 01/10] Add e2e test cases for codec. --- pom.xml | 7 ++ project/SparkBuild.scala | 4 +- sql/core/pom.xml | 6 ++ .../datasources/DataSourceCodecTest.scala | 66 +++++++++++++++++++ .../datasources/orc/OrcCodecTestSuite.scala | 31 +++++++++ .../parquet/ParquetCodecTestSuite.scala | 33 ++++++++++ 6 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala diff --git a/pom.xml b/pom.xml index 8c303e9353f75..d91a4778d9ba1 100644 --- a/pom.xml +++ b/pom.xml @@ -300,6 +300,13 @@ false + + + jitpack.io + https://jitpack.io + Jitpack.io repository + + diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 93256bc9e0143..2f82b1ce9c18f 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -274,7 +274,9 @@ object SparkBuild extends PomBuild { "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/", DefaultMavenRepository, Resolver.mavenLocal, - Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) + Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns), + // needed for brotli-codec + "jitpack.io" at "https://jitpack.io" ), externalResolvers := resolvers.value, otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value, diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 73fa60c2173bc..d7e9cac744ed7 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -178,6 +178,12 @@ htmlunit-driver test + + com.github.rdblue + brotli-codec + 0.1.1 + test + target/scala-${scala.binary.version}/classes diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala new file mode 100644 index 0000000000000..63cbed41bfed5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.test.SQLTestUtils + +abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils { + + protected def dataSourceName: String + protected val codecConfigName: String + protected def availableCodecs: Seq[String] + + def testWithAllCodecs(name: String)(f: => Unit): Unit = { + for (codec <- availableCodecs) { + test(s"$name - data source $dataSourceName - codec: $codec") { + withSQLConf(codecConfigName -> codec) { + f + } + } + } + } + + testWithAllCodecs("write and read - single partition") { + withTempPath { dir => + testData + .repartition(1) + .write + .format(dataSourceName) + .save(dir.getCanonicalPath) + + val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath) + checkAnswer(df, testData) + } + } + + testWithAllCodecs("write and read") { + withTempPath { dir => + testData + .repartition(5) + .write + .format(dataSourceName) + .save(dir.getCanonicalPath) + + val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath) + checkAnswer(df, testData) + } + } +} + + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala new file mode 100644 index 0000000000000..7249931636f4f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.orc + +import org.apache.spark.sql.execution.datasources.DataSourceCodecTest +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +class OrcCodecTestSuite extends DataSourceCodecTest with SharedSparkSession{ + + override def dataSourceName: String = "orc" + override val codecConfigName = SQLConf.ORC_COMPRESSION.key + override protected def availableCodecs = Seq("none", "uncompressed", "snappy", + "zlib", "zstd", "lz4", "lzo") +} + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala new file mode 100644 index 0000000000000..e67c00993c491 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.spark.sql.execution.datasources.DataSourceCodecTest +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +class ParquetCodecTestSuite extends DataSourceCodecTest with SharedSparkSession { + + override def dataSourceName: String = "parquet" + override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key + // Exclude "lzo" because it is GPL-licenced so not included in Hadoop. + // TODO (SPARK-36669): Add "lz4" back after fix it. + override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy", + "gzip", "brotli", "zstd") +} + From cbb6f0a696b33744ac074bb835c377b0e242e852 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 6 Sep 2021 00:31:35 -0700 Subject: [PATCH 02/10] Add lz4 wrapper classes as workaround for SPARK-36669. --- .../shaded/net/jpountz/lz4/LZ4Compressor.java | 37 ++++++++++++++ .../shaded/net/jpountz/lz4/LZ4Factory.java | 48 +++++++++++++++++++ .../net/jpountz/lz4/LZ4SafeDecompressor.java | 36 ++++++++++++++ sql/core/pom.xml | 5 ++ .../parquet/ParquetCodecTestSuite.scala | 3 +- 5 files changed, 127 insertions(+), 2 deletions(-) create mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java create mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java create mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java new file mode 100644 index 0000000000000..bdd568e7265b8 --- /dev/null +++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.shaded.net.jpountz.lz4; + +/** + * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release + * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need + * implement all net.jpountz.lz4.LZ4Compressor API, just the ones used by Hadoop Lz4Compressor. + */ +public final class LZ4Compressor { + + private net.jpountz.lz4.LZ4Compressor lz4Compressor; + + public LZ4Compressor(net.jpountz.lz4.LZ4Compressor lz4Compressor) { + this.lz4Compressor = lz4Compressor; + } + + public void compress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) { + lz4Compressor.compress(src, dest); + } +} + diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java new file mode 100644 index 0000000000000..12619a7149e0b --- /dev/null +++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.shaded.net.jpountz.lz4; + +/** + * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release + * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need + * implement all net.jpountz.lz4.LZ4Factory API, just the ones used by Hadoop Lz4Compressor. + */ +public final class LZ4Factory { + + private net.jpountz.lz4.LZ4Factory lz4Factory; + + public LZ4Factory(net.jpountz.lz4.LZ4Factory lz4Factory) { + this.lz4Factory = lz4Factory; + } + + public static LZ4Factory fastestInstance() { + return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance()); + } + + public LZ4Compressor highCompressor() { + return new LZ4Compressor(lz4Factory.highCompressor()); + } + + public LZ4Compressor fastCompressor() { + return new LZ4Compressor(lz4Factory.fastCompressor()); + } + + public LZ4SafeDecompressor safeDecompressor() { + return new LZ4SafeDecompressor(lz4Factory.safeDecompressor()); + } +} \ No newline at end of file diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java new file mode 100644 index 0000000000000..fa99affea6856 --- /dev/null +++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.shaded.net.jpountz.lz4; + +/** + * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release + * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need + * implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones used by Hadoop + * Lz4Decompressor. + */ +public final class LZ4SafeDecompressor { + private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor; + + public LZ4SafeDecompressor(net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor) { + this.lz4Decompressor = lz4Decompressor; + } + + public void decompress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) { + lz4Decompressor.decompress(src, dest); + } +} diff --git a/sql/core/pom.xml b/sql/core/pom.xml index d7e9cac744ed7..598826ae50ef5 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -178,6 +178,11 @@ htmlunit-driver test + + org.lz4 + lz4-java + test + com.github.rdblue brotli-codec diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala index e67c00993c491..d28627c22c482 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala @@ -26,8 +26,7 @@ class ParquetCodecTestSuite extends DataSourceCodecTest with SharedSparkSession override def dataSourceName: String = "parquet" override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key // Exclude "lzo" because it is GPL-licenced so not included in Hadoop. - // TODO (SPARK-36669): Add "lz4" back after fix it. override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy", - "gzip", "brotli", "zstd") + "gzip", "brotli", "zstd", "lz4") } From ab5fc9c2b95c0dc9bde0627c164c991e7a618a0d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 6 Sep 2021 01:42:01 -0700 Subject: [PATCH 03/10] Refactor tests. --- ...cTest.scala => FileSourceCodecSuite.scala} | 33 ++++++++++--------- .../datasources/orc/OrcCodecTestSuite.scala | 31 ----------------- .../parquet/ParquetCodecTestSuite.scala | 32 ------------------ 3 files changed, 18 insertions(+), 78 deletions(-) rename sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/{DataSourceCodecTest.scala => FileSourceCodecSuite.scala} (64%) delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala similarity index 64% rename from sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala index 63cbed41bfed5..7a8d99eb7e68f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala @@ -18,9 +18,10 @@ package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} -abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils { +trait FileSourceCodecSuite extends QueryTest with SQLTestUtils { protected def dataSourceName: String protected val codecConfigName: String @@ -36,19 +37,6 @@ abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils { } } - testWithAllCodecs("write and read - single partition") { - withTempPath { dir => - testData - .repartition(1) - .write - .format(dataSourceName) - .save(dir.getCanonicalPath) - - val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath) - checkAnswer(df, testData) - } - } - testWithAllCodecs("write and read") { withTempPath { dir => testData @@ -63,4 +51,19 @@ abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils { } } +class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession { + override def dataSourceName: String = "parquet" + override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key + // Exclude "lzo" because it is GPL-licenced so not included in Hadoop. + override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy", + "gzip", "brotli", "zstd", "lz4") +} + +class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession{ + + override def dataSourceName: String = "orc" + override val codecConfigName = SQLConf.ORC_COMPRESSION.key + override protected def availableCodecs = Seq("none", "uncompressed", "snappy", + "zlib", "zstd", "lz4", "lzo") +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala deleted file mode 100644 index 7249931636f4f..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources.orc - -import org.apache.spark.sql.execution.datasources.DataSourceCodecTest -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession - -class OrcCodecTestSuite extends DataSourceCodecTest with SharedSparkSession{ - - override def dataSourceName: String = "orc" - override val codecConfigName = SQLConf.ORC_COMPRESSION.key - override protected def availableCodecs = Seq("none", "uncompressed", "snappy", - "zlib", "zstd", "lz4", "lzo") -} - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala deleted file mode 100644 index d28627c22c482..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources.parquet - -import org.apache.spark.sql.execution.datasources.DataSourceCodecTest -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession - -class ParquetCodecTestSuite extends DataSourceCodecTest with SharedSparkSession { - - override def dataSourceName: String = "parquet" - override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key - // Exclude "lzo" because it is GPL-licenced so not included in Hadoop. - override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy", - "gzip", "brotli", "zstd", "lz4") -} - From 5923b6f1cbadd4790d0c73a3b9dcfdb38cbea621 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 6 Sep 2021 02:01:14 -0700 Subject: [PATCH 04/10] Fix java style. --- .../hadoop/shaded/net/jpountz/lz4/LZ4Factory.java | 8 ++++---- .../shaded/net/jpountz/lz4/LZ4SafeDecompressor.java | 2 +- .../execution/datasources/FileSourceCodecSuite.scala | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java index 12619a7149e0b..d0b4ac09a3e5a 100644 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java +++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java @@ -31,18 +31,18 @@ public LZ4Factory(net.jpountz.lz4.LZ4Factory lz4Factory) { } public static LZ4Factory fastestInstance() { - return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance()); + return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance()); } public LZ4Compressor highCompressor() { - return new LZ4Compressor(lz4Factory.highCompressor()); + return new LZ4Compressor(lz4Factory.highCompressor()); } public LZ4Compressor fastCompressor() { - return new LZ4Compressor(lz4Factory.fastCompressor()); + return new LZ4Compressor(lz4Factory.fastCompressor()); } public LZ4SafeDecompressor safeDecompressor() { return new LZ4SafeDecompressor(lz4Factory.safeDecompressor()); } -} \ No newline at end of file +} diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java index fa99affea6856..3d6259a2347a7 100644 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java +++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java @@ -27,7 +27,7 @@ public final class LZ4SafeDecompressor { private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor; public LZ4SafeDecompressor(net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor) { - this.lz4Decompressor = lz4Decompressor; + this.lz4Decompressor = lz4Decompressor; } public void decompress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala index 7a8d99eb7e68f..6a344acce3860 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala @@ -23,13 +23,13 @@ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} trait FileSourceCodecSuite extends QueryTest with SQLTestUtils { - protected def dataSourceName: String + protected def format: String protected val codecConfigName: String protected def availableCodecs: Seq[String] def testWithAllCodecs(name: String)(f: => Unit): Unit = { for (codec <- availableCodecs) { - test(s"$name - data source $dataSourceName - codec: $codec") { + test(s"$name - file source $format - codec: $codec") { withSQLConf(codecConfigName -> codec) { f } @@ -42,10 +42,10 @@ trait FileSourceCodecSuite extends QueryTest with SQLTestUtils { testData .repartition(5) .write - .format(dataSourceName) + .format(format) .save(dir.getCanonicalPath) - val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath) + val df = spark.read.format(format).load(dir.getCanonicalPath) checkAnswer(df, testData) } } @@ -53,7 +53,7 @@ trait FileSourceCodecSuite extends QueryTest with SQLTestUtils { class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession { - override def dataSourceName: String = "parquet" + override def format: String = "parquet" override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key // Exclude "lzo" because it is GPL-licenced so not included in Hadoop. override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy", @@ -62,7 +62,7 @@ class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession { class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession{ - override def dataSourceName: String = "orc" + override def format: String = "orc" override val codecConfigName = SQLConf.ORC_COMPRESSION.key override protected def availableCodecs = Seq("none", "uncompressed", "snappy", "zlib", "zstd", "lz4", "lzo") From 194faf2ef4af530677130101c750d1ac8a96e63b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 6 Sep 2021 14:14:00 -0700 Subject: [PATCH 05/10] Add JIRA todo. --- .../hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java | 7 ++++--- .../apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java | 7 ++++--- .../shaded/net/jpountz/lz4/LZ4SafeDecompressor.java | 8 ++++---- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java index bdd568e7265b8..55c85566472c7 100644 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java +++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java @@ -18,9 +18,10 @@ package org.apache.hadoop.shaded.net.jpountz.lz4; /** - * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release - * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need - * implement all net.jpountz.lz4.LZ4Compressor API, just the ones used by Hadoop Lz4Compressor. + * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after + * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. + * This does not need implement all net.jpountz.lz4.LZ4Compressor API, just the ones used + * by Hadoop Lz4Compressor. */ public final class LZ4Compressor { diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java index d0b4ac09a3e5a..61829b2728bce 100644 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java +++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java @@ -18,9 +18,10 @@ package org.apache.hadoop.shaded.net.jpountz.lz4; /** - * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release - * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need - * implement all net.jpountz.lz4.LZ4Factory API, just the ones used by Hadoop Lz4Compressor. + * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after + * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. + * This does not need implement all net.jpountz.lz4.LZ4Factory API, just the ones used by + * Hadoop Lz4Compressor. */ public final class LZ4Factory { diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java index 3d6259a2347a7..cd3dd6f060f52 100644 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java +++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java @@ -18,10 +18,10 @@ package org.apache.hadoop.shaded.net.jpountz.lz4; /** - * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release - * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need - * implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones used by Hadoop - * Lz4Decompressor. + * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after + * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. + * This does not need implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones + * used by Hadoop Lz4Decompressor. */ public final class LZ4SafeDecompressor { private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor; From ed5f33d997ceaf7ebc7c1e46ce1a1ca5385cdff6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 6 Sep 2021 15:19:16 -0700 Subject: [PATCH 06/10] Remove lz4-java test dependency. --- sql/core/pom.xml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 598826ae50ef5..d7e9cac744ed7 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -178,11 +178,6 @@ htmlunit-driver test - - org.lz4 - lz4-java - test - com.github.rdblue brotli-codec From 0029f332b4b91efbaed1fee0a7fb9dcac7c183cf Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 6 Sep 2021 19:02:32 -0700 Subject: [PATCH 07/10] For review comment. --- .../sql/execution/datasources/FileSourceCodecSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala index 6a344acce3860..aa9bfab61c37e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala @@ -54,16 +54,16 @@ trait FileSourceCodecSuite extends QueryTest with SQLTestUtils { class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession { override def format: String = "parquet" - override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key + override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key // Exclude "lzo" because it is GPL-licenced so not included in Hadoop. override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4") } -class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession{ +class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession { override def format: String = "orc" - override val codecConfigName = SQLConf.ORC_COMPRESSION.key + override val codecConfigName: String = SQLConf.ORC_COMPRESSION.key override protected def availableCodecs = Seq("none", "uncompressed", "snappy", "zlib", "zstd", "lz4", "lzo") } From b6f20cf3380a3295efbcc53e1394a96ccce9f013 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 6 Sep 2021 19:49:32 -0700 Subject: [PATCH 08/10] Exclude "brotli" for non-supported arch. --- .../sql/execution/datasources/FileSourceCodecSuite.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala index aa9bfab61c37e..b62a9b77e0043 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala @@ -56,8 +56,13 @@ class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession { override def format: String = "parquet" override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key // Exclude "lzo" because it is GPL-licenced so not included in Hadoop. - override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy", - "gzip", "brotli", "zstd", "lz4") + override protected def availableCodecs: Seq[String] = + if (System.getProperty("os.arch") == "aarch64") { + // Exclude "brotli" due to PARQUET-1975. + Seq("none", "uncompressed", "snappy", "gzip", "zstd", "lz4") + } else { + Seq("none", "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4") + } } class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession { From e76393b8b6b905d4e9c182e766d99ed2e0edd04d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 7 Sep 2021 10:16:28 -0700 Subject: [PATCH 09/10] Remove wrapper classes. Keep only working codec. --- .../shaded/net/jpountz/lz4/LZ4Compressor.java | 38 -------------- .../shaded/net/jpountz/lz4/LZ4Factory.java | 49 ------------------- .../net/jpountz/lz4/LZ4SafeDecompressor.java | 36 -------------- .../datasources/FileSourceCodecSuite.scala | 5 +- 4 files changed, 3 insertions(+), 125 deletions(-) delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java deleted file mode 100644 index 55c85566472c7..0000000000000 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.shaded.net.jpountz.lz4; - -/** - * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after - * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. - * This does not need implement all net.jpountz.lz4.LZ4Compressor API, just the ones used - * by Hadoop Lz4Compressor. - */ -public final class LZ4Compressor { - - private net.jpountz.lz4.LZ4Compressor lz4Compressor; - - public LZ4Compressor(net.jpountz.lz4.LZ4Compressor lz4Compressor) { - this.lz4Compressor = lz4Compressor; - } - - public void compress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) { - lz4Compressor.compress(src, dest); - } -} - diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java deleted file mode 100644 index 61829b2728bce..0000000000000 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.shaded.net.jpountz.lz4; - -/** - * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after - * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. - * This does not need implement all net.jpountz.lz4.LZ4Factory API, just the ones used by - * Hadoop Lz4Compressor. - */ -public final class LZ4Factory { - - private net.jpountz.lz4.LZ4Factory lz4Factory; - - public LZ4Factory(net.jpountz.lz4.LZ4Factory lz4Factory) { - this.lz4Factory = lz4Factory; - } - - public static LZ4Factory fastestInstance() { - return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance()); - } - - public LZ4Compressor highCompressor() { - return new LZ4Compressor(lz4Factory.highCompressor()); - } - - public LZ4Compressor fastCompressor() { - return new LZ4Compressor(lz4Factory.fastCompressor()); - } - - public LZ4SafeDecompressor safeDecompressor() { - return new LZ4SafeDecompressor(lz4Factory.safeDecompressor()); - } -} diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java deleted file mode 100644 index cd3dd6f060f52..0000000000000 --- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.shaded.net.jpountz.lz4; - -/** - * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after - * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries. - * This does not need implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones - * used by Hadoop Lz4Decompressor. - */ -public final class LZ4SafeDecompressor { - private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor; - - public LZ4SafeDecompressor(net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor) { - this.lz4Decompressor = lz4Decompressor; - } - - public void decompress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) { - lz4Decompressor.decompress(src, dest); - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala index b62a9b77e0043..4b7e53743ba93 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala @@ -56,12 +56,13 @@ class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession { override def format: String = "parquet" override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key // Exclude "lzo" because it is GPL-licenced so not included in Hadoop. + // TODO(SPARK-36669): "lz4" codec fails due to HADOOP-17891. override protected def availableCodecs: Seq[String] = if (System.getProperty("os.arch") == "aarch64") { // Exclude "brotli" due to PARQUET-1975. - Seq("none", "uncompressed", "snappy", "gzip", "zstd", "lz4") + Seq("none", "uncompressed", "snappy", "gzip", "zstd") } else { - Seq("none", "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4") + Seq("none", "uncompressed", "snappy", "gzip", "brotli", "zstd") } } From 16e7db926486ca83166b214a0a53feafcae03f3b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 7 Sep 2021 12:08:39 -0700 Subject: [PATCH 10/10] For review comments. --- pom.xml | 1 - .../sql/execution/datasources/FileSourceCodecSuite.scala | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index d91a4778d9ba1..81a0126539b1d 100644 --- a/pom.xml +++ b/pom.xml @@ -300,7 +300,6 @@ false - jitpack.io https://jitpack.io diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala index 4b7e53743ba93..92b887e948da9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} -trait FileSourceCodecSuite extends QueryTest with SQLTestUtils { +trait FileSourceCodecSuite extends QueryTest with SQLTestUtils with SharedSparkSession { protected def format: String protected val codecConfigName: String @@ -51,7 +51,7 @@ trait FileSourceCodecSuite extends QueryTest with SQLTestUtils { } } -class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession { +class ParquetCodecSuite extends FileSourceCodecSuite { override def format: String = "parquet" override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key @@ -66,7 +66,7 @@ class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession { } } -class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession { +class OrcCodecSuite extends FileSourceCodecSuite { override def format: String = "orc" override val codecConfigName: String = SQLConf.ORC_COMPRESSION.key