From b364990239d2ad784b1ec064b6ec08e812ac9180 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Sat, 4 Sep 2021 14:53:17 -0700
Subject: [PATCH 01/10] Add e2e test cases for codec.

---
 pom.xml                                       |  7 ++
 project/SparkBuild.scala                      |  4 +-
 sql/core/pom.xml                              |  6 ++
 .../datasources/DataSourceCodecTest.scala     | 66 +++++++++++++++++++
 .../datasources/orc/OrcCodecTestSuite.scala   | 31 +++++++++
 .../parquet/ParquetCodecTestSuite.scala       | 33 ++++++++++
 6 files changed, 146 insertions(+), 1 deletion(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala
diff --git a/pom.xml b/pom.xml
index 8c303e9353f75..d91a4778d9ba1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -300,6 +300,13 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
+
+    <repository>
+      <id>jitpack.io</id>
+      <url>https://jitpack.io</url>
+      <name>Jitpack.io repository</name>
+      <!-- needed for brotli-codec -->
+    </repository>
   </repositories>
   <pluginRepositories>
     <pluginRepository>
diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index 93256bc9e0143..2f82b1ce9c18f 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -274,7 +274,9 @@ object SparkBuild extends PomBuild {
       "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/",
       DefaultMavenRepository,
       Resolver.mavenLocal,
-      Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns)
+      Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns),
+      // needed for brotli-codec
+      "jitpack.io" at "https://jitpack.io"
     ),
     externalResolvers := resolvers.value,
     otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value,
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 73fa60c2173bc..d7e9cac744ed7 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -178,6 +178,12 @@
       <artifactId>htmlunit-driver</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.github.rdblue</groupId>
+      <artifactId>brotli-codec</artifactId>
+      <version>0.1.1</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala
new file mode 100644
index 0000000000000..63cbed41bfed5
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.test.SQLTestUtils
+
+abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils {
+
+  protected def dataSourceName: String
+  protected val codecConfigName: String
+  protected def availableCodecs: Seq[String]
+
+  def testWithAllCodecs(name: String)(f: => Unit): Unit = {
+    for (codec <- availableCodecs) {
+      test(s"$name - data source $dataSourceName - codec: $codec") {
+        withSQLConf(codecConfigName -> codec) {
+          f
+        }
+      }
+    }
+  }
+
+  testWithAllCodecs("write and read - single partition") {
+    withTempPath { dir =>
+      testData
+        .repartition(1)
+        .write
+        .format(dataSourceName)
+        .save(dir.getCanonicalPath)
+
+      val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath)
+      checkAnswer(df, testData)
+    }
+  }
+
+  testWithAllCodecs("write and read") {
+    withTempPath { dir =>
+      testData
+        .repartition(5)
+        .write
+        .format(dataSourceName)
+        .save(dir.getCanonicalPath)
+
+      val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath)
+      checkAnswer(df, testData)
+    }
+  }
+}
+
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala
new file mode 100644
index 0000000000000..7249931636f4f
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.orc
+
+import org.apache.spark.sql.execution.datasources.DataSourceCodecTest
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+class OrcCodecTestSuite extends DataSourceCodecTest with SharedSparkSession{
+
+  override def dataSourceName: String = "orc"
+  override val codecConfigName = SQLConf.ORC_COMPRESSION.key
+  override protected def availableCodecs = Seq("none", "uncompressed", "snappy",
+    "zlib", "zstd", "lz4", "lzo")
+}
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala
new file mode 100644
index 0000000000000..e67c00993c491
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.parquet
+
+import org.apache.spark.sql.execution.datasources.DataSourceCodecTest
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+
+class ParquetCodecTestSuite extends DataSourceCodecTest with SharedSparkSession {
+
+  override def dataSourceName: String = "parquet"
+  override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key
+  // Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
+  // TODO (SPARK-36669): Add "lz4" back after fix it.
+  override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy",
+    "gzip", "brotli", "zstd")
+}
+

From cbb6f0a696b33744ac074bb835c377b0e242e852 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 6 Sep 2021 00:31:35 -0700
Subject: [PATCH 02/10] Add lz4 wrapper classes as workaround for SPARK-36669.

---
 .../shaded/net/jpountz/lz4/LZ4Compressor.java | 37 ++++++++++++++
 .../shaded/net/jpountz/lz4/LZ4Factory.java    | 48 +++++++++++++++++++
 .../net/jpountz/lz4/LZ4SafeDecompressor.java  | 36 ++++++++++++++
 sql/core/pom.xml                              |  5 ++
 .../parquet/ParquetCodecTestSuite.scala       |  3 +-
 5 files changed, 127 insertions(+), 2 deletions(-)
 create mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
 create mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
 create mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java

diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
new file mode 100644
index 0000000000000..bdd568e7265b8
--- /dev/null
+++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.shaded.net.jpountz.lz4;
+
+/**
+ * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release
+ * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need
+ * implement all net.jpountz.lz4.LZ4Compressor API, just the ones used by Hadoop Lz4Compressor.
+ */
+public final class LZ4Compressor {
+
+  private net.jpountz.lz4.LZ4Compressor lz4Compressor;
+
+  public LZ4Compressor(net.jpountz.lz4.LZ4Compressor lz4Compressor) {
+    this.lz4Compressor = lz4Compressor;
+  }
+
+  public void compress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) {
+    lz4Compressor.compress(src, dest);
+  }
+}
+
diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
new file mode 100644
index 0000000000000..12619a7149e0b
--- /dev/null
+++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.shaded.net.jpountz.lz4;
+
+/**
+ * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release
+ * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need
+ * implement all net.jpountz.lz4.LZ4Factory API, just the ones used by Hadoop Lz4Compressor.
+ */
+public final class LZ4Factory {
+
+  private net.jpountz.lz4.LZ4Factory lz4Factory;
+
+  public LZ4Factory(net.jpountz.lz4.LZ4Factory lz4Factory) {
+    this.lz4Factory = lz4Factory;
+  }
+
+  public static LZ4Factory fastestInstance() {
+	return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance());
+  }
+
+  public LZ4Compressor highCompressor() {
+	return new LZ4Compressor(lz4Factory.highCompressor());
+  }
+
+  public LZ4Compressor fastCompressor() {
+	return new LZ4Compressor(lz4Factory.fastCompressor());
+  }
+
+  public LZ4SafeDecompressor safeDecompressor() {
+    return new LZ4SafeDecompressor(lz4Factory.safeDecompressor());
+  }
+}
\ No newline at end of file
diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
new file mode 100644
index 0000000000000..fa99affea6856
--- /dev/null
+++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.shaded.net.jpountz.lz4;
+
+/**
+ * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release
+ * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need
+ * implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones used by Hadoop
+ * Lz4Decompressor.
+ */
+public final class LZ4SafeDecompressor {
+  private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor;
+
+  public LZ4SafeDecompressor(net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor) {
+	this.lz4Decompressor = lz4Decompressor;
+  }
+
+  public void decompress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) {
+    lz4Decompressor.decompress(src, dest);
+  }
+}
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index d7e9cac744ed7..598826ae50ef5 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -178,6 +178,11 @@
       <artifactId>htmlunit-driver</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.lz4</groupId>
+      <artifactId>lz4-java</artifactId>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>com.github.rdblue</groupId>
       <artifactId>brotli-codec</artifactId>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala
index e67c00993c491..d28627c22c482 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala
@@ -26,8 +26,7 @@ class ParquetCodecTestSuite extends DataSourceCodecTest with SharedSparkSession
   override def dataSourceName: String = "parquet"
   override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key
   // Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
-  // TODO (SPARK-36669): Add "lz4" back after fix it.
   override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy",
-    "gzip", "brotli", "zstd")
+    "gzip", "brotli", "zstd", "lz4")
 }
 

From ab5fc9c2b95c0dc9bde0627c164c991e7a618a0d Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 6 Sep 2021 01:42:01 -0700
Subject: [PATCH 03/10] Refactor tests: merge Parquet and ORC codec suites into FileSourceCodecSuite.

---
 ...cTest.scala => FileSourceCodecSuite.scala} | 33 ++++++++++---------
 .../datasources/orc/OrcCodecTestSuite.scala   | 31 -----------------
 .../parquet/ParquetCodecTestSuite.scala       | 32 ------------------
 3 files changed, 18 insertions(+), 78 deletions(-)
 rename sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/{DataSourceCodecTest.scala => FileSourceCodecSuite.scala} (64%)
 delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala
 delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
similarity index 64%
rename from sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala
rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
index 63cbed41bfed5..7a8d99eb7e68f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceCodecTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
@@ -18,9 +18,10 @@
 package org.apache.spark.sql.execution.datasources
 
 import org.apache.spark.sql.QueryTest
-import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
 
-abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils {
+trait FileSourceCodecSuite extends QueryTest with SQLTestUtils {
 
   protected def dataSourceName: String
   protected val codecConfigName: String
@@ -36,19 +37,6 @@ abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils {
     }
   }
 
-  testWithAllCodecs("write and read - single partition") {
-    withTempPath { dir =>
-      testData
-        .repartition(1)
-        .write
-        .format(dataSourceName)
-        .save(dir.getCanonicalPath)
-
-      val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath)
-      checkAnswer(df, testData)
-    }
-  }
-
   testWithAllCodecs("write and read") {
     withTempPath { dir =>
       testData
@@ -63,4 +51,19 @@ abstract class DataSourceCodecTest extends QueryTest with SQLTestUtils {
   }
 }
 
+class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
 
+  override def dataSourceName: String = "parquet"
+  override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key
+  // Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
+  override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy",
+    "gzip", "brotli", "zstd", "lz4")
+}
+
+class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession{
+
+  override def dataSourceName: String = "orc"
+  override val codecConfigName = SQLConf.ORC_COMPRESSION.key
+  override protected def availableCodecs = Seq("none", "uncompressed", "snappy",
+    "zlib", "zstd", "lz4", "lzo")
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala
deleted file mode 100644
index 7249931636f4f..0000000000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcCodecTestSuite.scala
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.orc
-
-import org.apache.spark.sql.execution.datasources.DataSourceCodecTest
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SharedSparkSession
-
-class OrcCodecTestSuite extends DataSourceCodecTest with SharedSparkSession{
-
-  override def dataSourceName: String = "orc"
-  override val codecConfigName = SQLConf.ORC_COMPRESSION.key
-  override protected def availableCodecs = Seq("none", "uncompressed", "snappy",
-    "zlib", "zstd", "lz4", "lzo")
-}
-
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala
deleted file mode 100644
index d28627c22c482..0000000000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCodecTestSuite.scala
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.parquet
-
-import org.apache.spark.sql.execution.datasources.DataSourceCodecTest
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.test.SharedSparkSession
-
-class ParquetCodecTestSuite extends DataSourceCodecTest with SharedSparkSession {
-
-  override def dataSourceName: String = "parquet"
-  override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key
-  // Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
-  override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy",
-    "gzip", "brotli", "zstd", "lz4")
-}
-

From 5923b6f1cbadd4790d0c73a3b9dcfdb38cbea621 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 6 Sep 2021 02:01:14 -0700
Subject: [PATCH 04/10] Fix Java style and rename dataSourceName to format.

---
 .../hadoop/shaded/net/jpountz/lz4/LZ4Factory.java    |  8 ++++----
 .../shaded/net/jpountz/lz4/LZ4SafeDecompressor.java  |  2 +-
 .../execution/datasources/FileSourceCodecSuite.scala | 12 ++++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
index 12619a7149e0b..d0b4ac09a3e5a 100644
--- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
+++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
@@ -31,18 +31,18 @@ public LZ4Factory(net.jpountz.lz4.LZ4Factory lz4Factory) {
   }
 
   public static LZ4Factory fastestInstance() {
-	return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance());
+    return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance());
   }
 
   public LZ4Compressor highCompressor() {
-	return new LZ4Compressor(lz4Factory.highCompressor());
+    return new LZ4Compressor(lz4Factory.highCompressor());
   }
 
   public LZ4Compressor fastCompressor() {
-	return new LZ4Compressor(lz4Factory.fastCompressor());
+    return new LZ4Compressor(lz4Factory.fastCompressor());
   }
 
   public LZ4SafeDecompressor safeDecompressor() {
     return new LZ4SafeDecompressor(lz4Factory.safeDecompressor());
   }
-}
\ No newline at end of file
+}
diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
index fa99affea6856..3d6259a2347a7 100644
--- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
+++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
@@ -27,7 +27,7 @@ public final class LZ4SafeDecompressor {
   private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor;
 
   public LZ4SafeDecompressor(net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor) {
-	this.lz4Decompressor = lz4Decompressor;
+    this.lz4Decompressor = lz4Decompressor;
   }
 
   public void decompress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
index 7a8d99eb7e68f..6a344acce3860 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
@@ -23,13 +23,13 @@ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
 
 trait FileSourceCodecSuite extends QueryTest with SQLTestUtils {
 
-  protected def dataSourceName: String
+  protected def format: String
   protected val codecConfigName: String
   protected def availableCodecs: Seq[String]
 
   def testWithAllCodecs(name: String)(f: => Unit): Unit = {
     for (codec <- availableCodecs) {
-      test(s"$name - data source $dataSourceName - codec: $codec") {
+      test(s"$name - file source $format - codec: $codec") {
         withSQLConf(codecConfigName -> codec) {
           f
         }
@@ -42,10 +42,10 @@ trait FileSourceCodecSuite extends QueryTest with SQLTestUtils {
       testData
         .repartition(5)
         .write
-        .format(dataSourceName)
+        .format(format)
         .save(dir.getCanonicalPath)
 
-      val df = spark.read.format(dataSourceName).load(dir.getCanonicalPath)
+      val df = spark.read.format(format).load(dir.getCanonicalPath)
       checkAnswer(df, testData)
     }
   }
@@ -53,7 +53,7 @@ trait FileSourceCodecSuite extends QueryTest with SQLTestUtils {
 
 class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
 
-  override def dataSourceName: String = "parquet"
+  override def format: String = "parquet"
   override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key
   // Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
   override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy",
@@ -62,7 +62,7 @@ class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
 
 class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession{
 
-  override def dataSourceName: String = "orc"
+  override def format: String = "orc"
   override val codecConfigName = SQLConf.ORC_COMPRESSION.key
   override protected def availableCodecs = Seq("none", "uncompressed", "snappy",
     "zlib", "zstd", "lz4", "lzo")

From 194faf2ef4af530677130101c750d1ac8a96e63b Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 6 Sep 2021 14:14:00 -0700
Subject: [PATCH 05/10] Add JIRA todo.

---
 .../hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java      | 7 ++++---
 .../apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java  | 7 ++++---
 .../shaded/net/jpountz/lz4/LZ4SafeDecompressor.java       | 8 ++++----
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
index bdd568e7265b8..55c85566472c7 100644
--- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
+++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
@@ -18,9 +18,10 @@
 package org.apache.hadoop.shaded.net.jpountz.lz4;
 
 /**
- * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release
- * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need
- * implement all net.jpountz.lz4.LZ4Compressor API, just the ones used by Hadoop Lz4Compressor.
+ * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after
+ * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries.
+ * This does not need implement all net.jpountz.lz4.LZ4Compressor API, just the ones used
+ * by Hadoop Lz4Compressor.
  */
 public final class LZ4Compressor {
 
diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
index d0b4ac09a3e5a..61829b2728bce 100644
--- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
+++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
@@ -18,9 +18,10 @@
 package org.apache.hadoop.shaded.net.jpountz.lz4;
 
 /**
- * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release
- * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need
- * implement all net.jpountz.lz4.LZ4Factory API, just the ones used by Hadoop Lz4Compressor.
+ * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after
+ * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries.
+ * This does not need implement all net.jpountz.lz4.LZ4Factory API, just the ones used by
+ * Hadoop Lz4Compressor.
  */
 public final class LZ4Factory {
 
diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
index 3d6259a2347a7..cd3dd6f060f52 100644
--- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
+++ b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
@@ -18,10 +18,10 @@
 package org.apache.hadoop.shaded.net.jpountz.lz4;
 
 /**
- * A temporary workaround for SPARK-36669. We should remove this after Hadoop 3.3.2 release
- * which fixes the LZ4 relocation in shaded Hadoop client libraries. This does not need
- * implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones used by Hadoop
- * Lz4Decompressor.
+ * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after
+ * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries.
+ * This does not need implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones
+ * used by Hadoop Lz4Decompressor.
  */
 public final class LZ4SafeDecompressor {
   private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor;

From ed5f33d997ceaf7ebc7c1e46ce1a1ca5385cdff6 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 6 Sep 2021 15:19:16 -0700
Subject: [PATCH 06/10] Remove lz4-java test dependency.

---
 sql/core/pom.xml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 598826ae50ef5..d7e9cac744ed7 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -178,11 +178,6 @@
       <artifactId>htmlunit-driver</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>org.lz4</groupId>
-      <artifactId>lz4-java</artifactId>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>com.github.rdblue</groupId>
       <artifactId>brotli-codec</artifactId>

From 0029f332b4b91efbaed1fee0a7fb9dcac7c183cf Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 6 Sep 2021 19:02:32 -0700
Subject: [PATCH 07/10] Address review comments: add explicit types to codecConfigName and fix brace spacing.

---
 .../sql/execution/datasources/FileSourceCodecSuite.scala    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
index 6a344acce3860..aa9bfab61c37e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
@@ -54,16 +54,16 @@ trait FileSourceCodecSuite extends QueryTest with SQLTestUtils {
 class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
 
   override def format: String = "parquet"
-  override val codecConfigName = SQLConf.PARQUET_COMPRESSION.key
+  override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key
   // Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
   override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy",
     "gzip", "brotli", "zstd", "lz4")
 }
 
-class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession{
+class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
 
   override def format: String = "orc"
-  override val codecConfigName = SQLConf.ORC_COMPRESSION.key
+  override val codecConfigName: String = SQLConf.ORC_COMPRESSION.key
   override protected def availableCodecs = Seq("none", "uncompressed", "snappy",
     "zlib", "zstd", "lz4", "lzo")
 }

From b6f20cf3380a3295efbcc53e1394a96ccce9f013 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 6 Sep 2021 19:49:32 -0700
Subject: [PATCH 08/10] Exclude "brotli" for non-supported arch.

---
 .../sql/execution/datasources/FileSourceCodecSuite.scala | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
index aa9bfab61c37e..b62a9b77e0043 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
@@ -56,8 +56,13 @@ class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
   override def format: String = "parquet"
   override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key
   // Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
-  override protected def availableCodecs: Seq[String] = Seq("none", "uncompressed", "snappy",
-    "gzip", "brotli", "zstd", "lz4")
+  override protected def availableCodecs: Seq[String] =
+    if (System.getProperty("os.arch") == "aarch64") {
+      // Exclude "brotli" due to PARQUET-1975.
+      Seq("none", "uncompressed", "snappy", "gzip", "zstd", "lz4")
+    } else {
+      Seq("none", "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4")
+    }
 }
 
 class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession {

From e76393b8b6b905d4e9c182e766d99ed2e0edd04d Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Tue, 7 Sep 2021 10:16:28 -0700
Subject: [PATCH 09/10] Remove wrapper classes. Keep only working codec.

---
 .../shaded/net/jpountz/lz4/LZ4Compressor.java | 38 --------------
 .../shaded/net/jpountz/lz4/LZ4Factory.java    | 49 -------------------
 .../net/jpountz/lz4/LZ4SafeDecompressor.java  | 36 --------------
 .../datasources/FileSourceCodecSuite.scala    |  5 +-
 4 files changed, 3 insertions(+), 125 deletions(-)
 delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
 delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
 delete mode 100644 core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java

diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
deleted file mode 100644
index 55c85566472c7..0000000000000
--- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.shaded.net.jpountz.lz4;
-
-/**
- * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after
- * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries.
- * This does not need implement all net.jpountz.lz4.LZ4Compressor API, just the ones used
- * by Hadoop Lz4Compressor.
- */
-public final class LZ4Compressor {
-
-  private net.jpountz.lz4.LZ4Compressor lz4Compressor;
-
-  public LZ4Compressor(net.jpountz.lz4.LZ4Compressor lz4Compressor) {
-    this.lz4Compressor = lz4Compressor;
-  }
-
-  public void compress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) {
-    lz4Compressor.compress(src, dest);
-  }
-}
-
diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
deleted file mode 100644
index 61829b2728bce..0000000000000
--- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.shaded.net.jpountz.lz4;
-
-/**
- * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after
- * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries.
- * This does not need implement all net.jpountz.lz4.LZ4Factory API, just the ones used by
- * Hadoop Lz4Compressor.
- */
-public final class LZ4Factory {
-
-  private net.jpountz.lz4.LZ4Factory lz4Factory;
-
-  public LZ4Factory(net.jpountz.lz4.LZ4Factory lz4Factory) {
-    this.lz4Factory = lz4Factory;
-  }
-
-  public static LZ4Factory fastestInstance() {
-    return new LZ4Factory(net.jpountz.lz4.LZ4Factory.fastestInstance());
-  }
-
-  public LZ4Compressor highCompressor() {
-    return new LZ4Compressor(lz4Factory.highCompressor());
-  }
-
-  public LZ4Compressor fastCompressor() {
-    return new LZ4Compressor(lz4Factory.fastCompressor());
-  }
-
-  public LZ4SafeDecompressor safeDecompressor() {
-    return new LZ4SafeDecompressor(lz4Factory.safeDecompressor());
-  }
-}
diff --git a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java b/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
deleted file mode 100644
index cd3dd6f060f52..0000000000000
--- a/core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.shaded.net.jpountz.lz4;
-
-/**
- * TODO(SPARK-36679): A temporary workaround for SPARK-36669. We should remove this after
- * Hadoop 3.3.2 release which fixes the LZ4 relocation in shaded Hadoop client libraries.
- * This does not need implement all net.jpountz.lz4.LZ4SafeDecompressor API, just the ones
- * used by Hadoop Lz4Decompressor.
- */
-public final class LZ4SafeDecompressor {
-  private net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor;
-
-  public LZ4SafeDecompressor(net.jpountz.lz4.LZ4SafeDecompressor lz4Decompressor) {
-    this.lz4Decompressor = lz4Decompressor;
-  }
-
-  public void decompress(java.nio.ByteBuffer src, java.nio.ByteBuffer dest) {
-    lz4Decompressor.decompress(src, dest);
-  }
-}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
index b62a9b77e0043..4b7e53743ba93 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
@@ -56,12 +56,13 @@ class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
   override def format: String = "parquet"
   override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key
   // Exclude "lzo" because it is GPL-licenced so not included in Hadoop.
+  // TODO(SPARK-36669): "lz4" codec fails due to HADOOP-17891.
   override protected def availableCodecs: Seq[String] =
     if (System.getProperty("os.arch") == "aarch64") {
       // Exclude "brotli" due to PARQUET-1975.
-      Seq("none", "uncompressed", "snappy", "gzip", "zstd", "lz4")
+      Seq("none", "uncompressed", "snappy", "gzip", "zstd")
     } else {
-      Seq("none", "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4")
+      Seq("none", "uncompressed", "snappy", "gzip", "brotli", "zstd")
     }
 }
 

From 16e7db926486ca83166b214a0a53feafcae03f3b Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Tue, 7 Sep 2021 12:08:39 -0700
Subject: [PATCH 10/10] Address review comments: mix SharedSparkSession into FileSourceCodecSuite and tidy pom.xml.

---
 pom.xml                                                     | 1 -
 .../sql/execution/datasources/FileSourceCodecSuite.scala    | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/pom.xml b/pom.xml
index d91a4778d9ba1..81a0126539b1d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -300,7 +300,6 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
-
     <repository>
       <id>jitpack.io</id>
       <url>https://jitpack.io</url>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
index 4b7e53743ba93..92b887e948da9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
@@ -21,7 +21,7 @@ import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
 
-trait FileSourceCodecSuite extends QueryTest with SQLTestUtils {
+trait FileSourceCodecSuite extends QueryTest with SQLTestUtils with SharedSparkSession {
 
   protected def format: String
   protected val codecConfigName: String
@@ -51,7 +51,7 @@ trait FileSourceCodecSuite extends QueryTest with SQLTestUtils {
   }
 }
 
-class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
+class ParquetCodecSuite extends FileSourceCodecSuite {
 
   override def format: String = "parquet"
   override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key
@@ -66,7 +66,7 @@ class ParquetCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
     }
 }
 
-class OrcCodecSuite extends FileSourceCodecSuite with SharedSparkSession {
+class OrcCodecSuite extends FileSourceCodecSuite {
 
   override def format: String = "orc"
   override val codecConfigName: String = SQLConf.ORC_COMPRESSION.key