apache · viirya · Sep 4, 2021 · Sep 6, 2021 · Sep 6, 2021 · Sep 6, 2021
diff --git a/pom.xml b/pom.xml
@@ -300,6 +300,12 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
+    <repository>
+      <id>jitpack.io</id>
+      <url>https://jitpack.io</url>
+      <name>Jitpack.io repository</name>
+      <!-- needed for brotli-codec -->
+    </repository>
   </repositories>
   <pluginRepositories>
     <pluginRepository>

diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
@@ -274,7 +274,9 @@ object SparkBuild extends PomBuild {
       "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/",
       DefaultMavenRepository,
       Resolver.mavenLocal,
-      Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns)
+      Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns),
+      // needed for brotli-codec
+      "jitpack.io" at "https://jitpack.io"
     ),
     externalResolvers := resolvers.value,
     otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value,

diff --git a/sql/core/pom.xml b/sql/core/pom.xml
@@ -178,6 +178,12 @@
       <artifactId>htmlunit-driver</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>com.github.rdblue</groupId>
+      <artifactId>brotli-codec</artifactId>
+      <version>0.1.1</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
   <build>
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>

diff --git a/...core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/...core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources
+
+import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils}
+
+/** Writes `testData` with every available codec of a file source and reads it back. */
+trait FileSourceCodecSuite extends QueryTest with SQLTestUtils with SharedSparkSession {
+
+  /** Name handed to `.format(...)` for both the write and the read. */
+  protected def format: String
+  /** SQL conf key set to the codec name around each test body. A `def` (implementations
+   *  may still override with a `val`) so trait init code can never read it unset. */
+  protected def codecConfigName: String
+  /** Codec names; one test is registered per entry. */
+  protected def availableCodecs: Seq[String]
+
+  /** Registers `f` once per codec; each run has `codecConfigName` set to that codec. */
+  def testWithAllCodecs(name: String)(f: => Unit): Unit = {
+    availableCodecs.foreach { codec =>
+      test(s"$name - file source $format - codec: $codec") {
+        withSQLConf(codecConfigName -> codec)(f)
+      }
+    }
+  }
+
+  testWithAllCodecs("write and read") {
+    withTempPath { dir =>
+      val path = dir.getCanonicalPath
+      // Save, reload from the same path, and compare against the original rows.
+      testData.repartition(5).write.format(format).save(path)
+      checkAnswer(spark.read.format(format).load(path), testData)
+    }
+  }
+}
+
+/** Runs the [[FileSourceCodecSuite]] tests against the "parquet" format. */
+class ParquetCodecSuite extends FileSourceCodecSuite {
+
+  override def format: String = "parquet"
+  override val codecConfigName: String = SQLConf.PARQUET_COMPRESSION.key
+  // "lzo" is left out because it is GPL-licensed and so not included in Hadoop.
+  // TODO(SPARK-36669): "lz4" codec fails due to HADOOP-17891.
+  override protected def availableCodecs: Seq[String] = {
+    // "brotli" is only listed off aarch64 because of PARQUET-1975.
+    val onAarch64 = System.getProperty("os.arch") == "aarch64"
+    val brotli = if (onAarch64) Seq.empty[String] else Seq("brotli")
+    (Seq("none", "uncompressed", "snappy", "gzip") ++ brotli) :+ "zstd"
+  }
+}
+
+/** Runs the [[FileSourceCodecSuite]] tests against the "orc" format. */
+class OrcCodecSuite extends FileSourceCodecSuite {
+  override def format: String = "orc"
+  override val codecConfigName: String = SQLConf.ORC_COMPRESSION.key
+  override protected def availableCodecs: Seq[String] =
+    Seq("none", "uncompressed", "snappy", "zlib", "zstd", "lz4", "lzo")
+}