diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index da5c506f0f041..9db3b5a158c3a 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -201,12 +201,12 @@ orc-shims/1.6.10//orc-shims-1.6.10.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.12.0//parquet-column-1.12.0.jar -parquet-common/1.12.0//parquet-common-1.12.0.jar -parquet-encoding/1.12.0//parquet-encoding-1.12.0.jar -parquet-format-structures/1.12.0//parquet-format-structures-1.12.0.jar -parquet-hadoop/1.12.0//parquet-hadoop-1.12.0.jar -parquet-jackson/1.12.0//parquet-jackson-1.12.0.jar +parquet-column/1.12.1//parquet-column-1.12.1.jar +parquet-common/1.12.1//parquet-common-1.12.1.jar +parquet-encoding/1.12.1//parquet-encoding-1.12.1.jar +parquet-format-structures/1.12.1//parquet-format-structures-1.12.1.jar +parquet-hadoop/1.12.1//parquet-hadoop-1.12.1.jar +parquet-jackson/1.12.1//parquet-jackson-1.12.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9.2//py4j-0.10.9.2.jar pyrolite/4.30//pyrolite-4.30.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 8aa8fe49994cd..d20f6339e8a15 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -172,12 +172,12 @@ orc-shims/1.6.10//orc-shims-1.6.10.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.12.0//parquet-column-1.12.0.jar -parquet-common/1.12.0//parquet-common-1.12.0.jar -parquet-encoding/1.12.0//parquet-encoding-1.12.0.jar -parquet-format-structures/1.12.0//parquet-format-structures-1.12.0.jar -parquet-hadoop/1.12.0//parquet-hadoop-1.12.0.jar -parquet-jackson/1.12.0//parquet-jackson-1.12.0.jar +parquet-column/1.12.1//parquet-column-1.12.1.jar +parquet-common/1.12.1//parquet-common-1.12.1.jar +parquet-encoding/1.12.1//parquet-encoding-1.12.1.jar +parquet-format-structures/1.12.1//parquet-format-structures-1.12.1.jar +parquet-hadoop/1.12.1//parquet-hadoop-1.12.1.jar +parquet-jackson/1.12.1//parquet-jackson-1.12.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9.2//py4j-0.10.9.2.jar pyrolite/4.30//pyrolite-4.30.jar diff --git a/pom.xml b/pom.xml index 81a0126539b1d..989a230f2c856 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ 2.8.0 10.14.2.0 - 1.12.0 + 1.12.1 1.6.10 9.4.43.v20210629 4.0.3 diff --git a/sql/core/src/test/resources/test-data/malformed-file-offset.parquet b/sql/core/src/test/resources/test-data/malformed-file-offset.parquet new file mode 100644 index 0000000000000..5abeabea42ba9 Binary files /dev/null and b/sql/core/src/test/resources/test-data/malformed-file-offset.parquet differ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index a330b82de2d0c..e03a50b69ae5f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -855,6 +855,12 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } + test("SPARK-36726: test incorrect Parquet row group file offset") { + readParquetFile(testFile("test-data/malformed-file-offset.parquet")) { df => + assert(df.count() == 3650) + } + } + test("VectorizedParquetRecordReader - direct path read") { val data = (0 to 10).map(i => (i, (i + 'a').toChar.toString)) withTempPath { dir =>