From 5f151b811ac3660d31c03fc7bbda12796c2a2c06 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 30 Jan 2019 10:17:09 -0800 Subject: [PATCH 1/5] Update Parquet to 1.10.1 with notEq pushdown fix. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6676c5dcf979c..cbac15f1dfad1 100644 --- a/pom.xml +++ b/pom.xml @@ -132,7 +132,7 @@ 2.1.0 10.12.1.1 - 1.10.0 + 1.10.1 1.5.4 nohive 1.6.0 From 8606395ca59eb3eca6b12230b133c447090e9678 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 30 Jan 2019 10:37:19 -0800 Subject: [PATCH 2/5] Update dependency manifests. --- dev/deps/spark-deps-hadoop-2.7 | 10 +++++----- dev/deps/spark-deps-hadoop-3.1 | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 0154fd26586b7..d41be281a1926 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -161,13 +161,13 @@ orc-shims-1.5.4.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.8.jar -parquet-column-1.10.0.jar -parquet-common-1.10.0.jar -parquet-encoding-1.10.0.jar +parquet-column-1.10.1.jar +parquet-common-1.10.1.jar +parquet-encoding-1.10.1.jar parquet-format-2.4.0.jar -parquet-hadoop-1.10.0.jar +parquet-hadoop-1.10.1.jar parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.10.0.jar +parquet-jackson-1.10.1.jar protobuf-java-2.5.0.jar py4j-0.10.8.1.jar pyrolite-4.13.jar diff --git a/dev/deps/spark-deps-hadoop-3.1 b/dev/deps/spark-deps-hadoop-3.1 index 7d5325c55e2e4..a6069c5f8ae88 100644 --- a/dev/deps/spark-deps-hadoop-3.1 +++ b/dev/deps/spark-deps-hadoop-3.1 @@ -178,13 +178,13 @@ orc-shims-1.5.4.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.8.jar -parquet-column-1.10.0.jar -parquet-common-1.10.0.jar -parquet-encoding-1.10.0.jar +parquet-column-1.10.1.jar +parquet-common-1.10.1.jar +parquet-encoding-1.10.1.jar parquet-format-2.4.0.jar -parquet-hadoop-1.10.0.jar +parquet-hadoop-1.10.1.jar 
parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.10.0.jar +parquet-jackson-1.10.1.jar protobuf-java-2.5.0.jar py4j-0.10.8.1.jar pyrolite-4.13.jar From 0e21e69680aa4e58e8161a4b96b7a7be59fccad9 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 30 Jan 2019 10:39:51 -0800 Subject: [PATCH 3/5] Add Parquet 1.10.1 candidate repository. --- pom.xml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pom.xml b/pom.xml index cbac15f1dfad1..380210f125cf7 100644 --- a/pom.xml +++ b/pom.xml @@ -244,6 +244,18 @@ false + + parquet-1.10.1 + + Parquet 1.10.1 RC0 + https://repository.apache.org/content/repositories/orgapacheparquet-1022/ + + true + + + false + + From e1e10f55af31a9b39e857ca0c760b0f6d090ff8b Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 31 Jan 2019 09:39:04 +0800 Subject: [PATCH 4/5] Add a test --- .../datasources/parquet/ParquetQuerySuite.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index ce1dc6e159c61..beb89d91c9266 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -890,6 +890,21 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext } } } + + test("SPARK-26677: negated null-safe equality comparison should not filter matched row groups") { + (true :: false :: Nil).foreach { vectorized => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) { + withTempPath { path => + // Repeated values for dictionary encoding. 
+ Seq(Some("A"), Some("A"), None).toDF.repartition(1) + .write.parquet(path.getAbsolutePath) + val df = spark.read.parquet(path.getAbsolutePath) + checkAnswer(stripSparkFilter(df.where("NOT (value <=> 'A')")), df) + } + } + } + } + } object TestingUDT { From b0a61ae44c5644a79efe3ad0d21024c65d7f1556 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Fri, 1 Feb 2019 10:18:36 -0800 Subject: [PATCH 5/5] Revert "Add Parquet 1.10.1 candidate repository." This reverts commit 0e21e69680aa4e58e8161a4b96b7a7be59fccad9. --- pom.xml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pom.xml b/pom.xml index 380210f125cf7..cbac15f1dfad1 100644 --- a/pom.xml +++ b/pom.xml @@ -244,18 +244,6 @@ false - - parquet-1.10.1 - - Parquet 1.10.1 RC0 - https://repository.apache.org/content/repositories/orgapacheparquet-1022/ - - true - - - false - -