From be6cfc8db6e3724da865bd80e06b71bdaa81abc2 Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Mon, 7 Dec 2020 23:43:21 +0800
Subject: [PATCH 1/3] use dataSchema instead of schema

---
 .../execution/datasources/v2/parquet/ParquetScanBuilder.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
index 2f861356e949..0214d269f237 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
@@ -50,7 +50,7 @@ case class ParquetScanBuilder(
     val pushDownInFilterThreshold = sqlConf.parquetFilterPushDownInFilterThreshold
     val isCaseSensitive = sqlConf.caseSensitiveAnalysis
     val parquetSchema =
-      new SparkToParquetSchemaConverter(sparkSession.sessionState.conf).convert(schema)
+      new SparkToParquetSchemaConverter(sparkSession.sessionState.conf).convert(dataSchema)
     val parquetFilters = new ParquetFilters(parquetSchema, pushDownDate, pushDownTimestamp,
       pushDownDecimal, pushDownStringStartWith, pushDownInFilterThreshold, isCaseSensitive)
     parquetFilters.convertibleFilters(this.filters).toArray

From fb01c3f6d7241813975f2001329bc209ea371083 Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Tue, 8 Dec 2020 11:13:00 +0800
Subject: [PATCH 2/3] fix ExplainSuite

---
 sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
index ddc4f1dab8e6..c8c439490388 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala
@@ -367,7 +367,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite
       val basePath = dir.getCanonicalPath + "/" + fmt
       val pushFilterMaps = Map (
         "parquet" ->
-          "|PushedFilers: \\[.*\\(id\\), .*\\(value\\), .*\\(id,1\\), .*\\(value,2\\)\\]",
+          "|PushedFilers: \\[IsNotNull\\(value\\), GreaterThan\\(value,2\\)\\]",
         "orc" ->
           "|PushedFilers: \\[.*\\(id\\), .*\\(value\\), .*\\(id,1\\), .*\\(value,2\\)\\]",
         "csv" ->

From b9f8eb25c99e8270c1108cf76820a45fc0815efb Mon Sep 17 00:00:00 2001
From: yangjie01
Date: Wed, 9 Dec 2020 10:40:05 +0800
Subject: [PATCH 3/3] use readDataSchema instead of dataSchema

---
 .../execution/datasources/v2/parquet/ParquetScanBuilder.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
index 0214d269f237..44053830defe 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala
@@ -50,7 +50,7 @@ case class ParquetScanBuilder(
     val pushDownInFilterThreshold = sqlConf.parquetFilterPushDownInFilterThreshold
     val isCaseSensitive = sqlConf.caseSensitiveAnalysis
     val parquetSchema =
-      new SparkToParquetSchemaConverter(sparkSession.sessionState.conf).convert(dataSchema)
+      new SparkToParquetSchemaConverter(sparkSession.sessionState.conf).convert(readDataSchema())
     val parquetFilters = new ParquetFilters(parquetSchema, pushDownDate, pushDownTimestamp,
       pushDownDecimal, pushDownStringStartWith, pushDownInFilterThreshold, isCaseSensitive)
     parquetFilters.convertibleFilters(this.filters).toArray
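
Net effect of the series: the Parquet pushdown path now converts readDataSchema() (the schema left after column pruning) instead of the full schema, so only filters whose columns are actually read stay convertible. That lines up with the ExplainSuite change in patch 2/3, where the parquet expectation drops the id filters and keeps only IsNotNull(value) and GreaterThan(value,2). Below is a minimal, self-contained Scala sketch of that rule, not Spark source; PushdownSketch, Filter, and this convertibleFilters signature are illustrative names invented for the sketch.

object PushdownSketch {
  // A stand-in for a source filter: the column it references plus a label
  // for the predicate. Spark's real Filter hierarchy is richer than this.
  final case class Filter(column: String, predicate: String)

  // Keep only filters whose column exists in the schema the scan will
  // actually read -- the analogue of building ParquetFilters from
  // convert(readDataSchema()) rather than convert(dataSchema).
  def convertibleFilters(filters: Seq[Filter], readSchema: Set[String]): Seq[Filter] =
    filters.filter(f => readSchema.contains(f.column))

  def main(args: Array[String]): Unit = {
    val allFilters = Seq(
      Filter("id", "IsNotNull"),
      Filter("id", "GreaterThan(1)"),
      Filter("value", "IsNotNull"),
      Filter("value", "GreaterThan(2)"))
    // If pruning leaves only `value` in the read schema, the `id` filters
    // drop out, mirroring the updated ExplainSuite expectation.
    println(convertibleFilters(allFilters, readSchema = Set("value")))
    // Prints: List(Filter(value,IsNotNull), Filter(value,GreaterThan(2)))
  }
}

The design point: converting the pruned read schema keeps the pushed Parquet filter set consistent with the columns the reader will actually materialize, rather than pushing predicates on columns the scan never reads.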