diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
index 544d5eccec03..41a5f03db5c4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
@@ -85,9 +85,13 @@ private[sql] object DataSourceStrategy extends Strategy with Logging {
         s"Selected $selected partitions out of $total, pruned $percentPruned% partitions."
       }
 
+      // Include every attribute referenced by the combined filters in the
+      // projection, so the filters can still be evaluated after column pruning.
+      // `distinct` on the Seq keeps the projection order deterministic (a Set would not).
+      val combineProjections =
+        (projects ++ combineFilters.flatMap(_.references)).distinct
       val scan = buildPartitionedTableScan(
         l,
-        projects,
+        combineProjections,
         pushedFilters,
         t.partitionSpec.partitionColumns,
         selectedPartitions)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
index e34875471f09..0b4c59038dd9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
@@ -194,4 +194,30 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSQLContext {
     assert(out1(4) === Row("Amy", null, null))
     assert(out1(5) === Row(null, null, null))
   }
+
+  test("dropna with partitionBy and groupBy") {
+    withTempPath { dir =>
+      // Partitioning by "id" moves that column into the directory layout, so
+      // reading it back exercises partition-column pruning under na.drop().
+      val input = sqlContext.range(10).withColumn("a", $"id".cast("int"))
+      input.write.partitionBy("id").parquet(dir.getCanonicalPath)
+
+      val readBack = sqlContext.read.parquet(dir.getCanonicalPath)
+
+      // No column is ever null, so dropna must keep all 10 rows.
+      checkAnswer(readBack.na.drop().groupBy().count(), Row(10L))
+    }
+  }
+
+  test("dropna with partitionBy") {
+    withTempPath { dir =>
+      val input = sqlContext.range(10).withColumn("a", $"id".cast("int"))
+      input.write.partitionBy("id").parquet(dir.getCanonicalPath)
+
+      val readBack = sqlContext.read.parquet(dir.getCanonicalPath)
+
+      // No column is ever null, so dropna must keep all 10 rows.
+      assert(readBack.na.drop().count() === 10)
+    }
+  }
 }