@@ -22,8 +22,10 @@ import parquet.filter2.predicate.{FilterPredicate, Operators}
2222
2323import org .apache .spark .sql .catalyst .dsl .expressions ._
2424import org .apache .spark .sql .catalyst .expressions .{Attribute , Cast , Literal , Predicate , Row }
25- import org .apache .spark .sql .types ._
25+ import org .apache .spark .sql .catalyst .planning .PhysicalOperation
26+ import org .apache .spark .sql .sources .LogicalRelation
2627import org .apache .spark .sql .test .TestSQLContext
28+ import org .apache .spark .sql .types ._
2729import org .apache .spark .sql .{Column , DataFrame , QueryTest , SQLConf }
2830
2931/**
@@ -54,9 +56,17 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
5456 .select(output.map(e => Column (e)): _* )
5557 .where(Column (predicate))
5658
57- val maybeAnalyzedPredicate = query.queryExecution.executedPlan.collect {
58- case plan : ParquetTableScan => plan.columnPruningPred
59- }.flatten.reduceOption(_ && _)
59+ val maybeAnalyzedPredicate = {
60+ val forParquetTableScan = query.queryExecution.executedPlan.collect {
61+ case plan : ParquetTableScan => plan.columnPruningPred
62+ }.flatten.reduceOption(_ && _)
63+
64+ val forParquetDataSource = query.queryExecution.optimizedPlan.collect {
65+ case PhysicalOperation (_, filters, LogicalRelation (_ : ParquetRelation2 )) => filters
66+ }.flatten.reduceOption(_ && _)
67+
68+ forParquetTableScan.orElse(forParquetDataSource)
69+ }
6070
6171 assert(maybeAnalyzedPredicate.isDefined)
6272 maybeAnalyzedPredicate.foreach { pred =>
@@ -86,35 +96,38 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
8696
8797 test(" filter pushdown - boolean" ) {
8898 withParquetRDD((true :: false :: Nil ).map(b => Tuple1 .apply(Option (b)))) { implicit rdd =>
89- checkFilterPredicate(' _1 .isNull, classOf [Eq [_]], Seq .empty[Row ])
99+ checkFilterPredicate(' _1 .isNull, classOf [Eq [_]], Seq .empty[Row ])
90100 checkFilterPredicate(' _1 .isNotNull, classOf [NotEq [_]], Seq (Row (true ), Row (false )))
91101
92- checkFilterPredicate(' _1 === true , classOf [Eq [_]], true )
102+ checkFilterPredicate(' _1 === true , classOf [Eq [_]], true )
93103 checkFilterPredicate(' _1 !== true , classOf [NotEq [_]], false )
94104 }
95105 }
96106
97107 test(" filter pushdown - short" ) {
98108 withParquetRDD((1 to 4 ).map(i => Tuple1 (Option (i.toShort)))) { implicit rdd =>
99- checkFilterPredicate(Cast (' _1 , IntegerType ) === 1 , classOf [Eq [_]], 1 )
100- checkFilterPredicate(Cast (' _1 , IntegerType ) !== 1 , classOf [NotEq [_]], (2 to 4 ).map(Row .apply(_)))
101-
102- checkFilterPredicate(Cast (' _1 , IntegerType ) < 2 , classOf [Lt [_]], 1 )
103- checkFilterPredicate(Cast (' _1 , IntegerType ) > 3 , classOf [Gt [_]], 4 )
109+ checkFilterPredicate(Cast (' _1 , IntegerType ) === 1 , classOf [Eq [_]], 1 )
110+ checkFilterPredicate(
111+ Cast (' _1 , IntegerType ) !== 1 , classOf [NotEq [_]], (2 to 4 ).map(Row .apply(_)))
112+
113+ checkFilterPredicate(Cast (' _1 , IntegerType ) < 2 , classOf [Lt [_]], 1 )
114+ checkFilterPredicate(Cast (' _1 , IntegerType ) > 3 , classOf [Gt [_]], 4 )
104115 checkFilterPredicate(Cast (' _1 , IntegerType ) <= 1 , classOf [LtEq [_]], 1 )
105116 checkFilterPredicate(Cast (' _1 , IntegerType ) >= 4 , classOf [GtEq [_]], 4 )
106-
107- checkFilterPredicate(Literal (1 ) === Cast (' _1 , IntegerType ), classOf [Eq [_]], 1 )
108- checkFilterPredicate(Literal (2 ) > Cast (' _1 , IntegerType ), classOf [Lt [_]], 1 )
109- checkFilterPredicate(Literal (3 ) < Cast (' _1 , IntegerType ), classOf [Gt [_]], 4 )
110- checkFilterPredicate(Literal (1 ) >= Cast (' _1 , IntegerType ), classOf [LtEq [_]], 1 )
111- checkFilterPredicate(Literal (4 ) <= Cast (' _1 , IntegerType ), classOf [GtEq [_]], 4 )
112-
117+
118+ checkFilterPredicate(Literal (1 ) === Cast (' _1 , IntegerType ), classOf [Eq [_]], 1 )
119+ checkFilterPredicate(Literal (2 ) > Cast (' _1 , IntegerType ), classOf [Lt [_]], 1 )
120+ checkFilterPredicate(Literal (3 ) < Cast (' _1 , IntegerType ), classOf [Gt [_]], 4 )
121+ checkFilterPredicate(Literal (1 ) >= Cast (' _1 , IntegerType ), classOf [LtEq [_]], 1 )
122+ checkFilterPredicate(Literal (4 ) <= Cast (' _1 , IntegerType ), classOf [GtEq [_]], 4 )
123+
113124 checkFilterPredicate(! (Cast (' _1 , IntegerType ) < 4 ), classOf [GtEq [_]], 4 )
114- checkFilterPredicate(Cast (' _1 , IntegerType ) > 2 && Cast (' _1 , IntegerType ) < 4 ,
115- classOf [Operators .And ], 3 )
116- checkFilterPredicate(Cast (' _1 , IntegerType ) < 2 || Cast (' _1 , IntegerType ) > 3 ,
117- classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
125+ checkFilterPredicate(
126+ Cast (' _1 , IntegerType ) > 2 && Cast (' _1 , IntegerType ) < 4 , classOf [Operators .And ], 3 )
127+ checkFilterPredicate(
128+ Cast (' _1 , IntegerType ) < 2 || Cast (' _1 , IntegerType ) > 3 ,
129+ classOf [Operators .Or ],
130+ Seq (Row (1 ), Row (4 )))
118131 }
119132 }
120133
@@ -131,15 +144,15 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
131144 checkFilterPredicate(' _1 <= 1 , classOf [LtEq [_]], 1 )
132145 checkFilterPredicate(' _1 >= 4 , classOf [GtEq [_]], 4 )
133146
134- checkFilterPredicate(Literal (1 ) === ' _1 , classOf [Eq [_]], 1 )
135- checkFilterPredicate(Literal (2 ) > ' _1 , classOf [Lt [_]], 1 )
136- checkFilterPredicate(Literal (3 ) < ' _1 , classOf [Gt [_]], 4 )
147+ checkFilterPredicate(Literal (1 ) === ' _1 , classOf [Eq [_]], 1 )
148+ checkFilterPredicate(Literal (2 ) > ' _1 , classOf [Lt [_]], 1 )
149+ checkFilterPredicate(Literal (3 ) < ' _1 , classOf [Gt [_]], 4 )
137150 checkFilterPredicate(Literal (1 ) >= ' _1 , classOf [LtEq [_]], 1 )
138151 checkFilterPredicate(Literal (4 ) <= ' _1 , classOf [GtEq [_]], 4 )
139152
140153 checkFilterPredicate(! (' _1 < 4 ), classOf [GtEq [_]], 4 )
141154 checkFilterPredicate(' _1 > 2 && ' _1 < 4 , classOf [Operators .And ], 3 )
142- checkFilterPredicate(' _1 < 2 || ' _1 > 3 , classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
155+ checkFilterPredicate(' _1 < 2 || ' _1 > 3 , classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
143156 }
144157 }
145158
@@ -151,20 +164,20 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
151164 checkFilterPredicate(' _1 === 1 , classOf [Eq [_]], 1 )
152165 checkFilterPredicate(' _1 !== 1 , classOf [NotEq [_]], (2 to 4 ).map(Row .apply(_)))
153166
154- checkFilterPredicate(' _1 < 2 , classOf [Lt [_]], 1 )
155- checkFilterPredicate(' _1 > 3 , classOf [Gt [_]], 4 )
167+ checkFilterPredicate(' _1 < 2 , classOf [Lt [_]], 1 )
168+ checkFilterPredicate(' _1 > 3 , classOf [Gt [_]], 4 )
156169 checkFilterPredicate(' _1 <= 1 , classOf [LtEq [_]], 1 )
157170 checkFilterPredicate(' _1 >= 4 , classOf [GtEq [_]], 4 )
158171
159172 checkFilterPredicate(Literal (1 ) === ' _1 , classOf [Eq [_]], 1 )
160- checkFilterPredicate(Literal (2 ) > ' _1 , classOf [Lt [_]], 1 )
161- checkFilterPredicate(Literal (3 ) < ' _1 , classOf [Gt [_]], 4 )
162- checkFilterPredicate(Literal (1 ) >= ' _1 , classOf [LtEq [_]], 1 )
163- checkFilterPredicate(Literal (4 ) <= ' _1 , classOf [GtEq [_]], 4 )
173+ checkFilterPredicate(Literal (2 ) > ' _1 , classOf [Lt [_]], 1 )
174+ checkFilterPredicate(Literal (3 ) < ' _1 , classOf [Gt [_]], 4 )
175+ checkFilterPredicate(Literal (1 ) >= ' _1 , classOf [LtEq [_]], 1 )
176+ checkFilterPredicate(Literal (4 ) <= ' _1 , classOf [GtEq [_]], 4 )
164177
165178 checkFilterPredicate(! (' _1 < 4 ), classOf [GtEq [_]], 4 )
166179 checkFilterPredicate(' _1 > 2 && ' _1 < 4 , classOf [Operators .And ], 3 )
167- checkFilterPredicate(' _1 < 2 || ' _1 > 3 , classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
180+ checkFilterPredicate(' _1 < 2 || ' _1 > 3 , classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
168181 }
169182 }
170183
@@ -176,8 +189,8 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
176189 checkFilterPredicate(' _1 === 1 , classOf [Eq [_]], 1 )
177190 checkFilterPredicate(' _1 !== 1 , classOf [NotEq [_]], (2 to 4 ).map(Row .apply(_)))
178191
179- checkFilterPredicate(' _1 < 2 , classOf [Lt [_]], 1 )
180- checkFilterPredicate(' _1 > 3 , classOf [Gt [_]], 4 )
192+ checkFilterPredicate(' _1 < 2 , classOf [Lt [_]], 1 )
193+ checkFilterPredicate(' _1 > 3 , classOf [Gt [_]], 4 )
181194 checkFilterPredicate(' _1 <= 1 , classOf [LtEq [_]], 1 )
182195 checkFilterPredicate(' _1 >= 4 , classOf [GtEq [_]], 4 )
183196
@@ -189,7 +202,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
189202
190203 checkFilterPredicate(! (' _1 < 4 ), classOf [GtEq [_]], 4 )
191204 checkFilterPredicate(' _1 > 2 && ' _1 < 4 , classOf [Operators .And ], 3 )
192- checkFilterPredicate(' _1 < 2 || ' _1 > 3 , classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
205+ checkFilterPredicate(' _1 < 2 || ' _1 > 3 , classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
193206 }
194207 }
195208
@@ -201,20 +214,20 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
201214 checkFilterPredicate(' _1 === 1 , classOf [Eq [_]], 1 )
202215 checkFilterPredicate(' _1 !== 1 , classOf [NotEq [_]], (2 to 4 ).map(Row .apply(_)))
203216
204- checkFilterPredicate(' _1 < 2 , classOf [Lt [_]], 1 )
205- checkFilterPredicate(' _1 > 3 , classOf [Gt [_]], 4 )
217+ checkFilterPredicate(' _1 < 2 , classOf [Lt [_]], 1 )
218+ checkFilterPredicate(' _1 > 3 , classOf [Gt [_]], 4 )
206219 checkFilterPredicate(' _1 <= 1 , classOf [LtEq [_]], 1 )
207220 checkFilterPredicate(' _1 >= 4 , classOf [GtEq [_]], 4 )
208221
209- checkFilterPredicate(Literal (1 ) === ' _1 , classOf [Eq [_]], 1 )
210- checkFilterPredicate(Literal (2 ) > ' _1 , classOf [Lt [_]], 1 )
211- checkFilterPredicate(Literal (3 ) < ' _1 , classOf [Gt [_]], 4 )
222+ checkFilterPredicate(Literal (1 ) === ' _1 , classOf [Eq [_]], 1 )
223+ checkFilterPredicate(Literal (2 ) > ' _1 , classOf [Lt [_]], 1 )
224+ checkFilterPredicate(Literal (3 ) < ' _1 , classOf [Gt [_]], 4 )
212225 checkFilterPredicate(Literal (1 ) >= ' _1 , classOf [LtEq [_]], 1 )
213226 checkFilterPredicate(Literal (4 ) <= ' _1 , classOf [GtEq [_]], 4 )
214227
215228 checkFilterPredicate(! (' _1 < 4 ), classOf [GtEq [_]], 4 )
216229 checkFilterPredicate(' _1 > 2 && ' _1 < 4 , classOf [Operators .And ], 3 )
217- checkFilterPredicate(' _1 < 2 || ' _1 > 3 , classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
230+ checkFilterPredicate(' _1 < 2 || ' _1 > 3 , classOf [Operators .Or ], Seq (Row (1 ), Row (4 )))
218231 }
219232 }
220233
@@ -227,8 +240,8 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
227240 checkFilterPredicate(' _1 === " 1" , classOf [Eq [_]], " 1" )
228241 checkFilterPredicate(' _1 !== " 1" , classOf [NotEq [_]], (2 to 4 ).map(i => Row .apply(i.toString)))
229242
230- checkFilterPredicate(' _1 < " 2" , classOf [Lt [_]], " 1" )
231- checkFilterPredicate(' _1 > " 3" , classOf [Gt [_]], " 4" )
243+ checkFilterPredicate(' _1 < " 2" , classOf [Lt [_]], " 1" )
244+ checkFilterPredicate(' _1 > " 3" , classOf [Gt [_]], " 4" )
232245 checkFilterPredicate(' _1 <= " 1" , classOf [LtEq [_]], " 1" )
233246 checkFilterPredicate(' _1 >= " 4" , classOf [GtEq [_]], " 4" )
234247
@@ -268,11 +281,12 @@ class ParquetFilterSuite extends QueryTest with ParquetTest {
268281 }
269282
270283 withParquetRDD((1 to 4 ).map(i => Tuple1 (i.b))) { implicit rdd =>
284+ checkBinaryFilterPredicate(' _1 === 1 .b, classOf [Eq [_]], 1 .b)
285+
271286 checkBinaryFilterPredicate(' _1 .isNull, classOf [Eq [_]], Seq .empty[Row ])
272287 checkBinaryFilterPredicate(
273288 ' _1 .isNotNull, classOf [NotEq [_]], (1 to 4 ).map(i => Row .apply(i.b)).toSeq)
274289
275- checkBinaryFilterPredicate(' _1 === 1 .b, classOf [Eq [_]], 1 .b)
276290 checkBinaryFilterPredicate(
277291 ' _1 !== 1 .b, classOf [NotEq [_]], (2 to 4 ).map(i => Row .apply(i.b)).toSeq)
278292
0 commit comments