@@ -350,28 +350,47 @@ class OrcQuerySuite extends QueryTest with BeforeAndAfterAll with OrcTest {
350350 withTempPath { dir =>
351351 withSQLConf(SQLConf .ORC_FILTER_PUSHDOWN_ENABLED .key -> " true" ) {
352352 import testImplicits ._
353-
354353 val path = dir.getCanonicalPath
355- sqlContext.range(10 ).coalesce(1 ).write.orc(path)
354+
355+ // Field "a", the first column, holds odd integers and is null elsewhere. This is to check
356+ // the filtered count when `isNull` is applied. For field "b", `isNotNull` on an ORC file
357+ // filters rows only when all the values are null (this may work differently when the data
358+ // or query is more complicated), so a column containing only `null` is simply added here.
359+ val data = (0 until 10 ).map { i =>
360+ val maybeInt = if (i % 2 == 0 ) None else Some (i)
361+ val nullValue : Option [String ] = None
362+ (maybeInt, nullValue)
363+ }
364+ createDataFrame(data).toDF(" a" , " b" ).write.orc(path)
356365 val df = sqlContext.read.orc(path)
357366
358- def checkPredicate (pred : Column , answer : Seq [Long ]): Unit = {
359- checkAnswer(df.where(pred), answer.map(Row (_)))
367+ def checkPredicate (pred : Column , answer : Seq [Row ]): Unit = {
368+ val sourceDf = stripSparkFilter(df.where(pred))
369+ val data = sourceDf.collect().toSet
370+ val expectedData = answer.toSet
371+
372+ // When a filter is pushed down to ORC, ORC can apply it to rows, so we can check
373+ // the number of rows returned from ORC to make sure our filter pushdown works.
374+ // The tricky part is that ORC does not filter rows exactly; it may return extra
375+ // candidate rows. So this checks that fewer rows come back than the original 10
376+ // rows of data, and that the returned rows contain the expected data.
377+ val isOrcFiltered = sourceDf.count < 10 && expectedData.subsetOf(data)
378+ assert(isOrcFiltered)
360379 }
361380
362- checkPredicate(' id === 5 , Seq ( 5L ))
363- checkPredicate(' id <=> 5 , Seq ( 5L ))
364- checkPredicate(' id < 5 , 0L to 4L )
365- checkPredicate(' id <= 5 , 0L to 5L )
366- checkPredicate(' id > 5 , 6L to 9L )
367- checkPredicate(' id >= 5 , 5L to 9L )
368- checkPredicate(' id .isNull, Seq .empty[ Long ] )
369- checkPredicate(' id .isNotNull, 0L to 9L )
370- checkPredicate(' id .isin(1L , 3L , 5L ), Seq ( 1L , 3L , 5L ))
371- checkPredicate(' id > 0 && ' id < 3 , 1L to 2L )
372- checkPredicate(' id < 1 || ' id > 8 , Seq ( 0L , 9L ))
373- checkPredicate(! (' id > 3 ), 0L to 3L )
374- checkPredicate(! (' id > 0 && ' id < 3 ), Seq ( 0L ) ++ ( 3L to 9L ))
381+ checkPredicate(' a === 5 , List ( 5 ).map( Row (_, null ) ))
382+ checkPredicate(' a <=> 5 , List ( 5 ).map( Row (_, null ) ))
383+ checkPredicate(' a < 5 , List ( 1 , 3 ).map( Row (_, null )) )
384+ checkPredicate(' a <= 5 , List ( 1 , 3 , 5 ).map( Row (_, null )) )
385+ checkPredicate(' a > 5 , List ( 7 , 9 ).map( Row (_, null )) )
386+ checkPredicate(' a >= 5 , List ( 5 , 7 , 9 ).map( Row (_, null )) )
387+ checkPredicate(' a .isNull, List ( null ).map( Row (_, null )) )
388+ checkPredicate(' b .isNotNull, List () )
389+ checkPredicate(' a .isin(3 , 5 , 7 ), List ( 3 , 5 , 7 ).map( Row (_, null ) ))
390+ checkPredicate(' a > 0 && ' a < 3 , List ( 1 ).map( Row (_, null )) )
391+ checkPredicate(' a < 1 || ' a > 8 , List ( 9 ).map( Row (_, null ) ))
392+ checkPredicate(! (' a > 3 ), List ( 1 , 3 ).map( Row (_, null )) )
393+ checkPredicate(! (' a > 0 && ' a < 3 ), List ( 3 , 5 , 7 , 9 ).map( Row (_, null ) ))
375394 }
376395 }
377396 }
0 commit comments