Skip to content

Commit caeb64d

Browse files
committed
improve OrcFilters in Hive project as well
1 parent 529b207 commit caeb64d

File tree

2 files changed

+64
-15
lines changed

2 files changed

+64
-15
lines changed

sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,12 +155,23 @@ private[orc] object OrcFilters extends Logging {
155155
}
156156

157157
case Or(left, right) =>
158+
// The Or predicate is convertible when both of its children can be pushed down.
159+
// That is to say, if one/both of the children can be partially pushed down, the Or
160+
// predicate can be partially pushed down as well.
161+
//
162+
// Here is an example used to explain the reason.
163+
// Let's say we have
164+
// (a1 AND a2) OR (b1 AND b2),
165+
// a1 and b1 are convertible, while a2 and b2 are not.
166+
// The predicate can be converted as
167+
// (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2)
168+
// As per the logic in the And predicate, we can push down (a1 OR b1).
158169
for {
159-
_ <- createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts = false)
160-
_ <- createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts = false)
170+
_ <- createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts)
171+
_ <- createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts)
161172
lhs <- createBuilder(dataTypeMap, left,
162-
builder.startOr(), canPartialPushDownConjuncts = false)
163-
rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts = false)
173+
builder.startOr(), canPartialPushDownConjuncts)
174+
rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts)
164175
} yield rhs.end()
165176

166177
case Not(child) =>

sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -399,17 +399,6 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton {
399399
)).get.toString
400400
}
401401

402-
// Can not remove unsupported `StringContains` predicate since it is under `Or` operator.
403-
assert(OrcFilters.createFilter(schema, Array(
404-
Or(
405-
LessThan("a", 10),
406-
And(
407-
StringContains("b", "prefix"),
408-
GreaterThan("a", 1)
409-
)
410-
)
411-
)).isEmpty)
412-
413402
// Safely remove unsupported `StringContains` predicate and push down `LessThan`
414403
assertResultWithDiffHiveVersion(
415404
"""leaf-0 = (LESS_THAN a 10)
@@ -442,4 +431,53 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton {
442431
)).get.toString
443432
}
444433
}
434+
435+
test("SPARK-27699 Converting disjunctions into ORC SearchArguments") {
436+
import org.apache.spark.sql.sources._
437+
// The `LessThan` should be converted while the `StringContains` shouldn't
438+
val schema = new StructType(
439+
Array(
440+
StructField("a", IntegerType, nullable = true),
441+
StructField("b", StringType, nullable = true)))
442+
443+
// The `StringContains` predicate is not able to be pushed down.
444+
assertResult("leaf-0 = (LESS_THAN_EQUALS a 10)\nleaf-1 = (LESS_THAN a 1)\n" +
445+
"expr = (or (not leaf-0) leaf-1)") {
446+
OrcFilters.createFilter(schema, Array(
447+
Or(
448+
GreaterThan("a", 10),
449+
And(
450+
StringContains("b", "prefix"),
451+
LessThan("a", 1)
452+
)
453+
)
454+
)).get.toString
455+
}
456+
457+
assertResult("leaf-0 = (LESS_THAN_EQUALS a 10)\nleaf-1 = (LESS_THAN a 1)\n" +
458+
"expr = (or (not leaf-0) leaf-1)") {
459+
OrcFilters.createFilter(schema, Array(
460+
Or(
461+
And(
462+
GreaterThan("a", 10),
463+
StringContains("b", "foobar")
464+
),
465+
And(
466+
StringContains("b", "prefix"),
467+
LessThan("a", 1)
468+
)
469+
)
470+
)).get.toString
471+
}
472+
473+
assert(OrcFilters.createFilter(schema, Array(
474+
Or(
475+
StringContains("b", "foobar"),
476+
And(
477+
StringContains("b", "prefix"),
478+
LessThan("a", 1)
479+
)
480+
)
481+
)).isEmpty)
482+
}
445483
}

0 commit comments

Comments
 (0)