-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-12409][SPARK-12387][SPARK-12391][SQL] Support AND/OR/IN/LIKE push-down filters for JDBC #10468
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-12409][SPARK-12387][SPARK-12391][SQL] Support AND/OR/IN/LIKE push-down filters for JDBC #10468
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,6 +26,7 @@ import org.scalatest.BeforeAndAfter | |
| import org.scalatest.PrivateMethodTester | ||
|
|
||
| import org.apache.spark.SparkFunSuite | ||
| import org.apache.spark.sql.Row | ||
| import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD | ||
| import org.apache.spark.sql.test.SharedSQLContext | ||
| import org.apache.spark.sql.types._ | ||
|
|
@@ -186,8 +187,26 @@ class JDBCSuite extends SparkFunSuite | |
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size == 1) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size == 2) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size == 2) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME IN ('mary', 'fred')")) | ||
| .collect().size == 2) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME NOT IN ('fred')")) | ||
| .collect().size === 2) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1 OR NAME = 'mary'")) | ||
| .collect().size == 2) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1 OR NAME = 'mary' " | ||
| + "AND THEID = 2")).collect().size == 2) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME LIKE 'fr%'")).collect().size == 1) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME LIKE '%ed'")).collect().size == 1) | ||
| assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME LIKE '%re%'")).collect().size == 1) | ||
| assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NULL")).collect().size == 1) | ||
| assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NOT NULL")).collect().size == 0) | ||
|
|
||
| // This is a test to reflect discussion in SPARK-12218. | ||
| // The older versions of spark have this kind of bugs in parquet data source. | ||
| val df1 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2 AND NAME != 'mary')") | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The two sub-conditions are both OK to be pushed down, so this doesn't actually test against the nested AND issue in SPARK-12218. See #19776. By the way, the two sub-conditions filter out the same rows, so this doesn't reflect the issue either.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I think it's OK to drop this test. |
||
| val df2 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2) OR NOT (NAME != 'mary')") | ||
| assert(df1.collect.toSet === Set(Row("mary", 2))) | ||
| assert(df2.collect.toSet === Set(Row("mary", 2))) | ||
| } | ||
|
|
||
| test("SELECT * WHERE (quoted strings)") { | ||
|
|
@@ -437,14 +456,21 @@ class JDBCSuite extends SparkFunSuite | |
| val compileFilter = PrivateMethod[String]('compileFilter) | ||
| def doCompileFilter(f: Filter): String = JDBCRDD invokePrivate compileFilter(f) | ||
| assert(doCompileFilter(EqualTo("col0", 3)) === "col0 = 3") | ||
| assert(doCompileFilter(Not(EqualTo("col1", "abc"))) === "col1 != 'abc'") | ||
| assert(doCompileFilter(Not(EqualTo("col1", "abc"))) === "(NOT (col1 = 'abc'))") | ||
| assert(doCompileFilter(And(EqualTo("col0", 0), EqualTo("col1", "def"))) | ||
| === "(col0 = 0) AND (col1 = 'def')") | ||
| assert(doCompileFilter(Or(EqualTo("col0", 2), EqualTo("col1", "ghi"))) | ||
| === "(col0 = 2) OR (col1 = 'ghi')") | ||
| assert(doCompileFilter(LessThan("col0", 5)) === "col0 < 5") | ||
| assert(doCompileFilter(LessThan("col3", | ||
| Timestamp.valueOf("1995-11-21 00:00:00.0"))) === "col3 < '1995-11-21 00:00:00.0'") | ||
| assert(doCompileFilter(LessThan("col4", Date.valueOf("1983-08-04"))) === "col4 < '1983-08-04'") | ||
| assert(doCompileFilter(LessThanOrEqual("col0", 5)) === "col0 <= 5") | ||
| assert(doCompileFilter(GreaterThan("col0", 3)) === "col0 > 3") | ||
| assert(doCompileFilter(GreaterThanOrEqual("col0", 3)) === "col0 >= 3") | ||
| assert(doCompileFilter(In("col1", Array("jkl"))) === "col1 IN ('jkl')") | ||
| assert(doCompileFilter(Not(In("col1", Array("mno", "pqr")))) | ||
| === "(NOT (col1 IN ('mno', 'pqr')))") | ||
| assert(doCompileFilter(IsNull("col1")) === "col1 IS NULL") | ||
| assert(doCompileFilter(IsNotNull("col1")) === "col1 IS NOT NULL") | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we compare using `===` instead of `==` here and elsewhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have no strong opinion on this... Is it better to fix them? It seems to me that collection types, e.g., sets, need `===` comparisons.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does the newer version of the scalatest library we use already use a macro, so that `==` and `===` are the same? Can you confirm? Anyway, it's not that big of a deal here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, the `Assertions` trait now provides an `assert` macro, and the trait mixes in the `TripleEquals` trait, so we don't need to change `==` to `===`.