From 7c44c70fe664e73b36a49a974ece93a0c83d7f8e Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Mon, 28 May 2018 00:27:09 -0700 Subject: [PATCH 1/7] Optimize `In` --- .../sql/catalyst/optimizer/expressions.scala | 9 +++++- .../catalyst/optimizer/OptimizeInSuite.scala | 32 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 1c0b7bd806801..bc2247aa64af4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -219,7 +219,14 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { object OptimizeIn extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsDown { - case In(v, list) if list.isEmpty && !v.nullable => FalseLiteral + case In(v, list) if list.isEmpty => + // When v is not nullable, the following expression will be optimized + // to FalseLiteral which is tested in OptimizeInSuite.scala + If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) + case In(v, Seq(elem@Literal(_, _))) => + // `Expression` like `ListQuery` contains subquery which can not + // be converted into `EqualTo`. Only `Literal` is converted for safety. + EqualTo(v, elem) case expr @ In(v, list) if expr.inSetConvertible => val newList = ExpressionSet(list).toSeq if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index 478118ed709f7..8349c14c4b8e3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -176,6 +176,21 @@ class OptimizeInSuite extends PlanTest { } } + test("OptimizedIn test: one element in list gets transformed to EqualTo.") { + val originalQuery = + testRelation + .where(In(UnresolvedAttribute("a"), Seq(Literal(1)))) + .analyze + + val optimized = Optimize.execute(originalQuery) + val correctAnswer = + testRelation + .where(EqualTo(UnresolvedAttribute("a"), Literal(1))) + .analyze + + comparePlans(optimized, correctAnswer) + } + test("OptimizedIn test: In empty list gets transformed to FalseLiteral " + "when value is not nullable") { val originalQuery = @@ -191,4 +206,21 @@ class OptimizeInSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("OptimizedIn test: In empty list gets transformed to " + + "If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) when value is nullable") { + val originalQuery = + testRelation + .where(In(UnresolvedAttribute("a"), Nil)) + .analyze + + val optimized = Optimize.execute(originalQuery) + val correctAnswer = + testRelation + .where(If(IsNotNull(UnresolvedAttribute("a")), + Literal(false), Literal.create(null, BooleanType))) + .analyze + + comparePlans(optimized, correctAnswer) + } } From 449613aab8e631582e5a152a9a4b67a8d2468738 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Mon, 28 May 2018 00:31:13 -0700 Subject: [PATCH 2/7] style --- .../org/apache/spark/sql/catalyst/optimizer/expressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index bc2247aa64af4..aaa47c10410da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -223,7 +223,7 @@ object OptimizeIn extends Rule[LogicalPlan] { // When v is not nullable, the following expression will be optimized // to FalseLiteral which is tested in OptimizeInSuite.scala If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) - case In(v, Seq(elem@Literal(_, _))) => + case In(v, Seq(elem @ Literal(_, _))) => // `Expression` like `ListQuery` contains subquery which can not // be converted into `EqualTo`. Only `Literal` is converted for safety. EqualTo(v, elem) From 7a354fcd154ec2d8f88a5c1fbf1bd75fdb15ec49 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Tue, 29 May 2018 00:09:53 -0700 Subject: [PATCH 3/7] Addressed feedback --- .../apache/spark/sql/catalyst/optimizer/expressions.scala | 8 +++----- .../spark/sql/catalyst/optimizer/OptimizeInSuite.scala | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index aaa47c10410da..81f9dcd55da8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -223,13 +223,11 @@ object OptimizeIn extends Rule[LogicalPlan] { // When v is not nullable, the following expression will be optimized // to FalseLiteral which is tested in OptimizeInSuite.scala If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) - case In(v, Seq(elem @ Literal(_, _))) => - // `Expression` like `ListQuery` contains subquery which can not - // be converted into `EqualTo`. Only `Literal` is converted for safety. - EqualTo(v, elem) case expr @ In(v, list) if expr.inSetConvertible => val newList = ExpressionSet(list).toSeq - if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) { + if (newList.length == 1) { + EqualTo(v, newList.head) + } else if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) { val hSet = newList.map(e => e.eval(EmptyRow)) InSet(v, HashSet() ++ hSet) } else if (newList.size < list.size) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index 8349c14c4b8e3..86522a6a54ed5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -207,8 +207,8 @@ class OptimizeInSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("OptimizedIn test: In empty list gets transformed to " + - "If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) when value is nullable") { + test("OptimizedIn test: In empty list gets transformed to `If` expression " + + "when value is nullable") { val originalQuery = testRelation .where(In(UnresolvedAttribute("a"), Nil)) From 23fedd8d65cb51201e1f032c938671ebc21eb432 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Mon, 16 Jul 2018 11:47:20 -0700 Subject: [PATCH 4/7] Addressed feedback --- .../apache/spark/sql/catalyst/optimizer/expressions.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 7eb72f7fa7f5e..e210c9619ed7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -224,14 +224,14 @@ object OptimizeIn extends Rule[LogicalPlan] { If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) case expr @ In(v, list) if expr.inSetConvertible => val newList = ExpressionSet(list).toSeq - if (newList.length == 1) { + if (newList.length == 1 && !list.isInstanceOf[ListQuery]) { EqualTo(v, newList.head) - } else if (newList.size > SQLConf.get.optimizerInSetConversionThreshold) { + } else if (newList.length > SQLConf.get.optimizerInSetConversionThreshold) { val hSet = newList.map(e => e.eval(EmptyRow)) InSet(v, HashSet() ++ hSet) - } else if (newList.size < list.size) { + } else if (newList.length < list.length) { expr.copy(list = newList) - } else { // newList.length == list.length + } else { // newList.length == list.length && newList.length > 1 expr } } From 5079833cc25949c806575f365f62f423a3205282 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Mon, 16 Jul 2018 11:57:07 -0700 Subject: [PATCH 5/7] update --- .../org/apache/spark/sql/catalyst/optimizer/expressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index e210c9619ed7a..f78a0ff95f382 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -224,7 +224,7 @@ object OptimizeIn extends Rule[LogicalPlan] { If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) case expr @ In(v, list) if expr.inSetConvertible => val newList = ExpressionSet(list).toSeq - if (newList.length == 1 && !list.isInstanceOf[ListQuery]) { + if (newList.length == 1 && !newList.isInstanceOf[ListQuery]) { EqualTo(v, newList.head) } else if (newList.length > SQLConf.get.optimizerInSetConversionThreshold) { val hSet = newList.map(e => e.eval(EmptyRow)) From c519c4096690c2f8aa26a853a038218f3aaa100a Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Mon, 16 Jul 2018 13:37:55 -0700 Subject: [PATCH 6/7] Add one more test --- .../catalyst/optimizer/OptimizeInSuite.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index 86522a6a54ed5..6f8757b475647 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -207,6 +207,23 @@ class OptimizeInSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + test("OptimizedIn test: In empty list gets transformed to " + + "If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) when value is nullable") { + val originalQuery = + testRelation + .where(In(UnresolvedAttribute("a"), Nil)) + .analyze + + val optimized = Optimize.execute(originalQuery) + val correctAnswer = + testRelation + .where(If(IsNotNull(UnresolvedAttribute("a")), + Literal(false), Literal.create(null, BooleanType))) + .analyze + + comparePlans(optimized, correctAnswer) + } + test("OptimizedIn test: In empty list gets transformed to `If` expression " + "when value is nullable") { val originalQuery = From 9174a30b092740284250002f5a5fee50eadfc754 Mon Sep 17 00:00:00 2001 From: DB Tsai Date: Mon, 16 Jul 2018 13:42:12 -0700 Subject: [PATCH 7/7] Remove duplicated code --- .../catalyst/optimizer/OptimizeInSuite.scala | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index 6f8757b475647..86522a6a54ed5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -207,23 +207,6 @@ class OptimizeInSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("OptimizedIn test: In empty list gets transformed to " + - "If(IsNotNull(v), FalseLiteral, Literal(null, BooleanType)) when value is nullable") { - val originalQuery = - testRelation - .where(In(UnresolvedAttribute("a"), Nil)) - .analyze - - val optimized = Optimize.execute(originalQuery) - val correctAnswer = - testRelation - .where(If(IsNotNull(UnresolvedAttribute("a")), - Literal(false), Literal.create(null, BooleanType))) - .analyze - - comparePlans(optimized, correctAnswer) - } - test("OptimizedIn test: In empty list gets transformed to `If` expression " + "when value is nullable") { val originalQuery =