|
1 | | -/* |
2 | | - * Licensed to the Apache Software Foundation (ASF) under one or more |
3 | | - * contributor license agreements. See the NOTICE file distributed with |
4 | | - * this work for additional information regarding copyright ownership. |
5 | | - * The ASF licenses this file to You under the Apache License, Version 2.0 |
6 | | - * (the "License"); you may not use this file except in compliance with |
7 | | - * the License. You may obtain a copy of the License at |
8 | | - * |
9 | | - * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | - * |
11 | | - * Unless required by applicable law or agreed to in writing, software |
12 | | - * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | - * See the License for the specific language governing permissions and |
15 | | - * limitations under the License. |
16 | | - */ |
17 | | - |
18 | | -package org.apache.spark.sql.execution.joins |
19 | | - |
20 | | -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys |
21 | | -import org.apache.spark.sql.catalyst.plans.logical.Join |
22 | | -import org.apache.spark.sql.types.{IntegerType, DoubleType, StructType} |
23 | | -import org.apache.spark.sql.{DataFrame, Row} |
24 | | -import org.apache.spark.sql.catalyst.expressions._ |
25 | | -import org.apache.spark.sql.catalyst.plans._ |
26 | | -import org.apache.spark.sql.execution.{EnsureRequirements, joins, SparkPlan, SparkPlanTest} |
27 | | - |
// Pre-image of the suite: every line below carries the diff's `-` marker and is kept verbatim.
// OuterJoinSuite registers one test per physical outer-join operator for each scenario declared
// at the bottom of the class, comparing the operator output with a fixed list of expected rows.
28 | | -class OuterJoinSuite extends SparkPlanTest { |
29 | | - |
// Registers the tests for one scenario. `testName` prefixes every generated test name,
// `leftRows`/`rightRows` are the inputs, `joinType` is handed to each operator, `condition` is
// the full join predicate, and `expectedAnswer` holds the expected rows as tuples.
30 | | - private def testOuterJoin( |
31 | | - testName: String, |
32 | | - leftRows: DataFrame, |
33 | | - rightRows: DataFrame, |
34 | | - joinType: JoinType, |
35 | | - condition: Expression, |
36 | | - expectedAnswer: Seq[Product]): Unit = { |
// This logical Join is built with `Inner` and is only fed to ExtractEquiJoinKeys; the operators
// below receive `joinType` instead.
37 | | - val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition)) |
// The hash-join tests are registered only when the extractor returns a result.
38 | | - ExtractEquiJoinKeys.unapply(join).foreach { |
// NOTE(review): leftChild and rightChild are bound here but never used in this block.
39 | | - case (_, leftKeys, rightKeys, boundCondition, leftChild, rightChild) => |
40 | | - test(s"$testName using ShuffledHashOuterJoin") { |
41 | | - checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => |
42 | | - EnsureRequirements(left.sqlContext).apply( |
43 | | - ShuffledHashOuterJoin(leftKeys, rightKeys, joinType, boundCondition, left, right)), |
44 | | - expectedAnswer.map(Row.fromTuple), |
// NOTE(review): the hash-join tests pass sortAnswers = false while the nested-loop tests pass
// true; presumably this makes the comparison depend on output row order - confirm against
// SparkPlanTest.checkAnswer2.
45 | | - sortAnswers = false) |
46 | | - } |
47 | | - |
// The broadcast hash variant is not registered for FullOuter scenarios.
48 | | - if (joinType != FullOuter) { |
49 | | - test(s"$testName using BroadcastHashOuterJoin") { |
50 | | - checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => |
51 | | - BroadcastHashOuterJoin(leftKeys, rightKeys, joinType, boundCondition, left, right), |
52 | | - expectedAnswer.map(Row.fromTuple), |
53 | | - sortAnswers = false) |
54 | | - } |
55 | | - } |
56 | | - } |
57 | | - |
// The nested-loop tests are always registered and receive the unsplit `condition`.
58 | | - test(s"$testName using BroadcastNestedLoopJoin (build=left)") { |
59 | | - checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => |
60 | | - joins.BroadcastNestedLoopJoin(left, right, joins.BuildLeft, joinType, Some(condition)), |
61 | | - expectedAnswer.map(Row.fromTuple), |
62 | | - sortAnswers = true) |
63 | | - } |
64 | | - |
65 | | - test(s"$testName using BroadcastNestedLoopJoin (build=right)") { |
66 | | - checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => |
67 | | - joins.BroadcastNestedLoopJoin(left, right, joins.BuildRight, joinType, Some(condition)), |
68 | | - expectedAnswer.map(Row.fromTuple), |
69 | | - sortAnswers = true) |
70 | | - } |
71 | | - } |
72 | | - |
// Left input: columns a (IntegerType) and b (DoubleType), including one all-null row.
73 | | - val left = sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(Seq( |
74 | | - Row(1, 2.0), |
75 | | - Row(2, 1.0), |
76 | | - Row(3, 3.0), |
77 | | - Row(null, null) |
78 | | - )), new StructType().add("a", IntegerType).add("b", DoubleType)) |
79 | | - |
// Right input: columns c (IntegerType) and d (DoubleType), including one all-null row.
80 | | - val right = sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(Seq( |
81 | | - Row(2, 3.0), |
82 | | - Row(3, 2.0), |
83 | | - Row(4, 1.0), |
84 | | - Row(null, null) |
85 | | - )), new StructType().add("c", IntegerType).add("d", DoubleType)) |
86 | | - |
// Join predicate: a === c AND b < d. With the rows above only a = 2 satisfies both parts.
87 | | - val condition = { |
88 | | - And( |
89 | | - (left.col("a") === right.col("c")).expr, |
90 | | - LessThan(left.col("b").expr, right.col("d").expr)) |
91 | | - } |
92 | | - |
93 | | - // --- Basic outer joins ------------------------------------------------------------------------ |
94 | | - |
95 | | - testOuterJoin( |
96 | | - "basic left outer join", |
97 | | - left, |
98 | | - right, |
99 | | - LeftOuter, |
100 | | - condition, |
101 | | - Seq( |
102 | | - (1, 2.0, null, null), |
103 | | - (2, 1.0, 2, 3.0), |
104 | | - (3, 3.0, null, null), |
105 | | - (null, null, null, null) |
106 | | - ) |
107 | | - ) |
108 | | - |
109 | | - testOuterJoin( |
110 | | - "basic right outer join", |
111 | | - left, |
112 | | - right, |
113 | | - RightOuter, |
114 | | - condition, |
115 | | - Seq( |
116 | | - (2, 1.0, 2, 3.0), |
117 | | - (null, null, 3, 2.0), |
118 | | - (null, null, 4, 1.0), |
119 | | - (null, null, null, null) |
120 | | - ) |
121 | | - ) |
122 | | - |
// The full outer expectation lists the all-null row twice: once for each input's null row.
123 | | - testOuterJoin( |
124 | | - "basic full outer join", |
125 | | - left, |
126 | | - right, |
127 | | - FullOuter, |
128 | | - condition, |
129 | | - Seq( |
130 | | - (1, 2.0, null, null), |
131 | | - (2, 1.0, 2, 3.0), |
132 | | - (3, 3.0, null, null), |
133 | | - (null, null, 3, 2.0), |
134 | | - (null, null, 4, 1.0), |
135 | | - (null, null, null, null), |
136 | | - (null, null, null, null) |
137 | | - ) |
138 | | - ) |
139 | | - |
140 | | - // --- Both inputs empty ------------------------------------------------------------------------ |
141 | | - |
// filter("false") is applied to both inputs here, and each scenario expects no rows.
142 | | - testOuterJoin( |
143 | | - "left outer join with both inputs empty", |
144 | | - left.filter("false"), |
145 | | - right.filter("false"), |
146 | | - LeftOuter, |
147 | | - condition, |
148 | | - Seq.empty |
149 | | - ) |
150 | | - |
151 | | - testOuterJoin( |
152 | | - "right outer join with both inputs empty", |
153 | | - left.filter("false"), |
154 | | - right.filter("false"), |
155 | | - RightOuter, |
156 | | - condition, |
157 | | - Seq.empty |
158 | | - ) |
159 | | - |
160 | | - testOuterJoin( |
161 | | - "full outer join with both inputs empty", |
162 | | - left.filter("false"), |
163 | | - right.filter("false"), |
164 | | - FullOuter, |
165 | | - condition, |
166 | | - Seq.empty |
167 | | - ) |
168 | | -} |
| 1 | +/* |
| 2 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 3 | + * contributor license agreements. See the NOTICE file distributed with |
| 4 | + * this work for additional information regarding copyright ownership. |
| 5 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 6 | + * (the "License"); you may not use this file except in compliance with |
| 7 | + * the License. You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | + |
| 18 | +package org.apache.spark.sql.execution.joins |
| 19 | + |
| 20 | +import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys |
| 21 | +import org.apache.spark.sql.catalyst.plans.logical.Join |
| 22 | +import org.apache.spark.sql.types.{IntegerType, DoubleType, StructType} |
| 23 | +import org.apache.spark.sql.{DataFrame, Row} |
| 24 | +import org.apache.spark.sql.catalyst.expressions._ |
| 25 | +import org.apache.spark.sql.catalyst.plans._ |
| 26 | +import org.apache.spark.sql.execution.{EnsureRequirements, joins, SparkPlan, SparkPlanTest} |
| 27 | + |
/**
 * Runs a fixed set of outer-join scenarios against the physical outer-join operators.
 *
 * Each scenario declared at the bottom of the class is registered once per operator
 * (shuffled hash, broadcast hash, and broadcast nested loop with either build side), and the
 * operator output is compared with the same list of expected rows.
 */
class OuterJoinSuite extends SparkPlanTest {

  /**
   * Registers one test per physical operator for a single outer-join scenario.
   *
   * @param testName prefix of every generated test name; the operator name is appended
   * @param leftRows left input of the join
   * @param rightRows right input of the join
   * @param joinType join type handed to each operator under test
   * @param condition full join predicate
   * @param expectedAnswer expected output rows as tuples; converted with `Row.fromTuple`
   */
  private def testOuterJoin(
      testName: String,
      leftRows: DataFrame,
      rightRows: DataFrame,
      joinType: JoinType,
      condition: Expression,
      expectedAnswer: Seq[Product]): Unit = {
    val expectedRows = expectedAnswer.map(Row.fromTuple)

    // This logical Join is built with `Inner` and is only fed to ExtractEquiJoinKeys to obtain
    // the key expressions and the remaining condition; the operators below get `joinType`.
    val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition))

    // The hash-join tests are registered only when the extractor returns a result.
    ExtractEquiJoinKeys.unapply(join).foreach {
      case (_, leftKeys, rightKeys, boundCondition, _, _) =>
        test(s"$testName using ShuffledHashOuterJoin") {
          checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
            EnsureRequirements(left.sqlContext).apply(
              ShuffledHashOuterJoin(leftKeys, rightKeys, joinType, boundCondition, left, right)),
            expectedRows,
            // Compare independently of row order, as the nested-loop tests below already do;
            // the expected rows are not meant to pin a particular output order.
            sortAnswers = true)
        }

        // The broadcast hash variant is not registered for FullOuter scenarios.
        if (joinType != FullOuter) {
          test(s"$testName using BroadcastHashOuterJoin") {
            checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
              BroadcastHashOuterJoin(leftKeys, rightKeys, joinType, boundCondition, left, right),
              expectedRows,
              sortAnswers = true)
          }
        }
    }

    // The nested-loop tests are always registered and receive the unsplit `condition`.
    test(s"$testName using BroadcastNestedLoopJoin (build=left)") {
      checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
        joins.BroadcastNestedLoopJoin(left, right, joins.BuildLeft, joinType, Some(condition)),
        expectedRows,
        sortAnswers = true)
    }

    test(s"$testName using BroadcastNestedLoopJoin (build=right)") {
      checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
        joins.BroadcastNestedLoopJoin(left, right, joins.BuildRight, joinType, Some(condition)),
        expectedRows,
        sortAnswers = true)
    }
  }

  // Left input: columns a (IntegerType) and b (DoubleType), including one all-null row.
  val left = sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(Seq(
    Row(1, 2.0),
    Row(2, 1.0),
    Row(3, 3.0),
    Row(null, null)
  )), new StructType().add("a", IntegerType).add("b", DoubleType))

  // Right input: columns c (IntegerType) and d (DoubleType), including one all-null row.
  val right = sqlContext.createDataFrame(sqlContext.sparkContext.parallelize(Seq(
    Row(2, 3.0),
    Row(3, 2.0),
    Row(4, 1.0),
    Row(null, null)
  )), new StructType().add("c", IntegerType).add("d", DoubleType))

  // Join predicate: a === c AND b < d. With the rows above only a = 2 satisfies both parts
  // (a = 3 matches on the key, but 3.0 < 2.0 is false).
  val condition = {
    And(
      (left.col("a") === right.col("c")).expr,
      LessThan(left.col("b").expr, right.col("d").expr))
  }

  // --- Basic outer joins ------------------------------------------------------------------------

  testOuterJoin(
    "basic left outer join",
    left,
    right,
    LeftOuter,
    condition,
    Seq(
      (1, 2.0, null, null),
      (2, 1.0, 2, 3.0),
      (3, 3.0, null, null),
      (null, null, null, null)
    )
  )

  testOuterJoin(
    "basic right outer join",
    left,
    right,
    RightOuter,
    condition,
    Seq(
      (2, 1.0, 2, 3.0),
      (null, null, 3, 2.0),
      (null, null, 4, 1.0),
      (null, null, null, null)
    )
  )

  // The all-null row is expected twice: once for each input's null row.
  testOuterJoin(
    "basic full outer join",
    left,
    right,
    FullOuter,
    condition,
    Seq(
      (1, 2.0, null, null),
      (2, 1.0, 2, 3.0),
      (3, 3.0, null, null),
      (null, null, 3, 2.0),
      (null, null, 4, 1.0),
      (null, null, null, null),
      (null, null, null, null)
    )
  )

  // --- Both inputs empty ------------------------------------------------------------------------

  // filter("false") is applied to both inputs here, and each scenario expects no rows.
  testOuterJoin(
    "left outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    LeftOuter,
    condition,
    Seq.empty
  )

  testOuterJoin(
    "right outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    RightOuter,
    condition,
    Seq.empty
  )

  testOuterJoin(
    "full outer join with both inputs empty",
    left.filter("false"),
    right.filter("false"),
    FullOuter,
    condition,
    Seq.empty
  )
}
0 commit comments