@@ -52,8 +52,10 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
    * the two join sides. When planning a [[execution.BroadcastHashJoin]], if one side has an
    * estimated physical size smaller than the user-settable threshold
    * `spark.sql.auto.convert.join.size`, the planner would mark it as the ''build'' relation and
-   * mark the other relation as the ''stream'' side. If both estimates exceed the threshold,
-   * they will instead be used to decide the build side in a [[execution.ShuffledHashJoin]].
+   * mark the other relation as the ''stream'' side. The build table will be ''broadcasted'' to
+   * all of the executors involved in the join, as a [[org.apache.spark.broadcast.Broadcast]]
+   * object. If both estimates exceed the threshold, they will instead be used to decide the build
+   * side in a [[execution.ShuffledHashJoin]].
    */
   object HashJoin extends Strategy with PredicateHelper {
     private[this] def broadcastHashJoin(
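The doc comment above describes a size-based planning rule: broadcast the side whose estimated physical size is under `spark.sql.auto.convert.join.size`, otherwise fall back to a shuffled hash join. As an illustration only (not code from this commit), a minimal Scala sketch of that decision follows; the names `chooseHashJoin`, `leftSize`, `rightSize`, and `threshold` are hypothetical stand-ins for Spark's statistics, the configured threshold, and the physical join operators.

```scala
// Hypothetical sketch of the build-side selection described above; not Spark code.
object JoinPlanningSketch {
  sealed trait BuildSide
  case object BuildLeft extends BuildSide
  case object BuildRight extends BuildSide

  // leftSize/rightSize stand in for the planner's physical size estimates (bytes);
  // threshold stands in for the value of spark.sql.auto.convert.join.size.
  def chooseHashJoin(leftSize: Long, rightSize: Long, threshold: Long): String =
    if (rightSize <= threshold) {
      // The small (right) side becomes the build table, broadcast to every executor.
      "BroadcastHashJoin(buildSide = BuildRight)"
    } else if (leftSize <= threshold) {
      "BroadcastHashJoin(buildSide = BuildLeft)"
    } else {
      // Neither estimate is under the threshold: fall back to a ShuffledHashJoin
      // and still hash the smaller side as the build relation.
      val buildSide = if (leftSize <= rightSize) BuildLeft else BuildRight
      s"ShuffledHashJoin(buildSide = $buildSide)"
    }
}
```

For example, `chooseHashJoin(10L << 20, 5L << 20, 8L << 20)` would select the broadcast plan with the right side as the build table, since only the right estimate falls under the threshold.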
@@ -144,11 +146,6 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     }
   }
 
-  /**
-   * This strategy applies a simple optimization based on the estimates of the physical sizes of
-   * the two join sides: the planner would mark the relation with the smaller estimated physical
-   * size as the ''build'' (broadcast) relation and mark the other as the ''stream'' relation.
-   */
   object BroadcastNestedLoopJoin extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case logical.Join(left, right, joinType, condition) =>