@@ -1456,7 +1456,7 @@ class Dataset[T] private[sql](
    * @group typedrel
    * @since 2.0.0
    */
-  def union(other: Dataset[T]): Dataset[T] = withTypedPlan {
+  def union(other: Dataset[T]): Dataset[T] = withSetOperator {
     // This breaks caching, but it's usually ok because it addresses a very specific use case:
     // using union to union many files or partitions.
     CombineUnions(Union(logicalPlan, other.logicalPlan))
@@ -1472,7 +1472,7 @@ class Dataset[T] private[sql](
    * @group typedrel
    * @since 1.6.0
    */
-  def intersect(other: Dataset[T]): Dataset[T] = withTypedPlan {
+  def intersect(other: Dataset[T]): Dataset[T] = withSetOperator {
     Intersect(logicalPlan, other.logicalPlan)
   }
 
@@ -1486,7 +1486,7 @@ class Dataset[T] private[sql](
    * @group typedrel
    * @since 2.0.0
    */
-  def except(other: Dataset[T]): Dataset[T] = withTypedPlan {
+  def except(other: Dataset[T]): Dataset[T] = withSetOperator {
     Except(logicalPlan, other.logicalPlan)
   }
 
@@ -2607,4 +2607,14 @@ class Dataset[T] private[sql](
   @inline private def withTypedPlan[U : Encoder](logicalPlan: => LogicalPlan): Dataset[U] = {
     Dataset(sparkSession, logicalPlan)
   }
+
+  /** A convenient function to wrap a set based logical plan and produce a Dataset. */
+  @inline private def withSetOperator[U : Encoder](logicalPlan: => LogicalPlan): Dataset[U] = {
+    if (classTag.runtimeClass.isAssignableFrom(classOf[Row])) {
+      // Set operators widen types (change the schema), so we cannot reuse the row encoder.
+      Dataset.ofRows(sparkSession, logicalPlan).asInstanceOf[Dataset[U]]
+    } else {
+      Dataset(sparkSession, logicalPlan)
+    }
+  }
 }
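
Why the `Row` branch rebuilds the Dataset via `Dataset.ofRows`: for untyped Datasets the analyzer widens the column types of set operations, so the result's schema can differ from either input and the original row encoder no longer matches. The following is a minimal standalone sketch of that widening behavior, not part of the patch; the object name, column name, and values are illustrative only.

```scala
import org.apache.spark.sql.SparkSession

// Standalone sketch (not part of the patch): shows the schema widening that
// withSetOperator has to account for when T is Row.
object SetOperatorWideningSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("set-operator-widening")
      .getOrCreate()
    import spark.implicits._

    val ints    = Seq(1, 2, 3).toDF("value")     // value: int
    val doubles = Seq(1.5, 2.5).toDF("value")    // value: double

    // The analyzer widens both sides to a common type, so the union's schema
    // (value: double) matches neither input exactly. Rebuilding the result
    // from the analyzed plan picks up the widened schema instead of reusing
    // the stale row encoder bound to the int schema.
    val unioned = ints.union(doubles)
    unioned.printSchema()
    unioned.show()

    spark.stop()
  }
}
```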