@@ -62,7 +62,7 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation, FileTable}
 import org.apache.spark.sql.execution.python.EvaluatePython
 import org.apache.spark.sql.execution.stat.StatFunctions
-import org.apache.spark.sql.internal.{DataFrameWriterImpl, DataFrameWriterV2Impl, MergeIntoWriterImpl, SQLConf, ToScalaUDF}
+import org.apache.spark.sql.internal.{DataFrameWriterImpl, DataFrameWriterV2Impl, MergeIntoWriterImpl, SQLConf}
 import org.apache.spark.sql.internal.ExpressionUtils.column
 import org.apache.spark.sql.internal.TypedAggUtils.withInputType
 import org.apache.spark.sql.streaming.DataStreamWriter
@@ -865,24 +865,7 @@ class Dataset[T] private[sql](
     Filter(condition.expr, logicalPlan)
   }
 
-  /**
-   * Groups the Dataset using the specified columns, so we can run aggregation on them. See
-   * [[RelationalGroupedDataset]] for all the available aggregate functions.
-   *
-   * {{{
-   *   // Compute the average for all numeric columns grouped by department.
-   *   ds.groupBy($"department").avg()
-   *
-   *   // Compute the max age and average salary, grouped by department and gender.
-   *   ds.groupBy($"department", $"gender").agg(Map(
-   *     "salary" -> "avg",
-   *     "age" -> "max"
-   *   ))
-   * }}}
-   *
-   * @group untypedrel
-   * @since 2.0.0
-   */
+  /** @inheritdoc */
   @scala.annotation.varargs
   def groupBy(cols: Column*): RelationalGroupedDataset = {
     RelationalGroupedDataset(toDF(), cols.map(_.expr), RelationalGroupedDataset.GroupByType)
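Note: the `groupBy` Scaladoc (with its usage examples) is not lost; it moves to the shared parent class and is pulled in via `@inheritdoc`. For reference, a minimal runnable sketch of the calls from the removed doc, assuming a hypothetical local session and toy data:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("groupBy-sketch").getOrCreate()
import spark.implicits._

// Toy frame standing in for the `ds` of the removed Scaladoc example.
val ds = Seq(("eng", "f", 100L, 30), ("eng", "m", 80L, 40), ("ops", "f", 90L, 35))
  .toDF("department", "gender", "salary", "age")

// Compute the average for all numeric columns grouped by department.
ds.groupBy($"department").avg().show()

// Compute the max age and average salary, grouped by department and gender.
ds.groupBy($"department", $"gender").agg(Map("salary" -> "avg", "age" -> "max")).show()
```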
@@ -914,13 +897,7 @@ class Dataset[T] private[sql](
     rdd.reduce(func)
   }
 
-  /**
-   * (Scala-specific)
-   * Returns a [[KeyValueGroupedDataset]] where the data is grouped by the given key `func`.
-   *
-   * @group typedrel
-   * @since 2.0.0
-   */
+  /** @inheritdoc */
   def groupByKey[K: Encoder](func: T => K): KeyValueGroupedDataset[K, T] = {
     val withGroupingKey = AppendColumns(func, logicalPlan)
     val executed = sparkSession.sessionState.executePlan(withGroupingKey)
@@ -933,16 +910,6 @@ class Dataset[T] private[sql](
       withGroupingKey.newColumns)
   }
 
-  /**
-   * (Java-specific)
-   * Returns a [[KeyValueGroupedDataset]] where the data is grouped by the given key `func`.
-   *
-   * @group typedrel
-   * @since 2.0.0
-   */
-  def groupByKey[K](func: MapFunction[T, K], encoder: Encoder[K]): KeyValueGroupedDataset[K, T] =
-    groupByKey(ToScalaUDF(func))(encoder)
-
   /** @inheritdoc */
   def unpivot(
       ids: Array[Column],
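Note: the Java-specific `groupByKey` overload deleted here appears to move to the shared parent (which also explains dropping the `ToScalaUDF` import at the top of the file); it is re-exposed through the override in the last hunk below. A hedged sketch of calling that overload from Scala, on a hypothetical dataset of words, assuming the overload remains visible on `Dataset` after the move:

```scala
import org.apache.spark.api.java.function.MapFunction
import org.apache.spark.sql.{Encoders, SparkSession}

val spark = SparkSession.builder().master("local[*]").appName("groupByKey-sketch").getOrCreate()
import spark.implicits._

val words = Seq("spark", "scala", "sql").toDS()

// The Java-style overload (MapFunction + explicit Encoder) still resolves;
// only its declaring class changed.
val byFirstLetter = words.groupByKey(
  new MapFunction[String, String] { override def call(s: String): String = s.take(1) },
  Encoders.STRING)

byFirstLetter.count().show()
```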
@@ -1640,28 +1607,7 @@ class Dataset[T] private[sql](
     new DataFrameWriterV2Impl[T](table, this)
   }
 
-  /**
-   * Merges a set of updates, insertions, and deletions based on a source table into
-   * a target table.
-   *
-   * Scala Examples:
-   * {{{
-   *   spark.table("source")
-   *     .mergeInto("target", $"source.id" === $"target.id")
-   *     .whenMatched($"salary" === 100)
-   *     .delete()
-   *     .whenNotMatched()
-   *     .insertAll()
-   *     .whenNotMatchedBySource($"salary" === 100)
-   *     .update(Map(
-   *       "salary" -> lit(200)
-   *     ))
-   *     .merge()
-   * }}}
-   *
-   * @group basic
-   * @since 4.0.0
-   */
+  /** @inheritdoc */
   def mergeInto(table: String, condition: Column): MergeIntoWriter[T] = {
     if (isStreaming) {
       logicalPlan.failAnalysis(
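Note: the `mergeInto` builder chain from the removed Scaladoc, reproduced as a self-contained sketch. The `source` and `target` table names come from the original doc; the assumption that the target's catalog must support row-level operations (e.g. an Iceberg or Delta catalog) is ours, not the doc's:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

val spark = SparkSession.builder().master("local[*]").appName("mergeInto-sketch").getOrCreate()
import spark.implicits._

// "source" and "target" are assumed to already exist in the session catalog.
spark.table("source")
  .mergeInto("target", $"source.id" === $"target.id")
  .whenMatched($"salary" === 100)
  .delete()
  .whenNotMatched()
  .insertAll()
  .whenNotMatchedBySource($"salary" === 100)
  .update(Map("salary" -> lit(200)))
  .merge()
```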
@@ -2024,6 +1970,12 @@ class Dataset[T] private[sql](
   @scala.annotation.varargs
   override def agg(expr: Column, exprs: Column*): DataFrame = super.agg(expr, exprs: _*)
 
+  /** @inheritdoc */
+  override def groupByKey[K](
+      func: MapFunction[T, K],
+      encoder: Encoder[K]): KeyValueGroupedDataset[K, T] =
+    super.groupByKey(func, encoder).asInstanceOf[KeyValueGroupedDataset[K, T]]
+
   //////////////////////////////////////////////////////////////////////////
   // For Python API
   //////////////////////////////////////////////////////////////////////////
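Note: the override added here seems to exist only to narrow the parent's abstract return type to this module's concrete `KeyValueGroupedDataset`, which Scala's covariant return-type overriding allows; the `asInstanceOf` is safe under the assumption that the parent implementation always builds the concrete type. A stripped-down sketch of that pattern, with hypothetical stand-in types:

```scala
// Hypothetical stand-ins for the abstract API class and its concrete result.
abstract class AbstractGrouped
class ClassicGrouped extends AbstractGrouped

abstract class AbstractDs {
  // Parent declares (and implements) the wide return type.
  def grouped(): AbstractGrouped = new ClassicGrouped
}

class ClassicDs extends AbstractDs {
  // Covariant override: callers of ClassicDs#grouped get the narrower type
  // statically, without casting at each call site.
  override def grouped(): ClassicGrouped =
    super.grouped().asInstanceOf[ClassicGrouped]
}
```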