Skip to content

Commit 47d52b9

Browse files
committed
Add comment.
1 parent a8f1b33 commit 47d52b9

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
5252
* The algorithm was first presented in [[http://dx.doi.org/10.1145/375663.375670 Space-efficient
5353
* Online Computation of Quantile Summaries]] by Greenwald and Khanna.
5454
*
55-
* @param col the name of the numerical column
55+
* @param col the name of the numerical column.
5656
* @param probabilities a list of quantile probabilities.
5757
* Each number must belong to [0, 1].
5858
* For example 0 is the minimum, 0.5 is the median, 1 is the maximum.
5959
* @param relativeError The relative target precision to achieve (>= 0).
6060
* If set to zero, the exact quantiles are computed, which could be very expensive.
6161
* Note that values greater than 1 are accepted but give the same result as 1.
62-
* @return the approximate quantiles at the given probabilities
62+
* @return the approximate quantiles at the given probabilities.
6363
*
6464
* @since 2.0.0
6565
*/
@@ -70,6 +70,20 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
7070
StatFunctions.multipleApproxQuantiles(df, Seq(col), probabilities, relativeError).head.toArray
7171
}
7272

73+
/**
74+
* Calculates the approximate quantiles of numerical columns of a DataFrame.
75+
*
76+
* @param cols the names of the numerical columns.
77+
* @param probabilities a list of quantile probabilities.
78+
* Each number must belong to [0, 1].
79+
* For example 0 is the minimum, 0.5 is the median, 1 is the maximum.
80+
* @param relativeError The relative target precision to achieve (>= 0).
81+
* If set to zero, the exact quantiles are computed, which could be very expensive.
82+
* Note that values greater than 1 are accepted but give the same result as 1.
83+
* @return the approximate quantiles at the given probabilities for the given columns.
84+
*
85+
* @since 2.0.0
86+
*/
7387
def approxQuantile(
7488
cols: Array[String],
7589
probabilities: Array[Double],
@@ -88,6 +102,10 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
88102
approxQuantile(col, probabilities.toArray, relativeError).toList.asJava
89103
}
90104

105+
/**
106+
* Python-friendly version of [[approxQuantile()]] that computes approximate quantiles
107+
* for multiple columns.
108+
*/
91109
private[spark] def approxQuantile(
92110
cols: List[String],
93111
probabilities: List[Double],

0 commit comments

Comments
 (0)