@@ -359,6 +359,8 @@ abstract class OffsetWindowFunction
359359 * default offset is 1. When the value of 'x' is null at the offset, or when the offset is larger
360360 * than the window, the default expression is evaluated.
361361 *
362+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
363+ *
362364 * @param input expression to evaluate 'offset' rows after the current row.
363365 * @param offset rows to jump ahead in the partition.
364366 * @param default to use when the input value is null or when the offset is larger than the window.
@@ -383,6 +385,8 @@ case class Lead(input: Expression, offset: Expression, default: Expression)
383385 * default offset is 1. When the value of 'x' is null at the offset, or when the offset is smaller
384386 * than the window, the default expression is evaluated.
385387 *
388+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
389+ *
386390 * @param input expression to evaluate 'offset' rows before the current row.
387391 * @param offset rows to jump back in the partition.
388392 * @param default to use when the input value is null or when the offset is smaller than the window.
@@ -436,6 +440,8 @@ object SizeBasedWindowFunction {
436440/**
437441 * The RowNumber function assigns a unique, sequential number to each row, starting with one,
438442 * according to the ordering of rows within the window partition.
443+ *
444+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
439445 */
440446@ ExpressionDescription (usage = " _FUNC_() - The ROW_NUMBER() function assigns a unique, sequential" +
441447 " number to each row, starting with one, according to the ordering of rows within the window" +
@@ -449,6 +455,8 @@ case class RowNumber() extends RowNumberLike {
449455 * The result is the number of rows preceding or equal to the current row in the ordering of the
450456 * partition divided by the total number of rows in the window partition. Any tie values in the
451457 * ordering will evaluate to the same position.
458+ *
459+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
452460 */
453461@ ExpressionDescription (usage = " _FUNC_(x) - The CUME_DIST() function computes the position of a " +
454462 " value relative to a all values in the partition." )
@@ -469,7 +477,17 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction {
469477 * The NTile function is particularly useful for the calculation of tertiles, quartiles, deciles and
470478 * other common summary statistics.
471479 *
472- * @param buckets number of buckets to divide the rows in.
480+ * The function calculates two variables during initialization: the size of a regular bucket, and
481+ * the number of buckets that will have one extra row added to them (when the rows do not evenly
482+ * fit into the number of buckets); both variables are based on the size of the current partition.
483+ * During the calculation process the function keeps track of the current row number, the current
484+ * bucket number, and the row number at which the bucket will change (bucketThreshold). When the
485+ * current row number reaches the bucket threshold, the bucket value is increased by one and the
486+ * threshold is increased by the bucket size (plus one extra if the current bucket is padded).
487+ *
488+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
489+ *
490+ * @param buckets number of buckets to divide the rows in. Default value is 1.
473491 */
474492@ ExpressionDescription (usage = " _FUNC_(x) - The NTILE(n) function divides the rows for each " +
475493 " window partition into 'n' buckets ranging from 1 to at most 'n'." )
@@ -526,6 +544,8 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow
526544 * the order of the window in which it is processed. For instance, when the value of 'x' changes in a
527545 * window ordered by 'x' the rank function also changes. The size of the change of the rank function
528546 * is (typically) not dependent on the size of the change in 'x'.
547+ *
548+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
529549 */
530550abstract class RankLike extends AggregateWindowFunction {
531551 override def inputTypes : Seq [AbstractDataType ] = children.map(_ => AnyDataType )
@@ -570,6 +590,8 @@ abstract class RankLike extends AggregateWindowFunction {
570590 * number of rows preceding or equal to the current row in the ordering of the partition. Tie values
571591 * will produce gaps in the sequence.
572592 *
593+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
594+ *
573595 * @param children to base the rank on; a change in the value of one of the children will trigger a
574596 * change in rank. This is an internal parameter and will be assigned by the
575597 * Analyser.
@@ -587,6 +609,8 @@ case class Rank(children: Seq[Expression]) extends RankLike {
587609 * the previously assigned rank values. Unlike Rank, DenseRank will not produce gaps in the ranking
588610 * sequence.
589611 *
612+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
613+ *
590614 * @param children to base the rank on; a change in the value of one of the children will trigger a
591615 * change in rank. This is an internal parameter and will be assigned by the
592616 * Analyser.
@@ -611,6 +635,8 @@ case class DenseRank(children: Seq[Expression]) extends RankLike {
611635 * The PercentRank function is similar to the CumeDist function, but it uses rank values instead of
612636 * row counts in its numerator.
613637 *
638+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
639+ *
614640 * @param children to base the rank on; a change in the value of one of the children will trigger a
615641 * change in rank. This is an internal parameter and will be assigned by the
616642 * Analyser.
0 commit comments