From e1205622c1eed41b383881031d72c52b931798a4 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Sun, 20 Dec 2015 14:54:26 +0100 Subject: [PATCH 1/5] Add documentation to window functions. --- .../expressions/windowExpressions.scala | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 06252ac4fc616..67750a0ce09fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -353,6 +353,18 @@ abstract class OffsetWindowFunction override def toString: String = s"$prettyName($input, $offset, $default)" } +/** + * The Lead function returns the value of 'x' at 'offset' rows after the current row in the window. + * Offsets start at 0, which is the current row. The offset must be constant integer value. The + * default offset is 1. When the value of 'x' is null at the offset, or when the offset is larger + * than the window, the default expression is evaluated. + * + * @param input expression to evaluate 'offset' rows after the current row. + * @param offset rows to jump ahead in the partition. + * @param default to use when the input value is null or when the offset is larger than the window. + */ +@ExpressionDescription(usage = "_FUNC_(input, offset, default) - LEAD returns the value of 'x' " + + "at 'offset' rows after the current row in the window") case class Lead(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -365,6 +377,18 @@ case class Lead(input: Expression, offset: Expression, default: Expression) override val direction = Ascending } +/** + * The Lag function returns the value of 'x' at 'offset' rows before the current row in the window. 
+ * Offsets start at 0, which is the current row. The offset must be constant integer value. The + * default offset is 1. When the value of 'x' is null at the offset, or when the offset is smaller + * than the window, the default expression is evaluated. + * + * @param input expression to evaluate 'offset' rows before the current row. + * @param offset rows to jump back in the partition. + * @param default to use when the input value is null or when the offset is smaller than the window. + */ +@ExpressionDescription(usage = "_FUNC_(input, offset, default) - LAG returns the value of 'x' " + + "at 'offset' rows before the current row in the window") case class Lag(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -409,10 +433,25 @@ object SizeBasedWindowFunction { val n = AttributeReference("window__partition__size", IntegerType, nullable = false)() } +/** + * The RowNumber function computes a unique, sequential number to each row, starting with one, + * according to the ordering of rows within the window partition. + */ +@ExpressionDescription(usage = "_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential" + + "number to each row, starting with one, according to the ordering of rows within the window" + + "partition.") case class RowNumber() extends RowNumberLike { override val evaluateExpression = rowNumber } +/** + * The CumeDist function computes the position of a value relative to all values in the partition. + * The result is the number of rows preceding or equal to the current row in the ordering of the + * partition divided by the total number of rows in the window partition. Any tie values in the + * ordering will evaluate to the same position. 
+ */ +@ExpressionDescription(usage = "_FUNC_(x) - The CUME_DIST() function computes the position of a " + + "value relative to a all values in the partition.") case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override def dataType: DataType = DoubleType // The frame for CUME_DIST is Range based instead of Row based, because CUME_DIST must @@ -421,6 +460,19 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override val evaluateExpression = Divide(Cast(rowNumber, DoubleType), Cast(n, DoubleType)) } +/** + * The NTile function divides the rows for each window partition into 'n' buckets ranging from 1 to + * at most 'n'. Bucket values will differ by at most 1. If the number of rows in the partition does + * not divide evenly into the number of buckets, then the remainder values are distributed one per + * bucket, starting with the first bucket. + * + * The NTile function is particularly useful for the calculation of tertiles, quartiles, deciles and + * other common summary statistics + * + * @param buckets number of buckets to divide the rows in. + */ +@ExpressionDescription(usage = "_FUNC_(x) - The NTILE(n) function divides the rows for each " + + "window partition into 'n' buckets ranging from 1 to at most 'n'.") case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindowFunction { def this() = this(Literal(1)) @@ -513,11 +565,35 @@ abstract class RankLike extends AggregateWindowFunction { def withOrder(order: Seq[Expression]): RankLike } +/** + * The Rank function computes the rank of a value in a group of values. The result is one plus the + * number of rows preceding or equal to the current row in the ordering of the partition. Tie values + * will produce gaps in the sequence. + * + * @param children to base the rank on; a change in the value of one the children will trigger a + * change in rank. This is an internal parameter and will be assigned by the + * Analyser. 
+ */ +@ExpressionDescription(usage = "_FUNC_(x) - RANK() computes the rank of a value in a group of" + + "values. The result is one plus the number of rows preceding or equal to the current row in " + + "the ordering of the partition. Tie values will produce gaps in the sequence.") case class Rank(children: Seq[Expression]) extends RankLike { def this() = this(Nil) override def withOrder(order: Seq[Expression]): Rank = Rank(order) } +/** + * The DenseRank function computes the rank of a value in a group of values. The result is one plus + * the previously assigned rank values. Unlike Rank, DenseRank will not produce gaps in the ranking + * sequence. + * + * @param children to base the rank on; a change in the value of one the children will trigger a + * change in rank. This is an internal parameter and will be assigned by the + * Analyser. + */ +@ExpressionDescription(usage = "_FUNC_(x) - The DENSE_RANK() function computes the rank of a " + + "value in a group of values. The result is one plus the previously assigned rank values. " + + "Unlike Rank, DenseRank will not produce gaps in the ranking sequence.") case class DenseRank(children: Seq[Expression]) extends RankLike { def this() = this(Nil) override def withOrder(order: Seq[Expression]): DenseRank = DenseRank(order) @@ -527,6 +603,20 @@ case class DenseRank(children: Seq[Expression]) extends RankLike { override val initialValues = zero +: orderInit } +/** + * The PercentRank function computes the percentage ranking of a value in a group of values. The + * result is the rank minus one divided by the total number of rows in the partition minus one: + * (r - 1) / (n - 1). If a partition only contains one row, the function will return 0. + * + * The PercentRank function is similar to the CumeDist function, but it uses rank values instead of + * row counts in the its numerator. + * + * @param children to base the rank on; a change in the value of one the children will trigger a + * change in rank. 
This is an internal parameter and will be assigned by the + * Analyser. + */ +@ExpressionDescription(usage = "_FUNC_(x) - PERCENT_RANK() The PercentRank function computes the " + + "percentage ranking of a value in a group of values.") case class PercentRank(children: Seq[Expression]) extends RankLike with SizeBasedWindowFunction { def this() = this(Nil) override def withOrder(order: Seq[Expression]): PercentRank = PercentRank(order) From 2b806bbfcd0a8331f17239df2ab24bb82f462d01 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Sun, 20 Dec 2015 15:48:32 +0100 Subject: [PATCH 2/5] Add NULL tests --- .../spark/sql/DataFrameWindowSuite.scala | 20 +++++++++++++++++++ .../sql/hive/execution/WindowQuerySuite.scala | 15 ++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala index b50d7604e0ec7..3917b9762ba63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala @@ -292,4 +292,24 @@ class DataFrameWindowSuite extends QueryTest with SharedSQLContext { Row("b", 3, 8, 32), Row("b", 2, 4, 8))) } + + test("null inputs") { + val df = Seq(("a", 1), ("a", 1), ("a", 2), ("a", 2), ("b", 4), ("b", 3), ("b", 2)) + .toDF("key", "value") + val window = Window.orderBy() + checkAnswer( + df.select( + $"key", + $"value", + avg(lit(null)).over(window), + sum(lit(null)).over(window)), + Seq( + Row("a", 1, null, null), + Row("a", 1, null, null), + Row("a", 2, null, null), + Row("a", 2, null, null), + Row("b", 4, null, null), + Row("b", 3, null, null), + Row("b", 2, null, null))) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala index c05dbfd7608d9..ea82b8c459695 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala @@ -227,4 +227,19 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto Row("Manufacturer#5", "almond azure blanched chiffon midnight", 23, 315.9225931564038, 315.9225931564038, 46, 99807.08486666666, -0.9978877469246935, -5664.856666666666))) // scalastyle:on } + + test("null arguments") { + checkAnswer(sql(""" + |select p_mfgr, p_name, p_size, + |sum(null) over(distribute by p_mfgr sort by p_name) as sum, + |avg(null) over(distribute by p_mfgr sort by p_name) as avg + |from part + """.stripMargin), + sql(""" + |select p_mfgr, p_name, p_size, + |null as sum, + |null as avg + |from part + """.stripMargin)) + } } From cf5895421eb5d07305b169268bc3b127dd1a5c39 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Thu, 24 Dec 2015 08:11:33 +0100 Subject: [PATCH 3/5] Further Docs for NTile and acknowledging Hive and Presto projects. --- .../expressions/windowExpressions.scala | 28 ++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 67750a0ce09fe..5f77889552993 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -359,6 +359,8 @@ abstract class OffsetWindowFunction * default offset is 1. When the value of 'x' is null at the offset, or when the offset is larger * than the window, the default expression is evaluated. * + * This documentation has been based upon similar documentation for the Hive and Presto projects. + * * @param input expression to evaluate 'offset' rows after the current row. 
* @param offset rows to jump ahead in the partition. * @param default to use when the input value is null or when the offset is larger than the window. @@ -383,6 +385,8 @@ case class Lead(input: Expression, offset: Expression, default: Expression) * default offset is 1. When the value of 'x' is null at the offset, or when the offset is smaller * than the window, the default expression is evaluated. * + * This documentation has been based upon similar documentation for the Hive and Presto projects. + * * @param input expression to evaluate 'offset' rows before the current row. * @param offset rows to jump back in the partition. * @param default to use when the input value is null or when the offset is smaller than the window. @@ -436,6 +440,8 @@ object SizeBasedWindowFunction { /** * The RowNumber function computes a unique, sequential number to each row, starting with one, * according to the ordering of rows within the window partition. + * + * This documentation has been based upon similar documentation for the Hive and Presto projects. */ @ExpressionDescription(usage = "_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential" + "number to each row, starting with one, according to the ordering of rows within the window" + @@ -449,6 +455,8 @@ case class RowNumber() extends RowNumberLike { * The result is the number of rows preceding or equal to the current row in the ordering of the * partition divided by the total number of rows in the window partition. Any tie values in the * ordering will evaluate to the same position. + * + * This documentation has been based upon similar documentation for the Hive and Presto projects. 
*/ @ExpressionDescription(usage = "_FUNC_(x) - The CUME_DIST() function computes the position of a " + "value relative to a all values in the partition.") @@ -469,7 +477,17 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { * The NTile function is particularly useful for the calculation of tertiles, quartiles, deciles and * other common summary statistics * - * @param buckets number of buckets to divide the rows in. + * The function calculates two variables during initialization. The size of a regular bucket, and + * the number of buckets that will have one extra row added to it (when the rows do not evenly fit + * into the number of buckets); both variables are based on the size of the current partition. + * During the calculation process the function keeps track of the current row number, the current + * bucket number, and the row number at which the bucket will change (bucketThreshold). When the + * current row number reaches bucket threshold, the bucket value is increased by one and the + * threshold is increased by the bucket size (plus one extra if the current bucket is padded). + * + * This documentation has been based upon similar documentation for the Hive and Presto projects. + * + * @param buckets number of buckets to divide the rows in. Default value is 1. */ @ExpressionDescription(usage = "_FUNC_(x) - The NTILE(n) function divides the rows for each " + "window partition into 'n' buckets ranging from 1 to at most 'n'.") @@ -526,6 +544,8 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow * the order of the window in which is processed. For instance, when the value of 'x' changes in a * window ordered by 'x' the rank function also changes. The size of the change of the rank function * is (typically) not dependent on the size of the change in 'x'. + * + * This documentation has been based upon similar documentation for the Hive and Presto projects. 
*/ abstract class RankLike extends AggregateWindowFunction { override def inputTypes: Seq[AbstractDataType] = children.map(_ => AnyDataType) @@ -570,6 +590,8 @@ abstract class RankLike extends AggregateWindowFunction { * number of rows preceding or equal to the current row in the ordering of the partition. Tie values * will produce gaps in the sequence. * + * This documentation has been based upon similar documentation for the Hive and Presto projects. + * * @param children to base the rank on; a change in the value of one the children will trigger a * change in rank. This is an internal parameter and will be assigned by the * Analyser. @@ -587,6 +609,8 @@ case class Rank(children: Seq[Expression]) extends RankLike { * the previously assigned rank values. Unlike Rank, DenseRank will not produce gaps in the ranking * sequence. * + * This documentation has been based upon similar documentation for the Hive and Presto projects. + * * @param children to base the rank on; a change in the value of one the children will trigger a * change in rank. This is an internal parameter and will be assigned by the * Analyser. @@ -611,6 +635,8 @@ case class DenseRank(children: Seq[Expression]) extends RankLike { * The PercentRank function is similar to the CumeDist function, but it uses rank values instead of * row counts in the its numerator. * + * This documentation has been based upon similar documentation for the Hive and Presto projects. + * * @param children to base the rank on; a change in the value of one the children will trigger a * change in rank. This is an internal parameter and will be assigned by the * Analyser. From 767305a58c47fffb1ced4483e3c4a938e5383143 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Mon, 28 Dec 2015 09:40:34 +0100 Subject: [PATCH 4/5] Fix OffsetWindowFunction foldable and nullable. Use raw strings instead of concatenated strings in annotations. Minor doc changes. 
--- .../expressions/windowExpressions.scala | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 5f77889552993..c6341a5b5b70a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -314,8 +314,8 @@ abstract class OffsetWindowFunction val offset: Expression /** - * Direction (above = 1/below = -1) of the number of rows between the current row and the row - * where the input expression is evaluated. + * Direction of the number of rows between the current row and the row where the input expression + * is evaluated. */ val direction: SortDirection @@ -327,9 +327,9 @@ abstract class OffsetWindowFunction * both the input and the default expression are foldable, the result is still not foldable due to * the frame. */ - override def foldable: Boolean = input.foldable && (default == null || default.foldable) + override def foldable: Boolean = false - override def nullable: Boolean = input.nullable && (default == null || default.nullable) + override def nullable: Boolean = default == null || default.nullable override lazy val frame = { // This will be triggered by the Analyzer. @@ -365,8 +365,9 @@ abstract class OffsetWindowFunction * @param offset rows to jump ahead in the partition. * @param default to use when the input value is null or when the offset is larger than the window. 
*/ -@ExpressionDescription(usage = "_FUNC_(input, offset, default) - LEAD returns the value of 'x' " + - "at 'offset' rows after the current row in the window") +@ExpressionDescription(usage = + """_FUNC_(input, offset, default) - LEAD returns the value of 'x' at 'offset' rows after the + current row in the window""") case class Lead(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -391,8 +392,9 @@ case class Lead(input: Expression, offset: Expression, default: Expression) * @param offset rows to jump back in the partition. * @param default to use when the input value is null or when the offset is smaller than the window. */ -@ExpressionDescription(usage = "_FUNC_(input, offset, default) - LAG returns the value of 'x' " + - "at 'offset' rows before the current row in the window") +@ExpressionDescription(usage = + """_FUNC_(input, offset, default) - LAG returns the value of 'x' at 'offset' rows before the + current row in the window""") case class Lag(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -443,9 +445,10 @@ object SizeBasedWindowFunction { * * This documentation has been based upon similar documentation for the Hive and Presto projects. */ -@ExpressionDescription(usage = "_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential" + - "number to each row, starting with one, according to the ordering of rows within the window" + - "partition.") +@ExpressionDescription(usage = + """_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential + number to each row, starting with one, according to the ordering of rows within the window + partition.""") case class RowNumber() extends RowNumberLike { override val evaluateExpression = rowNumber } @@ -458,8 +461,9 @@ case class RowNumber() extends RowNumberLike { * * This documentation has been based upon similar documentation for the Hive and Presto projects. 
*/ -@ExpressionDescription(usage = "_FUNC_(x) - The CUME_DIST() function computes the position of a " + - "value relative to a all values in the partition.") +@ExpressionDescription(usage = + """_FUNC_() - The CUME_DIST() function computes the position of a value relative to a all values + in the partition.""") case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override def dataType: DataType = DoubleType // The frame for CUME_DIST is Range based instead of Row based, because CUME_DIST must @@ -477,7 +481,7 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { * The NTile function is particularly useful for the calculation of tertiles, quartiles, deciles and * other common summary statistics * - * The function calculates two variables during initialization. The size of a regular bucket, and + * The function calculates two variables during initialization: The size of a regular bucket, and * the number of buckets that will have one extra row added to it (when the rows do not evenly fit * into the number of buckets); both variables are based on the size of the current partition. * During the calculation process the function keeps track of the current row number, the current @@ -489,8 +493,9 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { * * @param buckets number of buckets to divide the rows in. Default value is 1. */ -@ExpressionDescription(usage = "_FUNC_(x) - The NTILE(n) function divides the rows for each " + - "window partition into 'n' buckets ranging from 1 to at most 'n'.") +@ExpressionDescription(usage = + """_FUNC_(x) - The NTILE(n) function divides the rows for each window partition into 'n' buckets + ranging from 1 to at most 'n'.""") case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindowFunction { def this() = this(Literal(1)) @@ -596,7 +601,7 @@ abstract class RankLike extends AggregateWindowFunction { * change in rank. 
This is an internal parameter and will be assigned by the * Analyser. */ -@ExpressionDescription(usage = "_FUNC_(x) - RANK() computes the rank of a value in a group of" + +@ExpressionDescription(usage = "_FUNC_() - RANK() computes the rank of a value in a group of" + "values. The result is one plus the number of rows preceding or equal to the current row in " + "the ordering of the partition. Tie values will produce gaps in the sequence.") case class Rank(children: Seq[Expression]) extends RankLike { @@ -615,9 +620,10 @@ case class Rank(children: Seq[Expression]) extends RankLike { * change in rank. This is an internal parameter and will be assigned by the * Analyser. */ -@ExpressionDescription(usage = "_FUNC_(x) - The DENSE_RANK() function computes the rank of a " + - "value in a group of values. The result is one plus the previously assigned rank values. " + - "Unlike Rank, DenseRank will not produce gaps in the ranking sequence.") +@ExpressionDescription(usage = + """_FUNC_() - The DENSE_RANK() function computes the rank of a value in a group of values. The + result is one plus the previously assigned rank values. Unlike Rank, DenseRank will not produce + gaps in the ranking sequence.""") case class DenseRank(children: Seq[Expression]) extends RankLike { def this() = this(Nil) override def withOrder(order: Seq[Expression]): DenseRank = DenseRank(order) @@ -641,8 +647,9 @@ case class DenseRank(children: Seq[Expression]) extends RankLike { * change in rank. This is an internal parameter and will be assigned by the * Analyser. 
*/ -@ExpressionDescription(usage = "_FUNC_(x) - PERCENT_RANK() The PercentRank function computes the " + - "percentage ranking of a value in a group of values.") +@ExpressionDescription(usage = + """_FUNC_() - PERCENT_RANK() The PercentRank function computes the percentage ranking of a value + in a group of values.""") case class PercentRank(children: Seq[Expression]) extends RankLike with SizeBasedWindowFunction { def this() = this(Nil) override def withOrder(order: Seq[Expression]): PercentRank = PercentRank(order) From 13f9c95590bbee7790e74768e7b42fb0e0161b9d Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Mon, 28 Dec 2015 10:08:54 +0100 Subject: [PATCH 5/5] Fix rank strings. --- .../sql/catalyst/expressions/windowExpressions.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index c6341a5b5b70a..7c6117c9f7ccc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -601,9 +601,10 @@ abstract class RankLike extends AggregateWindowFunction { * change in rank. This is an internal parameter and will be assigned by the * Analyser. */ -@ExpressionDescription(usage = "_FUNC_() - RANK() computes the rank of a value in a group of" + - "values. The result is one plus the number of rows preceding or equal to the current row in " + - "the ordering of the partition. Tie values will produce gaps in the sequence.") +@ExpressionDescription(usage = + """_FUNC_() - RANK() computes the rank of a value in a group of values. The result is one plus + the number of rows preceding or equal to the current row in the ordering of the partition. 
Tie + values will produce gaps in the sequence.""") case class Rank(children: Seq[Expression]) extends RankLike { def this() = this(Nil) override def withOrder(order: Seq[Expression]): Rank = Rank(order) @@ -611,7 +612,7 @@ case class Rank(children: Seq[Expression]) extends RankLike { /** * The DenseRank function computes the rank of a value in a group of values. The result is one plus - * the previously assigned rank values. Unlike Rank, DenseRank will not produce gaps in the ranking + * the previously assigned rank value. Unlike Rank, DenseRank will not produce gaps in the ranking * sequence. * * This documentation has been based upon similar documentation for the Hive and Presto projects. @@ -622,7 +623,7 @@ case class Rank(children: Seq[Expression]) extends RankLike { */ @ExpressionDescription(usage = """_FUNC_() - The DENSE_RANK() function computes the rank of a value in a group of values. The - result is one plus the previously assigned rank values. Unlike Rank, DenseRank will not produce + result is one plus the previously assigned rank value. Unlike Rank, DenseRank will not produce gaps in the ranking sequence.""") case class DenseRank(children: Seq[Expression]) extends RankLike { def this() = this(Nil)