From e1205622c1eed41b383881031d72c52b931798a4 Mon Sep 17 00:00:00 2001
From: Herman van Hovell <hvanhovell@questtec.nl>
Date: Sun, 20 Dec 2015 14:54:26 +0100
Subject: [PATCH 1/5] Add documentation to window functions.

---
 .../expressions/windowExpressions.scala       | 90 +++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 06252ac4fc616..67750a0ce09fe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -353,6 +353,18 @@ abstract class OffsetWindowFunction
   override def toString: String = s"$prettyName($input, $offset, $default)"
 }
 
+/**
+ * The Lead function returns the value of 'x' at 'offset' rows after the current row in the window.
+ * Offsets start at 0, which is the current row. The offset must be a constant integer value. The
+ * default offset is 1. When the value of 'x' is null at the offset, or when the offset is larger
+ * than the window, the default expression is evaluated.
+ *
+ * @param input expression to evaluate 'offset' rows after the current row.
+ * @param offset rows to jump ahead in the partition.
+ * @param default to use when the input value is null or when the offset is larger than the window.
+ */
+@ExpressionDescription(usage = "_FUNC_(input, offset, default) - LEAD returns the value of 'x' " +
+  "at 'offset' rows after the current row in the window")
 case class Lead(input: Expression, offset: Expression, default: Expression)
     extends OffsetWindowFunction {
 
@@ -365,6 +377,18 @@ case class Lead(input: Expression, offset: Expression, default: Expression)
   override val direction = Ascending
 }
 
+/**
+ * The Lag function returns the value of 'x' at 'offset' rows before the current row in the window.
+ * Offsets start at 0, which is the current row. The offset must be a constant integer value. The
+ * default offset is 1. When the value of 'x' is null at the offset, or when the offset is smaller
+ * than the window, the default expression is evaluated.
+ *
+ * @param input expression to evaluate 'offset' rows before the current row.
+ * @param offset rows to jump back in the partition.
+ * @param default to use when the input value is null or when the offset is smaller than the window.
+ */
+@ExpressionDescription(usage = "_FUNC_(input, offset, default) - LAG returns the value of 'x' " +
+  "at 'offset' rows before the current row in the window")
 case class Lag(input: Expression, offset: Expression, default: Expression)
     extends OffsetWindowFunction {
 
@@ -409,10 +433,25 @@ object SizeBasedWindowFunction {
   val n = AttributeReference("window__partition__size", IntegerType, nullable = false)()
 }
 
+/**
+ * The RowNumber function computes a unique, sequential number to each row, starting with one,
+ * according to the ordering of rows within the window partition.
+ */
+@ExpressionDescription(usage = "_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential" +
+  "number to each row, starting with one, according to the ordering of rows within the window" +
+  "partition.")
 case class RowNumber() extends RowNumberLike {
   override val evaluateExpression = rowNumber
 }
 
+/**
+ * The CumeDist function computes the position of a value relative to all values in the partition.
+ * The result is the number of rows preceding or equal to the current row in the ordering of the
+ * partition divided by the total number of rows in the window partition. Any tie values in the
+ * ordering will evaluate to the same position.
+ */
+@ExpressionDescription(usage = "_FUNC_(x) - The CUME_DIST() function computes the position of a " +
+  "value relative to a all values in the partition.")
 case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction {
   override def dataType: DataType = DoubleType
   // The frame for CUME_DIST is Range based instead of Row based, because CUME_DIST must
@@ -421,6 +460,19 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction {
   override val evaluateExpression = Divide(Cast(rowNumber, DoubleType), Cast(n, DoubleType))
 }
 
+/**
+ * The NTile function divides the rows for each window partition into 'n' buckets ranging from 1 to
+ * at most 'n'. Bucket values will differ by at most 1. If the number of rows in the partition does
+ * not divide evenly into the number of buckets, then the remainder values are distributed one per
+ * bucket, starting with the first bucket.
+ *
+ * The NTile function is particularly useful for the calculation of tertiles, quartiles, deciles and
+ * other common summary statistics
+ *
+ * @param buckets number of buckets to divide the rows in.
+ */
+@ExpressionDescription(usage = "_FUNC_(x) - The NTILE(n) function divides the rows for each " +
+  "window partition into 'n' buckets ranging from 1 to at most 'n'.")
 case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindowFunction {
   def this() = this(Literal(1))
 
@@ -513,11 +565,35 @@ abstract class RankLike extends AggregateWindowFunction {
   def withOrder(order: Seq[Expression]): RankLike
 }
 
+/**
+ * The Rank function computes the rank of a value in a group of values. The result is one plus the
+ * number of rows preceding or equal to the current row in the ordering of the partition. Tie values
+ * will produce gaps in the sequence.
+ *
+ * @param children to base the rank on; a change in the value of one the children will trigger a
+ *                 change in rank. This is an internal parameter and will be assigned by the
+ *                 Analyser.
+ */
+@ExpressionDescription(usage = "_FUNC_(x) -  RANK() computes the rank of a value in a group of" +
+  "values. The result is one plus the number of rows preceding or equal to the current row in " +
+  "the ordering of the partition. Tie values will produce gaps in the sequence.")
 case class Rank(children: Seq[Expression]) extends RankLike {
   def this() = this(Nil)
   override def withOrder(order: Seq[Expression]): Rank = Rank(order)
 }
 
+/**
+ * The DenseRank function computes the rank of a value in a group of values. The result is one plus
+ * the previously assigned rank values. Unlike Rank, DenseRank will not produce gaps in the ranking
+ * sequence.
+ *
+ * @param children to base the rank on; a change in the value of one the children will trigger a
+ *                 change in rank. This is an internal parameter and will be assigned by the
+ *                 Analyser.
+ */
+@ExpressionDescription(usage = "_FUNC_(x) - The DENSE_RANK() function computes the rank of a " +
+  "value in a group of values. The result is one plus the previously assigned rank values. " +
+  "Unlike Rank, DenseRank will not produce gaps in the ranking sequence.")
 case class DenseRank(children: Seq[Expression]) extends RankLike {
   def this() = this(Nil)
   override def withOrder(order: Seq[Expression]): DenseRank = DenseRank(order)
@@ -527,6 +603,20 @@ case class DenseRank(children: Seq[Expression]) extends RankLike {
   override val initialValues = zero +: orderInit
 }
 
+/**
+ * The PercentRank function computes the percentage ranking of a value in a group of values. The
+ * result is the rank minus one divided by the total number of rows in the partition minus one:
+ * (r - 1) / (n - 1). If a partition only contains one row, the function will return 0.
+ *
+ * The PercentRank function is similar to the CumeDist function, but it uses rank values instead of
+ * row counts in the its numerator.
+ *
+ * @param children to base the rank on; a change in the value of one the children will trigger a
+ *                 change in rank. This is an internal parameter and will be assigned by the
+ *                 Analyser.
+ */
+@ExpressionDescription(usage = "_FUNC_(x) - PERCENT_RANK() The PercentRank function computes the " +
+  "percentage ranking of a value in a group of values.")
 case class PercentRank(children: Seq[Expression]) extends RankLike with SizeBasedWindowFunction {
   def this() = this(Nil)
   override def withOrder(order: Seq[Expression]): PercentRank = PercentRank(order)

From 2b806bbfcd0a8331f17239df2ab24bb82f462d01 Mon Sep 17 00:00:00 2001
From: Herman van Hovell <hvanhovell@questtec.nl>
Date: Sun, 20 Dec 2015 15:48:32 +0100
Subject: [PATCH 2/5] Add NULL tests

---
 .../spark/sql/DataFrameWindowSuite.scala      | 20 +++++++++++++++++++
 .../sql/hive/execution/WindowQuerySuite.scala | 15 ++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
index b50d7604e0ec7..3917b9762ba63 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala
@@ -292,4 +292,24 @@ class DataFrameWindowSuite extends QueryTest with SharedSQLContext {
         Row("b", 3, 8, 32),
         Row("b", 2, 4, 8)))
   }
+
+  test("null inputs") {
+    val df = Seq(("a", 1), ("a", 1), ("a", 2), ("a", 2), ("b", 4), ("b", 3), ("b", 2))
+      .toDF("key", "value")
+    val window = Window.orderBy()
+    checkAnswer(
+      df.select(
+        $"key",
+        $"value",
+        avg(lit(null)).over(window),
+        sum(lit(null)).over(window)),
+      Seq(
+        Row("a", 1, null, null),
+        Row("a", 1, null, null),
+        Row("a", 2, null, null),
+        Row("a", 2, null, null),
+        Row("b", 4, null, null),
+        Row("b", 3, null, null),
+        Row("b", 2, null, null)))
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
index c05dbfd7608d9..ea82b8c459695 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
@@ -227,4 +227,19 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto
         Row("Manufacturer#5", "almond azure blanched chiffon midnight", 23, 315.9225931564038, 315.9225931564038, 46, 99807.08486666666, -0.9978877469246935, -5664.856666666666)))
       // scalastyle:on
   }
+
+  test("null arguments") {
+    checkAnswer(sql("""
+        |select  p_mfgr, p_name, p_size,
+        |sum(null) over(distribute by p_mfgr sort by p_name) as sum,
+        |avg(null) over(distribute by p_mfgr sort by p_name) as avg
+        |from part
+      """.stripMargin),
+      sql("""
+        |select  p_mfgr, p_name, p_size,
+        |null as sum,
+        |null as avg
+        |from part
+        """.stripMargin))
+  }
 }

From cf5895421eb5d07305b169268bc3b127dd1a5c39 Mon Sep 17 00:00:00 2001
From: Herman van Hovell <hvanhovell@questtec.nl>
Date: Thu, 24 Dec 2015 08:11:33 +0100
Subject: [PATCH 3/5] Further Docs for NTile and acknowledging Hive and Presto
 projects.

---
 .../expressions/windowExpressions.scala       | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 67750a0ce09fe..5f77889552993 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -359,6 +359,8 @@ abstract class OffsetWindowFunction
  * default offset is 1. When the value of 'x' is null at the offset, or when the offset is larger
  * than the window, the default expression is evaluated.
  *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
+ *
  * @param input expression to evaluate 'offset' rows after the current row.
  * @param offset rows to jump ahead in the partition.
  * @param default to use when the input value is null or when the offset is larger than the window.
@@ -383,6 +385,8 @@ case class Lead(input: Expression, offset: Expression, default: Expression)
  * default offset is 1. When the value of 'x' is null at the offset, or when the offset is smaller
  * than the window, the default expression is evaluated.
  *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
+ *
  * @param input expression to evaluate 'offset' rows before the current row.
  * @param offset rows to jump back in the partition.
  * @param default to use when the input value is null or when the offset is smaller than the window.
@@ -436,6 +440,8 @@ object SizeBasedWindowFunction {
 /**
  * The RowNumber function computes a unique, sequential number to each row, starting with one,
  * according to the ordering of rows within the window partition.
+ *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
  */
 @ExpressionDescription(usage = "_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential" +
   "number to each row, starting with one, according to the ordering of rows within the window" +
@@ -449,6 +455,8 @@ case class RowNumber() extends RowNumberLike {
  * The result is the number of rows preceding or equal to the current row in the ordering of the
  * partition divided by the total number of rows in the window partition. Any tie values in the
  * ordering will evaluate to the same position.
+ *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
  */
 @ExpressionDescription(usage = "_FUNC_(x) - The CUME_DIST() function computes the position of a " +
   "value relative to a all values in the partition.")
@@ -469,7 +477,17 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction {
  * The NTile function is particularly useful for the calculation of tertiles, quartiles, deciles and
  * other common summary statistics
  *
- * @param buckets number of buckets to divide the rows in.
+ * The function calculates two variables during initialization. The size of a regular bucket, and
+ * the number of buckets that will have one extra row added to it (when the rows do not evenly fit
+ * into the number of buckets); both variables are based on the size of the current partition.
+ * During the calculation process the function keeps track of the current row number, the current
+ * bucket number, and the row number at which the bucket will change (bucketThreshold). When the
+ * current row number reaches bucket threshold, the bucket value is increased by one and the
+ * threshold is increased by the bucket size (plus one extra if the current bucket is padded).
+ *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
+ *
+ * @param buckets number of buckets to divide the rows in. Default value is 1.
  */
 @ExpressionDescription(usage = "_FUNC_(x) - The NTILE(n) function divides the rows for each " +
   "window partition into 'n' buckets ranging from 1 to at most 'n'.")
@@ -526,6 +544,8 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow
  * the order of the window in which is processed. For instance, when the value of 'x' changes in a
  * window ordered by 'x' the rank function also changes. The size of the change of the rank function
  * is (typically) not dependent on the size of the change in 'x'.
+ *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
  */
 abstract class RankLike extends AggregateWindowFunction {
   override def inputTypes: Seq[AbstractDataType] = children.map(_ => AnyDataType)
@@ -570,6 +590,8 @@ abstract class RankLike extends AggregateWindowFunction {
  * number of rows preceding or equal to the current row in the ordering of the partition. Tie values
  * will produce gaps in the sequence.
  *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
+ *
  * @param children to base the rank on; a change in the value of one the children will trigger a
  *                 change in rank. This is an internal parameter and will be assigned by the
  *                 Analyser.
@@ -587,6 +609,8 @@ case class Rank(children: Seq[Expression]) extends RankLike {
  * the previously assigned rank values. Unlike Rank, DenseRank will not produce gaps in the ranking
  * sequence.
  *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
+ *
  * @param children to base the rank on; a change in the value of one the children will trigger a
  *                 change in rank. This is an internal parameter and will be assigned by the
  *                 Analyser.
@@ -611,6 +635,8 @@ case class DenseRank(children: Seq[Expression]) extends RankLike {
  * The PercentRank function is similar to the CumeDist function, but it uses rank values instead of
  * row counts in the its numerator.
  *
+ * This documentation has been based upon similar documentation for the Hive and Presto projects.
+ *
  * @param children to base the rank on; a change in the value of one the children will trigger a
  *                 change in rank. This is an internal parameter and will be assigned by the
  *                 Analyser.

From 767305a58c47fffb1ced4483e3c4a938e5383143 Mon Sep 17 00:00:00 2001
From: Herman van Hovell <hvanhovell@questtec.nl>
Date: Mon, 28 Dec 2015 09:40:34 +0100
Subject: [PATCH 4/5] Fix OffsetWindowFunction foldable and nullable. Use raw
 strings instead of concatenated strings in annotations. Minor doc changes.

---
 .../expressions/windowExpressions.scala       | 51 +++++++++++--------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 5f77889552993..c6341a5b5b70a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -314,8 +314,8 @@ abstract class OffsetWindowFunction
   val offset: Expression
 
   /**
-   * Direction (above = 1/below = -1) of the number of rows between the current row and the row
-   * where the input expression is evaluated.
+   * Direction of the number of rows between the current row and the row where the input expression
+   * is evaluated.
    */
   val direction: SortDirection
 
@@ -327,9 +327,9 @@ abstract class OffsetWindowFunction
    * both the input and the default expression are foldable, the result is still not foldable due to
    * the frame.
    */
-  override def foldable: Boolean = input.foldable && (default == null || default.foldable)
+  override def foldable: Boolean = false
 
-  override def nullable: Boolean = input.nullable && (default == null || default.nullable)
+  override def nullable: Boolean = default == null || default.nullable
 
   override lazy val frame = {
     // This will be triggered by the Analyzer.
@@ -365,8 +365,9 @@ abstract class OffsetWindowFunction
  * @param offset rows to jump ahead in the partition.
  * @param default to use when the input value is null or when the offset is larger than the window.
  */
-@ExpressionDescription(usage = "_FUNC_(input, offset, default) - LEAD returns the value of 'x' " +
-  "at 'offset' rows after the current row in the window")
+@ExpressionDescription(usage =
+  """_FUNC_(input, offset, default) - LEAD returns the value of 'x' at 'offset' rows after the
+     current row in the window""")
 case class Lead(input: Expression, offset: Expression, default: Expression)
     extends OffsetWindowFunction {
 
@@ -391,8 +392,9 @@ case class Lead(input: Expression, offset: Expression, default: Expression)
  * @param offset rows to jump back in the partition.
  * @param default to use when the input value is null or when the offset is smaller than the window.
  */
-@ExpressionDescription(usage = "_FUNC_(input, offset, default) - LAG returns the value of 'x' " +
-  "at 'offset' rows before the current row in the window")
+@ExpressionDescription(usage =
+  """_FUNC_(input, offset, default) - LAG returns the value of 'x' at 'offset' rows before the
+     current row in the window""")
 case class Lag(input: Expression, offset: Expression, default: Expression)
     extends OffsetWindowFunction {
 
@@ -443,9 +445,10 @@ object SizeBasedWindowFunction {
  *
  * This documentation has been based upon similar documentation for the Hive and Presto projects.
  */
-@ExpressionDescription(usage = "_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential" +
-  "number to each row, starting with one, according to the ordering of rows within the window" +
-  "partition.")
+@ExpressionDescription(usage =
+  """_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential
+     number to each row, starting with one, according to the ordering of rows within the window
+     partition.""")
 case class RowNumber() extends RowNumberLike {
   override val evaluateExpression = rowNumber
 }
@@ -458,8 +461,9 @@ case class RowNumber() extends RowNumberLike {
  *
  * This documentation has been based upon similar documentation for the Hive and Presto projects.
  */
-@ExpressionDescription(usage = "_FUNC_(x) - The CUME_DIST() function computes the position of a " +
-  "value relative to a all values in the partition.")
+@ExpressionDescription(usage =
+  """_FUNC_() - The CUME_DIST() function computes the position of a value relative to a all values
+     in the partition.""")
 case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction {
   override def dataType: DataType = DoubleType
   // The frame for CUME_DIST is Range based instead of Row based, because CUME_DIST must
@@ -477,7 +481,7 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction {
  * The NTile function is particularly useful for the calculation of tertiles, quartiles, deciles and
  * other common summary statistics
  *
- * The function calculates two variables during initialization. The size of a regular bucket, and
+ * The function calculates two variables during initialization: The size of a regular bucket, and
  * the number of buckets that will have one extra row added to it (when the rows do not evenly fit
  * into the number of buckets); both variables are based on the size of the current partition.
  * During the calculation process the function keeps track of the current row number, the current
@@ -489,8 +493,9 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction {
  *
  * @param buckets number of buckets to divide the rows in. Default value is 1.
  */
-@ExpressionDescription(usage = "_FUNC_(x) - The NTILE(n) function divides the rows for each " +
-  "window partition into 'n' buckets ranging from 1 to at most 'n'.")
+@ExpressionDescription(usage =
+  """_FUNC_(x) - The NTILE(n) function divides the rows for each window partition into 'n' buckets
+     ranging from 1 to at most 'n'.""")
 case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindowFunction {
   def this() = this(Literal(1))
 
@@ -596,7 +601,7 @@ abstract class RankLike extends AggregateWindowFunction {
  *                 change in rank. This is an internal parameter and will be assigned by the
  *                 Analyser.
  */
-@ExpressionDescription(usage = "_FUNC_(x) -  RANK() computes the rank of a value in a group of" +
+@ExpressionDescription(usage = "_FUNC_() -  RANK() computes the rank of a value in a group of" +
   "values. The result is one plus the number of rows preceding or equal to the current row in " +
   "the ordering of the partition. Tie values will produce gaps in the sequence.")
 case class Rank(children: Seq[Expression]) extends RankLike {
@@ -615,9 +620,10 @@ case class Rank(children: Seq[Expression]) extends RankLike {
  *                 change in rank. This is an internal parameter and will be assigned by the
  *                 Analyser.
  */
-@ExpressionDescription(usage = "_FUNC_(x) - The DENSE_RANK() function computes the rank of a " +
-  "value in a group of values. The result is one plus the previously assigned rank values. " +
-  "Unlike Rank, DenseRank will not produce gaps in the ranking sequence.")
+@ExpressionDescription(usage =
+  """_FUNC_() - The DENSE_RANK() function computes the rank of a value in a group of values. The
+     result is one plus the previously assigned rank values. Unlike Rank, DenseRank will not produce
+     gaps in the ranking sequence.""")
 case class DenseRank(children: Seq[Expression]) extends RankLike {
   def this() = this(Nil)
   override def withOrder(order: Seq[Expression]): DenseRank = DenseRank(order)
@@ -641,8 +647,9 @@ case class DenseRank(children: Seq[Expression]) extends RankLike {
  *                 change in rank. This is an internal parameter and will be assigned by the
  *                 Analyser.
  */
-@ExpressionDescription(usage = "_FUNC_(x) - PERCENT_RANK() The PercentRank function computes the " +
-  "percentage ranking of a value in a group of values.")
+@ExpressionDescription(usage =
+  """_FUNC_() - PERCENT_RANK() The PercentRank function computes the percentage ranking of a value
+     in a group of values.""")
 case class PercentRank(children: Seq[Expression]) extends RankLike with SizeBasedWindowFunction {
   def this() = this(Nil)
   override def withOrder(order: Seq[Expression]): PercentRank = PercentRank(order)

From 13f9c95590bbee7790e74768e7b42fb0e0161b9d Mon Sep 17 00:00:00 2001
From: Herman van Hovell <hvanhovell@questtec.nl>
Date: Mon, 28 Dec 2015 10:08:54 +0100
Subject: [PATCH 5/5] Fix rank strings.

---
 .../sql/catalyst/expressions/windowExpressions.scala  | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index c6341a5b5b70a..7c6117c9f7ccc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -601,9 +601,10 @@ abstract class RankLike extends AggregateWindowFunction {
  *                 change in rank. This is an internal parameter and will be assigned by the
  *                 Analyser.
  */
-@ExpressionDescription(usage = "_FUNC_() -  RANK() computes the rank of a value in a group of" +
-  "values. The result is one plus the number of rows preceding or equal to the current row in " +
-  "the ordering of the partition. Tie values will produce gaps in the sequence.")
+@ExpressionDescription(usage =
+  """_FUNC_() -  RANK() computes the rank of a value in a group of values. The result is one plus
+     the number of rows preceding or equal to the current row in the ordering of the partition. Tie
+     values will produce gaps in the sequence.""")
 case class Rank(children: Seq[Expression]) extends RankLike {
   def this() = this(Nil)
   override def withOrder(order: Seq[Expression]): Rank = Rank(order)
@@ -611,7 +612,7 @@ case class Rank(children: Seq[Expression]) extends RankLike {
 
 /**
  * The DenseRank function computes the rank of a value in a group of values. The result is one plus
- * the previously assigned rank values. Unlike Rank, DenseRank will not produce gaps in the ranking
+ * the previously assigned rank value. Unlike Rank, DenseRank will not produce gaps in the ranking
  * sequence.
  *
  * This documentation has been based upon similar documentation for the Hive and Presto projects.
@@ -622,7 +623,7 @@ case class Rank(children: Seq[Expression]) extends RankLike {
  */
 @ExpressionDescription(usage =
   """_FUNC_() - The DENSE_RANK() function computes the rank of a value in a group of values. The
-     result is one plus the previously assigned rank values. Unlike Rank, DenseRank will not produce
+     result is one plus the previously assigned rank value. Unlike Rank, DenseRank will not produce
      gaps in the ranking sequence.""")
 case class DenseRank(children: Seq[Expression]) extends RankLike {
   def this() = this(Nil)