diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 4d94b4cd05d44..be53ad190a832 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -2317,7 +2317,8 @@ setMethod("date_format", signature(y = "Column", x = "character"), #' from_utc_timestamp #' -#' Assumes given timestamp is UTC and converts to given timezone. +#' Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp +#' that corresponds to the same time of day in the given timezone. #' #' @param y Column to compute on. #' @param x time zone to use. @@ -2340,7 +2341,7 @@ setMethod("from_utc_timestamp", signature(y = "Column", x = "character"), #' Locate the position of the first occurrence of substr column in the given string. #' Returns null if either of the arguments are null. #' -#' NOTE: The position is not zero based, but 1 based index, returns 0 if substr +#' NOTE: The position is not zero based, but 1 based index. Returns 0 if substr #' could not be found in str. #' #' @param y column to check @@ -2391,7 +2392,8 @@ setMethod("next_day", signature(y = "Column", x = "character"), #' to_utc_timestamp #' -#' Assumes given timestamp is in given timezone and converts to UTC. +#' Given a timestamp, which corresponds to a certain time of day in the given timezone, returns +#' another timestamp that corresponds to the same time of day in UTC. #' #' @param y Column to compute on #' @param x timezone to use @@ -2539,7 +2541,7 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"), #' shiftRight #' -#' Shift the given value numBits right. If the given value is a long value, it will return +#' (Signed) shift the given value numBits right. If the given value is a long value, it will return #' a long value else it will return an integer value. #' #' @param y column to compute on. @@ -2777,7 +2779,7 @@ setMethod("window", signature(x = "Column"), #' locate #' #' Locate the position of the first occurrence of substr. 
-#' NOTE: The position is not zero based, but 1 based index, returns 0 if substr +#' NOTE: The position is not zero based, but 1 based index. Returns 0 if substr #' could not be found in str. #' #' @param substr a character string to be matched. @@ -2823,7 +2825,8 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"), #' rand #' -#' Generate a random column with i.i.d. samples from U[0.0, 1.0]. +#' Generate a random column with independent and identically distributed (i.i.d.) samples +#' from U[0.0, 1.0]. #' #' @param seed a random seed. Can be missing. #' @family normal_funcs @@ -2852,7 +2855,8 @@ setMethod("rand", signature(seed = "numeric"), #' randn #' -#' Generate a column with i.i.d. samples from the standard normal distribution. +#' Generate a column with independent and identically distributed (i.i.d.) samples from +#' the standard normal distribution. #' #' @param seed a random seed. Can be missing. #' @family normal_funcs @@ -3442,8 +3446,8 @@ setMethod("size", #' sort_array #' -#' Sorts the input array for the given column in ascending order, -#' according to the natural ordering of the array elements. +#' Sorts the input array in ascending or descending order according +#' to the natural ordering of the array elements. #' #' @param x A Column to sort #' @param asc A logical flag indicating the sorting order. diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 45e3c22bfc6a9..3a6789341d72f 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -359,8 +359,8 @@ def grouping_id(*cols): (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn) - Note: the list of columns should match with grouping columns exactly, or empty (means all the - grouping columns). + .. note:: the list of columns should match with grouping columns exactly, or empty (means all + the grouping columns). 
>>> df.cube("name").agg(grouping_id(), sum("age")).orderBy("name").show() +-----+-------------+--------+ @@ -457,7 +457,8 @@ def nanvl(col1, col2): @since(1.4) def rand(seed=None): - """Generates a random column with i.i.d. samples from U[0.0, 1.0]. + """Generates a random column with independent and identically distributed (i.i.d.) samples + from U[0.0, 1.0]. """ sc = SparkContext._active_spark_context if seed is not None: @@ -469,7 +470,8 @@ def rand(seed=None): @since(1.4) def randn(seed=None): - """Generates a column with i.i.d. samples from the standard normal distribution. + """Generates a column with independent and identically distributed (i.i.d.) samples from + the standard normal distribution. """ sc = SparkContext._active_spark_context if seed is not None: @@ -518,7 +520,7 @@ def shiftLeft(col, numBits): @since(1.5) def shiftRight(col, numBits): - """Shift the given value numBits right. + """(Signed) shift the given value numBits right. >>> spark.createDataFrame([(42,)], ['a']).select(shiftRight('a', 1).alias('r')).collect() [Row(r=21)] @@ -777,8 +779,8 @@ def date_format(date, format): A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All pattern letters of the Java class `java.text.SimpleDateFormat` can be used. - NOTE: Use when ever possible specialized functions like `year`. These benefit from a - specialized implementation. + .. note:: Use whenever possible specialized functions like `year`. These benefit from a - specialized implementation. + specialized implementation. >>> df = spark.createDataFrame([('2015-04-08',)], ['a']) >>> df.select(date_format('a', 'MM/dd/yyy').alias('date')).collect() @@ -1059,7 +1061,8 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): @since(1.5) def from_utc_timestamp(timestamp, tz): """ - Assumes given timestamp is UTC and converts to given timezone. 
+ Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp + that corresponds to the same time of day in the given timezone. >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) >>> df.select(from_utc_timestamp(df.t, "PST").alias('t')).collect() @@ -1072,7 +1075,8 @@ def from_utc_timestamp(timestamp, tz): @since(1.5) def to_utc_timestamp(timestamp, tz): """ - Assumes given timestamp is in given timezone and converts to UTC. + Given a timestamp, which corresponds to a certain time of day in the given timezone, returns + another timestamp that corresponds to the same time of day in UTC. >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) >>> df.select(to_utc_timestamp(df.t, "PST").alias('t')).collect() @@ -1314,8 +1318,8 @@ def instr(str, substr): Locate the position of the first occurrence of substr column in the given string. Returns null if either of the arguments are null. - NOTE: The position is not zero based, but 1 based index, returns 0 if substr - could not be found in str. + .. note:: The position is not zero based, but 1 based index. Returns 0 if substr + could not be found in str. >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(instr(df.s, 'b').alias('s')).collect() @@ -1379,8 +1383,8 @@ def locate(substr, str, pos=1): """ Locate the position of the first occurrence of substr in a string column, after position pos. - NOTE: The position is not zero based, but 1 based index. returns 0 if substr - could not be found in str. + .. note:: The position is not zero based, but 1 based index. Returns 0 if substr + could not be found in str. :param substr: a string :param str: a Column of :class:`pyspark.sql.types.StringType` @@ -1442,7 +1446,7 @@ def split(str, pattern): """ Splits str around pattern (pattern is a regular expression). - NOTE: pattern is a string represent the regular expression. + .. note:: pattern is a string representing the regular expression. 
>>> df = spark.createDataFrame([('ab12cd',)], ['s',]) >>> df.select(split(df.s, '[0-9]+').alias('s')).collect() @@ -1785,7 +1789,8 @@ def size(col): @since(1.5) def sort_array(col, asc=True): """ - Collection function: sorts the input array for the given column in ascending order. + Collection function: sorts the input array in ascending or descending order according + to the natural ordering of the array elements. :param col: name of column or expression diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 944a476114faf..e221c032b82f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1117,7 +1117,8 @@ object functions { def not(e: Column): Column = !e /** - * Generate a random column with i.i.d. samples from U[0.0, 1.0]. + * Generate a random column with independent and identically distributed (i.i.d.) samples + * from U[0.0, 1.0]. * * Note that this is indeterministic when data partitions are not fixed. * @@ -1127,7 +1128,8 @@ object functions { def rand(seed: Long): Column = withExpr { Rand(seed) } /** - * Generate a random column with i.i.d. samples from U[0.0, 1.0]. + * Generate a random column with independent and identically distributed (i.i.d.) samples + * from U[0.0, 1.0]. * * @group normal_funcs * @since 1.4.0 @@ -1135,7 +1137,8 @@ object functions { def rand(): Column = rand(Utils.random.nextLong) /** - * Generate a column with i.i.d. samples from the standard normal distribution. + * Generate a column with independent and identically distributed (i.i.d.) samples from + * the standard normal distribution. * * Note that this is indeterministic when data partitions are not fixed. * @@ -1145,7 +1148,8 @@ object functions { def randn(seed: Long): Column = withExpr { Randn(seed) } /** - * Generate a column with i.i.d. samples from the standard normal distribution. 
+ * Generate a column with independent and identically distributed (i.i.d.) samples from + * the standard normal distribution. * * @group normal_funcs * @since 1.4.0 @@ -1153,7 +1157,7 @@ object functions { def randn(): Column = randn(Utils.random.nextLong) /** - * Partition ID of the Spark task. + * Partition ID. * * Note that this is indeterministic because it depends on data partitioning and task scheduling. * @@ -1877,8 +1881,8 @@ object functions { def shiftLeft(e: Column, numBits: Int): Column = withExpr { ShiftLeft(e.expr, lit(numBits).expr) } /** - * Shift the given value numBits right. If the given value is a long value, it will return - * a long value else it will return an integer value. + * (Signed) shift the given value numBits right. If the given value is a long value, it will + * return a long value else it will return an integer value. * * @group math_funcs * @since 1.5.0 @@ -2203,7 +2207,7 @@ object functions { * Locate the position of the first occurrence of substr column in the given string. * Returns null if either of the arguments are null. * - * NOTE: The position is not zero based, but 1 based index, returns 0 if substr + * NOTE: The position is not zero based, but 1 based index. Returns 0 if substr * could not be found in str. * * @group string_funcs @@ -2238,7 +2242,7 @@ object functions { /** * Locate the position of the first occurrence of substr. - * NOTE: The position is not zero based, but 1 based index, returns 0 if substr + * NOTE: The position is not zero based, but 1 based index. Returns 0 if substr * could not be found in str. * * @group string_funcs @@ -2666,7 +2670,8 @@ object functions { } /** - * Assumes given timestamp is UTC and converts to given timezone. + * Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp + * that corresponds to the same time of day in the given timezone. 
* @group datetime_funcs * @since 1.5.0 */ @@ -2675,7 +2680,8 @@ object functions { } /** - * Assumes given timestamp is in given timezone and converts to UTC. + * Given a timestamp, which corresponds to a certain time of day in the given timezone, returns + * another timestamp that corresponds to the same time of day in UTC. * @group datetime_funcs * @since 1.5.0 */ @@ -2996,7 +3002,7 @@ object functions { def sort_array(e: Column): Column = sort_array(e, asc = true) /** - * Sorts the input array for the given column in ascending / descending order, + * Sorts the input array for the given column in ascending or descending order, * according to the natural ordering of the array elements. * * @group collection_funcs