@@ -194,10 +194,12 @@ NULL
194194# ' \itemize{
195195# ' \item \code{array_contains}: a value to be checked if contained in the column.
196196# ' \item \code{array_position}: a value to locate in the given array.
197+ # ' \item \code{array_remove}: a value to remove from the given array.
197198# ' }
198199# ' @param ... additional argument(s). In \code{to_json} and \code{from_json}, this contains
199200# ' additional named properties to control how it is converted, accepts the same
200- # ' options as the JSON data source.
201+ # ' options as the JSON data source. In \code{arrays_zip}, this contains additional
202+ # ' Columns of arrays to be merged.
201203# ' @name column_collection_functions
202204# ' @rdname column_collection_functions
203205# ' @family collection functions
207209# ' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
208210# ' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
209211# ' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
210- # ' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
212+ # ' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1), array_distinct(tmp$v1) ))
211213# ' head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1)))
212- # ' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1)))
214+ # ' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1), array_remove(tmp$v1, 21) ))
213215# ' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
214216# ' head(tmp2)
215217# ' head(select(tmp, posexplode(tmp$v1)))
221223# ' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
222224# ' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
223225# ' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
226+ # ' head(select(tmp4, arrays_zip(tmp4$v4, tmp4$v5), map_from_arrays(tmp4$v4, tmp4$v5)))
224227# ' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))
225228# ' tmp5 <- mutate(df, v6 = create_array(df$model, df$model))
226229# ' head(select(tmp5, array_join(tmp5$v6, "#"), array_join(tmp5$v6, "#", "NULL")))}
@@ -1978,7 +1981,7 @@ setMethod("levenshtein", signature(y = "Column"),
19781981 })
19791982
19801983# ' @details
1981- # ' \code{months_between}: Returns number of months between dates \code{y} and \code{x}.
1984+ # ' \code{months_between}: Returns number of months between dates \code{y} and \code{x}.
19821985# ' If \code{y} is later than \code{x}, then the result is positive. If \code{y} and \code{x}
19831986# ' are on the same day of month, or both are the last day of month, time of day will be ignored.
19841987# ' Otherwise, the difference is calculated based on 31 days per month, and rounded to 8 digits.
@@ -3008,6 +3011,19 @@ setMethod("array_contains",
30083011 column(jc )
30093012 })
30103013
#' @details
#' \code{array_distinct}: Removes duplicate values from the array.
#'
#' @rdname column_collection_functions
#' @aliases array_distinct array_distinct,Column-method
#' @note array_distinct since 2.4.0
setMethod("array_distinct",
          signature(x = "Column"),
          function(x) {
            # Call the static array_distinct function on org.apache.spark.sql.functions
            # with the object held in the column's jc slot, then wrap the returned
            # reference with column().
            jc <- callJStatic("org.apache.spark.sql.functions", "array_distinct", x@jc)
            column(jc)
          })
3026+
30113027# ' @details
30123028# ' \code{array_join}: Concatenates the elements of column using the delimiter.
30133029# ' Null values are replaced with nullReplacement if set, otherwise they are ignored.
@@ -3071,6 +3087,19 @@ setMethod("array_position",
30713087 column(jc )
30723088 })
30733089
#' @details
#' \code{array_remove}: Removes all elements that are equal to \code{value} from the
#' given array.
#'
#' @rdname column_collection_functions
#' @aliases array_remove array_remove,Column-method
#' @note array_remove since 2.4.0
setMethod("array_remove",
          signature(x = "Column", value = "ANY"),
          function(x, value) {
            # value is forwarded unchanged (it is dispatched as "ANY"); only the
            # column is unwrapped to the object in its jc slot before the static call.
            jc <- callJStatic("org.apache.spark.sql.functions", "array_remove", x@jc, value)
            column(jc)
          })
3102+
30743103# ' @details
30753104# ' \code{array_repeat}: Creates an array containing \code{x} repeated the number of times
30763105# ' given by \code{count}.
@@ -3120,6 +3149,24 @@ setMethod("arrays_overlap",
31203149 column(jc )
31213150 })
31223151
#' @details
#' \code{arrays_zip}: Returns a merged array of structs in which the N-th struct contains all N-th
#' values of input arrays.
#'
#' @rdname column_collection_functions
#' @aliases arrays_zip arrays_zip,Column-method
#' @note arrays_zip since 2.4.0
setMethod("arrays_zip",
          signature(x = "Column"),
          function(x, ...) {
            # Collect x and every additional argument, failing fast if any of them
            # is not a Column, and unwrap each one to the object in its jc slot.
            jcols <- lapply(list(x, ...), function(arg) {
              # inherits() is the robust class test: it stays a single TRUE/FALSE
              # even when class(arg) has more than one element.
              stopifnot(inherits(arg, "Column"))
              arg@jc
            })
            # The unwrapped columns are passed as one list argument to the static call.
            jc <- callJStatic("org.apache.spark.sql.functions", "arrays_zip", jcols)
            column(jc)
          })
3169+
31233170# ' @details
31243171# ' \code{flatten}: Creates a single array from an array of arrays.
31253172# ' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
@@ -3147,6 +3194,21 @@ setMethod("map_entries",
31473194 column(jc )
31483195 })
31493196
#' @details
#' \code{map_from_arrays}: Creates a new map column. The array in the first column is used for
#' keys. The array in the second column is used for values. None of the elements in the key
#' array should be null.
#'
#' @rdname column_collection_functions
#' @aliases map_from_arrays map_from_arrays,Column-method
#' @note map_from_arrays since 2.4.0
setMethod("map_from_arrays",
          signature(x = "Column", y = "Column"),
          function(x, y) {
            # x supplies the keys and y the values (see the details above); both are
            # unwrapped to the objects in their jc slots for the static call.
            jc <- callJStatic("org.apache.spark.sql.functions", "map_from_arrays", x@jc, y@jc)
            column(jc)
          })
3211+
31503212# ' @details
31513213# ' \code{map_keys}: Returns an unordered array containing the keys of the map.
31523214# '
0 commit comments