apache
diff --git a/‎R/pkg/R/DataFrame.R‎
Lines changed: 72 additions & 50 deletions b/‎R/pkg/R/DataFrame.R‎
Lines changed: 72 additions & 50 deletions
diff --git a/‎R/pkg/R/RDD.R‎
Lines changed: 1 addition & 1 deletion b/‎R/pkg/R/RDD.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/pkg/R/functions.R‎
Lines changed: 0 additions & 1 deletion b/‎R/pkg/R/functions.R‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎R/pkg/R/schema.R‎
Lines changed: 3 additions & 4 deletions b/‎R/pkg/R/schema.R‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎R/pkg/R/sparkR.R‎
Lines changed: 1 addition & 1 deletion b/‎R/pkg/R/sparkR.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/pkg/R/types.R‎
Lines changed: 6 additions & 11 deletions b/‎R/pkg/R/types.R‎
Lines changed: 6 additions & 11 deletions
diff --git a/‎R/pkg/tests/fulltests/test_sparkSQL.R‎
Lines changed: 16 additions & 16 deletions b/‎R/pkg/tests/fulltests/test_sparkSQL.R‎
Lines changed: 16 additions & 16 deletions
diff --git a/‎dev/deps/spark-deps-hadoop-2.7-hive-1.2‎
Lines changed: 3 additions & 3 deletions b/‎dev/deps/spark-deps-hadoop-2.7-hive-1.2‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎dev/deps/spark-deps-hadoop-2.7-hive-2.3‎
Lines changed: 3 additions & 3 deletions b/‎dev/deps/spark-deps-hadoop-2.7-hive-2.3‎
Lines changed: 3 additions & 3 deletions
@@ -271,7 +271,7 @@ setMethod("show", "SparkDataFrame",
                 paste(l, collapse = ":")
               })
               s <- paste(cols, collapse = ", ")
-              cat(paste(class(object), "[", s, "]\n", sep = ""))
+              cat(paste0(class(object), "[", s, "]\n"))
             }
           })
 
@@ -1659,9 +1659,7 @@ setMethod("dapplyCollect",
 #'
 #' @param cols grouping columns.
 #' @param func a function to be applied to each group partition specified by grouping
-#'             column of the SparkDataFrame. The function \code{func} takes as argument
-#'             a key - grouping columns and a data frame - a local R data.frame.
-#'             The output of \code{func} is a local R data.frame.
+#'             column of the SparkDataFrame. See Details.
 #' @param schema the schema of the resulting SparkDataFrame after the function is applied.
 #'               The schema must match to output of \code{func}. It has to be defined for each
 #'               output column with preferred output column name and corresponding data type.
@@ -1671,29 +1669,43 @@ setMethod("dapplyCollect",
 #' @aliases gapply,SparkDataFrame-method
 #' @rdname gapply
 #' @name gapply
+#' @details
+#' \code{func} is a function of two arguments. The first, usually named \code{key}
+#' (though this is not enforced), corresponds to the grouping key; it will be an
+#' unnamed \code{list} of \code{length(cols)} length-one objects corresponding
+#' to the grouping columns' values for the current group.
+#'
+#' The second, herein \code{x}, will be a local \code{\link{data.frame}} with the
+#' columns of the input not in \code{cols} for the rows corresponding to \code{key}.
+#'
+#' The output of \code{func} must be a \code{data.frame} matching \code{schema} --
+#' in particular this means the names of the output \code{data.frame} are irrelevant.
+#'
 #' @seealso \link{gapplyCollect}
 #' @examples
 #'
 #' \dontrun{
-#' Computes the arithmetic mean of the second column by grouping
-#' on the first and third columns. Output the grouping values and the average.
+#' # Computes the arithmetic mean of the second column by grouping
+#' # on the first and third columns. Output the grouping values and the average.
 #'
 #' df <- createDataFrame (
 #' list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)),
 #'   c("a", "b", "c", "d"))
 #'
-#' Here our output contains three columns, the key which is a combination of two
-#' columns with data types integer and string and the mean which is a double.
+#' # Here our output contains three columns, the key which is a combination of two
+#' # columns with data types integer and string and the mean which is a double.
 #' schema <- structType(structField("a", "integer"), structField("c", "string"),
 #'   structField("avg", "double"))
 #' result <- gapply(
 #'   df,
 #'   c("a", "c"),
 #'   function(key, x) {
+#'     # key will either be list(1L, '1') (for the group where a=1L,c='1') or
+#'     #   list(3L, '3') (for the group where a=3L,c='3')
 #'     y <- data.frame(key, mean(x$b), stringsAsFactors = FALSE)
 #' }, schema)
 #'
-#' The schema also can be specified in a DDL-formatted string.
+#' # The schema also can be specified in a DDL-formatted string.
 #' schema <- "a INT, c STRING, avg DOUBLE"
 #' result <- gapply(
 #'   df,
@@ -1702,8 +1714,8 @@ setMethod("dapplyCollect",
 #'     y <- data.frame(key, mean(x$b), stringsAsFactors = FALSE)
 #' }, schema)
 #'
-#' We can also group the data and afterwards call gapply on GroupedData.
-#' For Example:
+#' # We can also group the data and afterwards call gapply on GroupedData.
+#' # For example:
 #' gdf <- group_by(df, "a", "c")
 #' result <- gapply(
 #'   gdf,
@@ -1712,15 +1724,15 @@ setMethod("dapplyCollect",
 #' }, schema)
 #' collect(result)
 #'
-#' Result
-#' ------
-#' a c avg
-#' 3 3 3.0
-#' 1 1 1.5
+#' # Result
+#' # ------
+#' # a c avg
+#' # 3 3 3.0
+#' # 1 1 1.5
 #'
-#' Fits linear models on iris dataset by grouping on the 'Species' column and
-#' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
-#' and 'Petal_Width' as training features.
+#' # Fits linear models on iris dataset by grouping on the 'Species' column and
+#' # using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
+#' # and 'Petal_Width' as training features.
 #'
 #' df <- createDataFrame (iris)
 #' schema <- structType(structField("(Intercept)", "double"),
@@ -1736,12 +1748,12 @@ setMethod("dapplyCollect",
 #'   }, schema)
 #' collect(df1)
 #'
-#' Result
-#' ---------
-#' Model  (Intercept)  Sepal_Width  Petal_Length  Petal_Width
-#' 1        0.699883    0.3303370    0.9455356    -0.1697527
-#' 2        1.895540    0.3868576    0.9083370    -0.6792238
-#' 3        2.351890    0.6548350    0.2375602     0.2521257
+#' # Result
+#' # ---------
+#' # Model  (Intercept)  Sepal_Width  Petal_Length  Petal_Width
+#' # 1        0.699883    0.3303370    0.9455356    -0.1697527
+#' # 2        1.895540    0.3868576    0.9083370    -0.6792238
+#' # 3        2.351890    0.6548350    0.2375602     0.2521257
 #'
 #'}
 #' @note gapply(SparkDataFrame) since 2.0.0
@@ -1759,20 +1771,30 @@ setMethod("gapply",
 #'
 #' @param cols grouping columns.
 #' @param func a function to be applied to each group partition specified by grouping
-#'             column of the SparkDataFrame. The function \code{func} takes as argument
-#'             a key - grouping columns and a data frame - a local R data.frame.
-#'             The output of \code{func} is a local R data.frame.
+#'             column of the SparkDataFrame. See Details.
 #' @return A data.frame.
 #' @family SparkDataFrame functions
 #' @aliases gapplyCollect,SparkDataFrame-method
 #' @rdname gapplyCollect
 #' @name gapplyCollect
+#' @details
+#' \code{func} is a function of two arguments. The first, usually named \code{key}
+#' (though this is not enforced), corresponds to the grouping key; it will be an
+#' unnamed \code{list} of \code{length(cols)} length-one objects corresponding
+#' to the grouping columns' values for the current group.
+#'
+#' The second, herein \code{x}, will be a local \code{\link{data.frame}} with the
+#' columns of the input not in \code{cols} for the rows corresponding to \code{key}.
+#'
+#' The output of \code{func} must be a local \code{data.frame}. Unlike \code{gapply},
+#' no \code{schema} is supplied, so its column names are kept in the collected result.
+#'
 #' @seealso \link{gapply}
 #' @examples
 #'
 #' \dontrun{
-#' Computes the arithmetic mean of the second column by grouping
-#' on the first and third columns. Output the grouping values and the average.
+#' # Computes the arithmetic mean of the second column by grouping
+#' # on the first and third columns. Output the grouping values and the average.
 #'
 #' df <- createDataFrame (
 #' list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)),
@@ -1787,8 +1809,8 @@ setMethod("gapply",
 #'     y
 #'   })
 #'
-#' We can also group the data and afterwards call gapply on GroupedData.
-#' For Example:
+#' # We can also group the data and afterwards call gapply on GroupedData.
+#' # For example:
 #' gdf <- group_by(df, "a", "c")
 #' result <- gapplyCollect(
 #'   gdf,
@@ -1798,15 +1820,15 @@ setMethod("gapply",
 #'     y
 #'   })
 #'
-#' Result
-#' ------
-#' key_a key_c mean_b
-#' 3 3 3.0
-#' 1 1 1.5
+#' # Result
+#' # ------
+#' # key_a key_c mean_b
+#' # 3 3 3.0
+#' # 1 1 1.5
 #'
-#' Fits linear models on iris dataset by grouping on the 'Species' column and
-#' using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
-#' and 'Petal_Width' as training features.
+#' # Fits linear models on iris dataset by grouping on the 'Species' column and
+#' # using 'Sepal_Length' as a target variable, 'Sepal_Width', 'Petal_Length'
+#' # and 'Petal_Width' as training features.
 #'
 #' df <- createDataFrame (iris)
 #' result <- gapplyCollect(
@@ -1818,12 +1840,12 @@ setMethod("gapply",
 #'     data.frame(t(coef(m)))
 #'   })
 #'
-#' Result
-#'---------
-#' Model  X.Intercept.  Sepal_Width  Petal_Length  Petal_Width
-#' 1        0.699883    0.3303370    0.9455356    -0.1697527
-#' 2        1.895540    0.3868576    0.9083370    -0.6792238
-#' 3        2.351890    0.6548350    0.2375602     0.2521257
+#' # Result
+#' # ---------
+#' # Model  X.Intercept.  Sepal_Width  Petal_Length  Petal_Width
+#' # 1        0.699883    0.3303370    0.9455356    -0.1697527
+#' # 2        1.895540    0.3868576    0.9083370    -0.6792238
+#' # 3        2.351890    0.6548350    0.2375602     0.2521257
 #'
 #'}
 #' @note gapplyCollect(SparkDataFrame) since 2.0.0
@@ -2735,10 +2757,10 @@ setMethod("merge",
               colY <- joinY[[i]]
 
               if (colX %in% by) {
-                colX <- paste(colX, suffixes[1], sep = "")
+                colX <- paste0(colX, suffixes[1])
               }
               if (colY %in% by) {
-                colY <- paste(colY, suffixes[2], sep = "")
+                colY <- paste0(colY, suffixes[2])
               }
 
               colX <- getColumn(xsel, colX)
@@ -2753,7 +2775,7 @@ setMethod("merge",
 
             # sorts the result by 'by' columns if sort = TRUE
             if (sort && length(by) > 0) {
-              colNameWithSuffix <- paste(by, suffixes[2], sep = "")
+              colNameWithSuffix <- paste0(by, suffixes[2])
               joinRes <- do.call("arrange", c(joinRes, colNameWithSuffix, decreasing = FALSE))
             }
 
@@ -2776,7 +2798,7 @@ genAliasesForIntersectedCols <- function(x, intersectedColNames, suffix) {
   cols <- lapply(allColNames, function(colName) {
     col <- getColumn(x, colName)
     if (colName %in% intersectedColNames) {
-      newJoin <- paste(colName, suffix, sep = "")
+      newJoin <- paste0(colName, suffix)
       if (newJoin %in% allColNames) {
         stop("The following column name: ", newJoin, " occurs more than once in the 'DataFrame'.",
           "Please use different suffixes for the intersected columns.")
 
@@ -69,7 +69,7 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode,
 
 setMethod("showRDD", "RDD",
           function(object) {
-              cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep = ""))
+              cat(paste0(callJMethod(getJRDD(object), "toString"), "\n"))
           })
 
 setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) {
 
@@ -3951,7 +3951,6 @@ setMethod("map_values",
 #' @rdname column_collection_functions
 #' @aliases map_zip_with map_zip_with,characterOrColumn,characterOrColumn,function-method
 #'
-#' @examples
 #' @note map_zip_with since 3.1.0
 setMethod("map_zip_with",
           signature(x = "characterOrColumn", y = "characterOrColumn", f = "function"),
 
@@ -99,10 +99,9 @@ print.structType <- function(x, ...) {
   cat("StructType\n",
       sapply(x$fields(),
              function(field) {
-               paste("|-", "name = \"", field$name(),
-                     "\", type = \"", field$dataType.toString(),
-                     "\", nullable = ", field$nullable(), "\n",
-                     sep = "")
+               paste0("|-", "name = \"", field$name(),
+                      "\", type = \"", field$dataType.toString(),
+                      "\", nullable = ", field$nullable(), "\n")
              }),
       sep = "")
 }
 
@@ -244,7 +244,7 @@ sparkR.sparkContext <- function(
     uriSep <- "////"
   }
   localJarPaths <- lapply(jars,
-                          function(j) { utils::URLencode(paste("file:", uriSep, j, sep = "")) })
+                          function(j) { utils::URLencode(paste0("file:", uriSep, j)) })
 
   # Set the start time to identify jobjs
   # Seconds resolution is good enough for this purpose, so use ints
 
@@ -94,27 +94,22 @@ checkSchemaInArrow <- function(schema) {
   }
 
   # Both cases below produce a corrupt value for unknown reason. It needs to be investigated.
-  if (any(sapply(schema$fields(), function(x) x$dataType.toString() == "FloatType"))) {
+  field_strings <- sapply(schema$fields(), function(x) x$dataType.toString())
+  if (any(field_strings == "FloatType")) {
     stop("Arrow optimization in R does not support float type yet.")
   }
-  if (any(sapply(schema$fields(), function(x) x$dataType.toString() == "BinaryType"))) {
+  if (any(field_strings == "BinaryType")) {
     stop("Arrow optimization in R does not support binary type yet.")
   }
-  if (any(sapply(schema$fields(),
-                 function(x) startsWith(x$dataType.toString(),
-                 "ArrayType")))) {
+  if (any(startsWith(field_strings, "ArrayType"))) {
     stop("Arrow optimization in R does not support array type yet.")
   }
 
   # Arrow optimization in Spark does not yet support both cases below.
-  if (any(sapply(schema$fields(),
-                 function(x) startsWith(x$dataType.toString(),
-                 "StructType")))) {
+  if (any(startsWith(field_strings, "StructType"))) {
     stop("Arrow optimization in R does not support nested struct type yet.")
   }
-  if (any(sapply(schema$fields(),
-                 function(x) startsWith(x$dataType.toString(),
-                 "MapType")))) {
+  if (any(startsWith(field_strings, "MapType"))) {
     stop("Arrow optimization in R does not support map type yet.")
   }
 }
@@ -2593,8 +2593,8 @@ test_that("join(), crossJoin() and merge() on a DataFrame", {
   writeLines(mockLines3, jsonPath3)
   df3 <- read.json(jsonPath3)
   expect_error(merge(df, df3),
-               paste("The following column name: name_y occurs more than once in the 'DataFrame'.",
-                     "Please use different suffixes for the intersected columns.", sep = ""))
+               paste0("The following column name: name_y occurs more than once in the 'DataFrame'.",
+                      "Please use different suffixes for the intersected columns."))
 
   unlink(jsonPath2)
   unlink(jsonPath3)
@@ -2637,20 +2637,20 @@ test_that("toJSON() on DataFrame", {
 
 test_that("showDF()", {
   df <- read.json(jsonPath)
-  expected <- paste("+----+-------+\n",
-                    "| age|   name|\n",
-                    "+----+-------+\n",
-                    "|null|Michael|\n",
-                    "|  30|   Andy|\n",
-                    "|  19| Justin|\n",
-                    "+----+-------+\n", sep = "")
-  expected2 <- paste("+---+----+\n",
-                     "|age|name|\n",
-                     "+---+----+\n",
-                     "|nul| Mic|\n",
-                     "| 30| And|\n",
-                     "| 19| Jus|\n",
-                     "+---+----+\n", sep = "")
+  expected <- paste("+----+-------+",
+                    "| age|   name|",
+                    "+----+-------+",
+                    "|null|Michael|",
+                    "|  30|   Andy|",
+                    "|  19| Justin|",
+                    "+----+-------+\n", sep = "\n")
+  expected2 <- paste("+---+----+",
+                     "|age|name|",
+                     "+---+----+",
+                     "|nul| Mic|",
+                     "| 30| And|",
+                     "| 19| Jus|",
+                     "+---+----+\n", sep = "\n")
   expect_output(showDF(df), expected)
   expect_output(showDF(df, truncate = 3), expected2)
 })
 
@@ -160,9 +160,9 @@ objenesis/2.5.1//objenesis-2.5.1.jar
 okhttp/3.12.6//okhttp-3.12.6.jar
 okio/1.15.0//okio-1.15.0.jar
 opencsv/2.3//opencsv-2.3.jar
-orc-core/1.5.9/nohive/orc-core-1.5.9-nohive.jar
-orc-mapreduce/1.5.9/nohive/orc-mapreduce-1.5.9-nohive.jar
-orc-shims/1.5.9//orc-shims-1.5.9.jar
+orc-core/1.5.10/nohive/orc-core-1.5.10-nohive.jar
+orc-mapreduce/1.5.10/nohive/orc-mapreduce-1.5.10-nohive.jar
+orc-shims/1.5.10//orc-shims-1.5.10.jar
 oro/2.0.8//oro-2.0.8.jar
 osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
 paranamer/2.8//paranamer-2.8.jar
 
@@ -175,9 +175,9 @@ objenesis/2.5.1//objenesis-2.5.1.jar
 okhttp/3.12.6//okhttp-3.12.6.jar
 okio/1.15.0//okio-1.15.0.jar
 opencsv/2.3//opencsv-2.3.jar
-orc-core/1.5.9//orc-core-1.5.9.jar
-orc-mapreduce/1.5.9//orc-mapreduce-1.5.9.jar
-orc-shims/1.5.9//orc-shims-1.5.9.jar
+orc-core/1.5.10//orc-core-1.5.10.jar
+orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar
+orc-shims/1.5.10//orc-shims-1.5.10.jar
 oro/2.0.8//oro-2.0.8.jar
 osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar
 paranamer/2.8//paranamer-2.8.jar
Original file line number	Diff line number	Diff line change
`@@ -244,7 +244,7 @@ sparkR.sparkContext <- function(`
`244`	`244`	`uriSep <- "////"`
`245`	`245`	`}`
`246`	`246`	`localJarPaths <- lapply(jars,`
`247`		`- function(j) { utils::URLencode(paste("file:", uriSep, j, sep = "")) })`
	`247`	`+ function(j) { utils::URLencode(paste0("file:", uriSep, j)) })`
`248`	`248`
`249`	`249`	`# Set the start time to identify jobjs`
`250`	`250`	`# Seconds resolution is good enough for this purpose, so use ints`
Original file line number	Diff line number	Diff line change
`@@ -94,27 +94,22 @@ checkSchemaInArrow <- function(schema) {`
`94`	`94`	`}`
`95`	`95`
`96`	`96`	`# Both cases below produce a corrupt value for unknown reason. It needs to be investigated.`
`97`		`- if (any(sapply(schema$fields(), function(x) x$dataType.toString() == "FloatType"))) {`
	`97`	`+ field_strings <- sapply(schema$fields(), function(x) x$dataType.toString())`
	`98`	`+ if (any(field_strings == "FloatType")) {`
`98`	`99`	`stop("Arrow optimization in R does not support float type yet.")`
`99`	`100`	`}`
`100`		`- if (any(sapply(schema$fields(), function(x) x$dataType.toString() == "BinaryType"))) {`
	`101`	`+ if (any(field_strings == "BinaryType")) {`
`101`	`102`	`stop("Arrow optimization in R does not support binary type yet.")`
`102`	`103`	`}`
`103`		`- if (any(sapply(schema$fields(),`
`104`		`- function(x) startsWith(x$dataType.toString(),`
`105`		`- "ArrayType")))) {`
	`104`	`+ if (any(startsWith(field_strings, "ArrayType"))) {`
`106`	`105`	`stop("Arrow optimization in R does not support array type yet.")`
`107`	`106`	`}`
`108`	`107`
`109`	`108`	`# Arrow optimization in Spark does not yet support both cases below.`
`110`		`- if (any(sapply(schema$fields(),`
`111`		`- function(x) startsWith(x$dataType.toString(),`
`112`		`- "StructType")))) {`
	`109`	`+ if (any(startsWith(field_strings, "StructType"))) {`
`113`	`110`	`stop("Arrow optimization in R does not support nested struct type yet.")`
`114`	`111`	`}`
`115`		`- if (any(sapply(schema$fields(),`
`116`		`- function(x) startsWith(x$dataType.toString(),`
`117`		`- "MapType")))) {`
	`112`	`+ if (any(startsWith(field_strings, "MapType"))) {`
`118`	`113`	`stop("Arrow optimization in R does not support map type yet.")`
`119`	`114`	`}`
`120`	`115`	`}`