apache
diff --git a/‎.rat-excludes‎
Lines changed: 6 additions & 0 deletions b/‎.rat-excludes‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 1 addition & 1 deletion b/‎LICENSE‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/README.md‎
Lines changed: 1 addition & 1 deletion b/‎R/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/pkg/R/DataFrame.R‎
Lines changed: 2 additions & 2 deletions b/‎R/pkg/R/DataFrame.R‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎R/pkg/R/SQLContext.R‎
Lines changed: 3 additions & 1 deletion b/‎R/pkg/R/SQLContext.R‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎R/pkg/R/client.R‎
Lines changed: 1 addition & 1 deletion b/‎R/pkg/R/client.R‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎R/pkg/R/generics.R‎
Lines changed: 2 additions & 1 deletion b/‎R/pkg/R/generics.R‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎R/pkg/R/pairRDD.R‎
Lines changed: 6 additions & 6 deletions b/‎R/pkg/R/pairRDD.R‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎R/pkg/R/sparkR.R‎
Lines changed: 8 additions & 5 deletions b/‎R/pkg/R/sparkR.R‎
Lines changed: 8 additions & 5 deletions
diff --git a/‎R/pkg/R/utils.R‎
Lines changed: 18 additions & 13 deletions b/‎R/pkg/R/utils.R‎
Lines changed: 18 additions & 13 deletions
@@ -86,4 +86,10 @@ local-1430917381535_2
 DESCRIPTION
 NAMESPACE
 test_support/*
+.*Rd
+help/*
+html/*
+INDEX
 .lintr
+gen-java.*
+.*avpr
@@ -948,6 +948,6 @@ The following components are provided under the MIT License. See project link fo
      (MIT License) SLF4J LOG4J-12 Binding (org.slf4j:slf4j-log4j12:1.7.5 - http://www.slf4j.org)
      (MIT License) pyrolite (org.spark-project:pyrolite:2.0.1 - http://pythonhosted.org/Pyro4/)
      (MIT License) scopt (com.github.scopt:scopt_2.10:3.2.0 - https://github.com/scopt/scopt)
-     (The MIT License) Mockito (org.mockito:mockito-all:1.8.5 - http://www.mockito.org)
+     (The MIT License) Mockito (org.mockito:mockito-core:1.9.5 - http://www.mockito.org)
      (MIT License) jquery (https://jquery.org/license/)
      (MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs)
@@ -6,7 +6,7 @@ SparkR is an R package that provides a light-weight frontend to use Spark from R
 
 #### Build Spark
 
-Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-PsparkR` profile to build the R package. For example to use the default Hadoop versions you can run
+Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
 ```
   build/mvn -DskipTests -Psparkr package
 ```
 
@@ -169,8 +169,8 @@ setMethod("isLocal",
 #'}
 setMethod("showDF",
           signature(x = "DataFrame"),
-          function(x, numRows = 20) {
-            s <- callJMethod(x@sdf, "showString", numToInt(numRows))
+          function(x, numRows = 20, truncate = TRUE) {
+            s <- callJMethod(x@sdf, "showString", numToInt(numRows), truncate)
             cat(s)
           })
 
 
@@ -86,7 +86,9 @@ infer_type <- function(x) {
 createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) {
   if (is.data.frame(data)) {
       # get the names of columns, they will be put into RDD
-      schema <- names(data)
+      if (is.null(schema)) {
+        schema <- names(data)
+      }
       n <- nrow(data)
       m <- ncol(data)
       # get rid of factor type
 
@@ -57,7 +57,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack
 }
 
 launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {
-  sparkSubmitBin <- determineSparkSubmitBin()
+  sparkSubmitBinName <- determineSparkSubmitBin()
   if (sparkHome != "") {
     sparkSubmitBin <- file.path(sparkHome, "bin", sparkSubmitBinName)
   } else {
 
@@ -20,7 +20,8 @@
 # @rdname aggregateRDD
 # @seealso reduce
 # @export
-setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
+setGeneric("aggregateRDD",
+           function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") })
 
 # @rdname cache-methods
 # @export
 
@@ -560,8 +560,8 @@ setMethod("join",
 # Left outer join two RDDs
 #
 # @description
-# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 #             list(K, V).
@@ -597,8 +597,8 @@ setMethod("leftOuterJoin",
 # Right outer join two RDDs
 #
 # @description
-# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 #             list(K, V).
@@ -634,8 +634,8 @@ setMethod("rightOuterJoin",
 # Full outer join two RDDs
 #
 # @description
-# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V).
-# The key types of the two RDDs should be the same.
+# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of
+# the form list(K, V). The key types of the two RDDs should be the same.
 #
 # @param x An RDD to be joined. Should be an RDD where each element is
 #             list(K, V).
 
@@ -105,11 +105,12 @@ sparkR.init <- function(
   sparkPackages = "") {
 
   if (exists(".sparkRjsc", envir = .sparkREnv)) {
-    cat("Re-using existing Spark Context. Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n")
+    cat(paste("Re-using existing Spark Context.",
+              "Please stop SparkR with sparkR.stop() or restart R to create a new Spark Context\n"))
     return(get(".sparkRjsc", envir = .sparkREnv))
   }
 
-  sparkMem <- Sys.getenv("SPARK_MEM", "512m")
+  sparkMem <- Sys.getenv("SPARK_MEM", "1024m")
   jars <- suppressWarnings(normalizePath(as.character(sparkJars)))
 
   # Classpath separator is ";" on Windows
@@ -132,7 +133,7 @@ sparkR.init <- function(
         sparkHome = sparkHome,
         jars = jars,
         sparkSubmitOpts = Sys.getenv("SPARKR_SUBMIT_ARGS", "sparkr-shell"),
-        sparkPackages = sparkPackages)
+        packages = sparkPackages)
     # wait atmost 100 seconds for JVM to launch
     wait <- 0.1
     for (i in 1:25) {
@@ -180,14 +181,16 @@ sparkR.init <- function(
 
   sparkExecutorEnvMap <- new.env()
   if (!any(names(sparkExecutorEnv) == "LD_LIBRARY_PATH")) {
-    sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <- paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH"))
+    sparkExecutorEnvMap[["LD_LIBRARY_PATH"]] <-
+      paste0("$LD_LIBRARY_PATH:",Sys.getenv("LD_LIBRARY_PATH"))
   }
   for (varname in names(sparkExecutorEnv)) {
     sparkExecutorEnvMap[[varname]] <- sparkExecutorEnv[[varname]]
   }
 
   nonEmptyJars <- Filter(function(x) { x != "" }, jars)
-  localJarPaths <- sapply(nonEmptyJars, function(j) { utils::URLencode(paste("file:", uriSep, j, sep = "")) })
+  localJarPaths <- sapply(nonEmptyJars,
+                          function(j) { utils::URLencode(paste("file:", uriSep, j, sep = "")) })
 
   # Set the start time to identify jobjs
   # Seconds resolution is good enough for this purpose, so use ints
 
@@ -334,18 +334,21 @@ getStorageLevel <- function(newLevel = c("DISK_ONLY",
                                          "MEMORY_ONLY_SER_2",
                                          "OFF_HEAP")) {
   match.arg(newLevel)
+  storageLevelClass <- "org.apache.spark.storage.StorageLevel"
   storageLevel <- switch(newLevel,
-                         "DISK_ONLY" = callJStatic("org.apache.spark.storage.StorageLevel", "DISK_ONLY"),
-                         "DISK_ONLY_2" = callJStatic("org.apache.spark.storage.StorageLevel", "DISK_ONLY_2"),
-                         "MEMORY_AND_DISK" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_AND_DISK"),
-                         "MEMORY_AND_DISK_2" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_AND_DISK_2"),
-                         "MEMORY_AND_DISK_SER" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_AND_DISK_SER"),
-                         "MEMORY_AND_DISK_SER_2" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_AND_DISK_SER_2"),
-                         "MEMORY_ONLY" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_ONLY"),
-                         "MEMORY_ONLY_2" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_ONLY_2"),
-                         "MEMORY_ONLY_SER" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_ONLY_SER"),
-                         "MEMORY_ONLY_SER_2" = callJStatic("org.apache.spark.storage.StorageLevel", "MEMORY_ONLY_SER_2"),
-                         "OFF_HEAP" = callJStatic("org.apache.spark.storage.StorageLevel", "OFF_HEAP"))
+                         "DISK_ONLY" = callJStatic(storageLevelClass, "DISK_ONLY"),
+                         "DISK_ONLY_2" = callJStatic(storageLevelClass, "DISK_ONLY_2"),
+                         "MEMORY_AND_DISK" = callJStatic(storageLevelClass, "MEMORY_AND_DISK"),
+                         "MEMORY_AND_DISK_2" = callJStatic(storageLevelClass, "MEMORY_AND_DISK_2"),
+                         "MEMORY_AND_DISK_SER" = callJStatic(storageLevelClass,
+                                                             "MEMORY_AND_DISK_SER"),
+                         "MEMORY_AND_DISK_SER_2" = callJStatic(storageLevelClass,
+                                                               "MEMORY_AND_DISK_SER_2"),
+                         "MEMORY_ONLY" = callJStatic(storageLevelClass, "MEMORY_ONLY"),
+                         "MEMORY_ONLY_2" = callJStatic(storageLevelClass, "MEMORY_ONLY_2"),
+                         "MEMORY_ONLY_SER" = callJStatic(storageLevelClass, "MEMORY_ONLY_SER"),
+                         "MEMORY_ONLY_SER_2" = callJStatic(storageLevelClass, "MEMORY_ONLY_SER_2"),
+                         "OFF_HEAP" = callJStatic(storageLevelClass, "OFF_HEAP"))
 }
 
 # Utility function for functions where an argument needs to be integer but we want to allow
@@ -545,9 +548,11 @@ mergePartitions <- function(rdd, zip) {
         lengthOfKeys <- part[[len - lengthOfValues]]
         stopifnot(len == lengthOfKeys + lengthOfValues)
 
-        # For zip operation, check if corresponding partitions of both RDDs have the same number of elements.
+        # For zip operation, check if corresponding partitions
+        # of both RDDs have the same number of elements.
         if (zip && lengthOfKeys != lengthOfValues) {
-          stop("Can only zip RDDs with same number of elements in each pair of corresponding partitions.")
+          stop(paste("Can only zip RDDs with same number of elements",
+                     "in each pair of corresponding partitions."))
         }
 
         if (lengthOfKeys > 1) {
Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack`
`57`	`57`	`}`
`58`	`58`
`59`	`59`	`launchBackend <- function(args, sparkHome, jars, sparkSubmitOpts, packages) {`
`60`		`- sparkSubmitBin <- determineSparkSubmitBin()`
	`60`	`+ sparkSubmitBinName <- determineSparkSubmitBin()`
`61`	`61`	`if (sparkHome != "") {`
`62`	`62`	`sparkSubmitBin <- file.path(sparkHome, "bin", sparkSubmitBinName)`
`63`	`63`	`} else {`
Original file line number	Diff line number	Diff line change
`@@ -560,8 +560,8 @@ setMethod("join",`
`560`	`560`	`# Left outer join two RDDs`
`561`	`561`	`#`
`562`	`562`	`# @description`
`563`		`-# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V).`
`564`		`-# The key types of the two RDDs should be the same.`
	`563`	`+# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of`
	`564`	`+# the form list(K, V). The key types of the two RDDs should be the same.`
`565`	`565`	`#`
`566`	`566`	`# @param x An RDD to be joined. Should be an RDD where each element is`
`567`	`567`	`# list(K, V).`
`@@ -597,8 +597,8 @@ setMethod("leftOuterJoin",`
`597`	`597`	`# Right outer join two RDDs`
`598`	`598`	`#`
`599`	`599`	`# @description`
`600`		`-# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V).`
`601`		`-# The key types of the two RDDs should be the same.`
	`600`	`+# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of`
	`601`	`+# the form list(K, V). The key types of the two RDDs should be the same.`
`602`	`602`	`#`
`603`	`603`	`# @param x An RDD to be joined. Should be an RDD where each element is`
`604`	`604`	`# list(K, V).`
`@@ -634,8 +634,8 @@ setMethod("rightOuterJoin",`
`634`	`634`	`# Full outer join two RDDs`
`635`	`635`	`#`
`636`	`636`	`# @description`
`637`		`-# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V).`
`638`		`-# The key types of the two RDDs should be the same.`
	`637`	`+# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of`
	`638`	`+# the form list(K, V). The key types of the two RDDs should be the same.`
`639`	`639`	`#`
`640`	`640`	`# @param x An RDD to be joined. Should be an RDD where each element is`
`641`	`641`	`# list(K, V).`