Closed
109 commits
208b902
[SPARK-7566][SQL] Add type to HiveContext.analyzer
smola May 13, 2015
df9b94a
[SPARK-7482] [SPARKR] Rename some DataFrame API methods in SparkR to …
May 13, 2015
98195c3
[SPARK-7526] [SPARKR] Specify ip of RBackend, MonitorServer and RRDD …
Sephiroth-Lin May 13, 2015
50c7270
[SPARK-6568] spark-shell.cmd --jars option does not accept the jar th…
tsudukim May 13, 2015
10c546e
[SPARK-7599] [SQL] Don't restrict customized output committers to be …
liancheng May 13, 2015
b061bd5
[SQL] In InsertIntoFSBasedRelation.insert, log cause before abort job…
yhuai May 13, 2015
aa6ba3f
[MINOR] [SQL] Removes debugging println
liancheng May 13, 2015
0da254f
[SPARK-6734] [SQL] Add UDTF.close support in Generate
chenghao-intel May 13, 2015
bec938f
[SPARK-7589] [STREAMING] [WEBUI] Make "Input Rate" in the Streaming p…
zsxwing May 13, 2015
7ff16e8
[SPARK-7567] [SQL] Migrating Parquet data source to FSBasedRelation
liancheng May 13, 2015
213a6f3
[SPARK-7551][DataFrame] support backticks for DataFrame attribute res…
cloud-fan May 13, 2015
e676fc0
[MINOR] Avoid passing the PermGenSize option to IBM JVMs.
tellison May 13, 2015
3cd9ad2
[MINOR] Enhance SizeEstimator to detect IBM compressed refs and s390 …
tellison May 13, 2015
51030b8
[MINOR] [CORE] Accept alternative mesos unsatisfied link error in test.
tellison May 13, 2015
5db18ba
[SPARK-7593] [ML] Python Api for ml.feature.Bucketizer
brkyvz May 13, 2015
61e05fc
[SPARK-7545] [MLLIB] Added check in Bernoulli Naive Bayes to make sur…
leahmcguire May 13, 2015
df2fb13
[SPARK-7382] [MLLIB] Feature Parity in PySpark for ml.classification
brkyvz May 13, 2015
59250fe
[SPARK-7303] [SQL] push down project if possible when the child is sort
scwf May 13, 2015
e683182
[SQL] Move some classes into packages that are more appropriate.
rxin May 13, 2015
f6e1838
[SPARK-7608] Clean up old state in RDDOperationGraphListener
May 13, 2015
f88ac70
[SPARK-7399] Spark compilation error for scala 2.11
May 13, 2015
4440341
[SPARK-7464] DAG visualization: highlight the same RDDs on hover
May 13, 2015
aa18378
[SPARK-7502] DAG visualization: gracefully handle removed stages
May 13, 2015
bb6dec3
[STREAMING] [MINOR] Keep streaming.UIUtils private
May 13, 2015
61d1e87
[SPARK-7356] [STREAMING] Fix flakey tests in FlumePollingStreamSuite …
harishreedharan May 13, 2015
73bed40
[SPARK-7081] Faster sort-based shuffle path using binary processing c…
JoshRosen May 14, 2015
59aaa1d
[SPARK-7601] [SQL] Support Insert into JDBC Datasource
gvramana May 14, 2015
bce00da
[SPARK-6752] [STREAMING] [REVISED] Allow StreamingContext to be recre…
tdas May 14, 2015
32e27df
[HOTFIX] Bug in merge script
pwendell May 14, 2015
728af88
[HOTFIX] Use 'new Job' in fsBasedParquet.scala
zsxwing May 14, 2015
3113da9
[HOT FIX #6125] Do not wait for all stages to start rendering
May 14, 2015
d5f18de
[SPARK-7612] [MLLIB] update NB training to use mllib's BLAS
mengxr May 14, 2015
d3db2fd
[SPARK-7620] [ML] [MLLIB] Removed calling size, length in while condi…
May 14, 2015
13e652b
[SPARK-7595] [SQL] Window will cause resolve failed with self join
Sephiroth-Lin May 14, 2015
1b8625f
[SPARK-7407] [MLLIB] use uid + name to identify parameters
mengxr May 14, 2015
c1080b6
[SPARK-7568] [ML] ml.LogisticRegression doesn't output the right pred…
May 14, 2015
7fb715d
[SPARK-7249] Updated Hadoop dependencies due to inconsistency in the …
FavioVazquez May 14, 2015
f2cd00b
[SQL][minor] rename apply for QueryPlanner
cloud-fan May 14, 2015
5d7d4f8
[SPARK-7278] [PySpark] DateType should find datetime.datetime acceptable
ksonj May 14, 2015
11a1a13
Make SPARK prefix a variable
tedyu May 14, 2015
93dbb3a
[SPARK-7598] [DEPLOY] Add aliveWorkers metrics in Master
twilightgod May 14, 2015
57ed16c
[SPARK-7643] [UI] use the correct size in RDDPage for storage info an…
mengxr May 14, 2015
0a317c1
[SPARK-7649] [STREAMING] [WEBUI] Use window.localStorage to store the…
zsxwing May 14, 2015
b208f99
[SPARK-7645] [STREAMING] [WEBUI] Show milliseconds in the UI if the b…
zsxwing May 14, 2015
723853e
[SPARK-7648] [MLLIB] Add weights and intercept to GLM wrappers in spa…
mengxr May 15, 2015
48fc38f
[SPARK-7619] [PYTHON] fix docstring signature
mengxr May 15, 2015
6d0633e
[SPARK-7548] [SQL] Add explode function for DataFrames
marmbrus May 15, 2015
f9705d4
[SPARK-7098][SQL] Make the WHERE clause with timestamp show consisten…
viirya May 15, 2015
e8f0e01
[SQL] When creating partitioned table scan, explicitly create UnionRDD.
yhuai May 15, 2015
7da33ce
[HOTFIX] Add workaround for SPARK-7660 to fix JavaAPISuite failures.
JoshRosen May 15, 2015
daf4ae7
[CORE] Remove unreachable Heartbeat message from Worker
kanzhang May 15, 2015
cf842d4
[SPARK-7650] [STREAMING] [WEBUI] Move streaming css and js files to t…
zsxwing May 15, 2015
9476148
[SPARK-6258] [MLLIB] GaussianMixture Python API parity check
yanboliang May 15, 2015
fdf5bba
[SPARK-7591] [SQL] Partitioning support API tweaks
liancheng May 15, 2015
c64ff80
[SPARK-7503] [YARN] Resources in .sparkStaging directory can't be cle…
sarutak May 15, 2015
f96b85a
[SPARK-7668] [MLLIB] Preserve isTransposed property for Matrix after …
viirya May 15, 2015
8f4aaba
[SPARK-7651] [MLLIB] [PYSPARK] GMM predict, predictSoft should raise …
FlytxtRnD May 15, 2015
b1b9d58
[SPARK-7233] [CORE] Detect REPL mode once
preeze May 15, 2015
270d4b5
[CORE] Protect additional test vars from early GC
tellison May 15, 2015
8ab1450
[SPARK-5412] [DEPLOY] Cannot bind Master to a specific hostname as pe…
srowen May 15, 2015
ad92af9
[SPARK-7664] [WEBUI] DAG visualization: Fix incorrect link paths of DAG.
sarutak May 15, 2015
8e3822a
[SPARK-7504] [YARN] NullPointerException when initializing SparkConte…
zzvara May 15, 2015
9b6cf28
[SPARK-7296] Add timeline visualization for stages in the UI.
sarutak May 15, 2015
50da9e8
[SPARK-7226] [SPARKR] Support math functions in R DataFrame
hqzizania May 15, 2015
6e77105
[SPARK-7677] [STREAMING] Add Kafka modules to the 2.11 build.
dragos May 15, 2015
c869633
[SPARK-7556] [ML] [DOC] Add user guide for spark.ml Binarizer, includ…
viirya May 15, 2015
e745456
[SPARK-7676] Bug fix and cleanup of stage timeline view
kayousterhout May 16, 2015
2c04c8a
[SPARK-7563] OutputCommitCoordinator.stop() should only run on the dr…
JoshRosen May 16, 2015
cc12a86
[SPARK-7575] [ML] [DOC] Example code for OneVsRest
May 16, 2015
adfd366
[SPARK-7073] [SQL] [PySpark] Clean up SQL data type hierarchy in Python
May 16, 2015
d7b6994
[SPARK-7543] [SQL] [PySpark] split dataframe.py into multiple files
May 16, 2015
deb4113
[SPARK-7473] [MLLIB] Add reservoir sample in RandomForest
May 16, 2015
578bfee
[SPARK-7654][SQL] DataFrameReader and DataFrameWriter for input/outpu…
rxin May 16, 2015
d41ae43
[SPARK-7671] Fix wrong URLs in MLlib Data Types Documentation
FavioVazquez May 16, 2015
1fd3381
[SPARK-4556] [BUILD] binary distribution assembly can't run in local …
srowen May 16, 2015
0ac8b01
[SPARK-7672] [CORE] Use int conversion in translating kryoserializer.…
nishkamravi2 May 16, 2015
47e7ffe
[SPARK-7655][Core][SQL] Remove 'scala.concurrent.ExecutionContext.Imp…
zsxwing May 16, 2015
ce63912
[HOTFIX] [SQL] Fixes DataFrameWriter.mode(String)
liancheng May 16, 2015
1b4e710
[BUILD] update jblas dependency version to 1.2.4
mtbrandy May 16, 2015
161d0b4
[SPARK-7654][MLlib] Migrate MLlib to the DataFrame reader/writer API.
rxin May 16, 2015
3b6ef2c
[SPARK-7655][Core] Deserializing value should not hold the TaskSchedu…
zsxwing May 17, 2015
517eb37
[SPARK-7654][SQL] Move JDBC into DataFrame's reader/writer interface.
rxin May 17, 2015
ba4f8ca
[MINOR] [SQL] Removes an unreachable case clause
liancheng May 17, 2015
1a7b9ce
[MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts
shivaram May 17, 2015
edf09ea
[SQL] [MINOR] Skip unresolved expression for InConversion
scwf May 17, 2015
3399055
[SPARK-7447] [SQL] Don't re-merge Parquet schema when the relation is…
viirya May 17, 2015
5021766
[SPARK-7669] Builds against Hadoop 2.6+ get inconsistent curator depend…
steveloughran May 17, 2015
f2cc6b5
[SPARK-7660] Wrap SnappyOutputStream to work around snappy-java bug
JoshRosen May 17, 2015
5645628
[SPARK-7686] [SQL] DescribeCommand is assigned wrong output attribute…
JoshRosen May 17, 2015
2ca60ac
[SPARK-7491] [SQL] Allow configuration of classloader isolation for hive
marmbrus May 17, 2015
ca4257a
[SPARK-6514] [SPARK-5960] [SPARK-6656] [SPARK-7679] [STREAMING] [KINE…
tdas May 17, 2015
2f22424
[SQL] [MINOR] use catalyst type converter in ScalaUdf
cloud-fan May 17, 2015
ff71d34
[SPARK-7693][Core] Remove "import scala.concurrent.ExecutionContext.I…
zsxwing May 18, 2015
775e6f9
[SPARK-7694] [MLLIB] Use getOrElse for getting the threshold of LR model
coderxiang May 18, 2015
e32c0f6
[SPARK-7299][SQL] Set precision and scale for Decimal according to JD…
viirya May 18, 2015
1ecfac6
[SPARK-6657] [PYSPARK] Fix doc warnings
mengxr May 18, 2015
814b3da
[SPARK-7272] [MLLIB] User guide for PMML model export
selvinsource May 18, 2015
563bfcc
[SPARK-7627] [SPARK-7472] DAG visualization: style skipped stages
May 18, 2015
e1ac2a9
[SPARK-6888] [SQL] Make the jdbc driver handling user-definable
rtreffer May 18, 2015
010a1c2
[SPARK-7570] [SQL] Ignores _temporary during partition discovery
liancheng May 18, 2015
56ede88
[SQL] [MINOR] [THIS] use private for internal field in ScalaUdf
cloud-fan May 18, 2015
9c7e802
[SPARK-7380] [MLLIB] pipeline stages should be copyable in Python
mengxr May 18, 2015
aa31e43
[SPARK-2883] [SQL] ORC data source for Spark SQL
zhzhan May 18, 2015
fc2480e
[SPARK-7631] [SQL] treenode argString should not print children
scwf May 18, 2015
103c863
[SPARK-7269] [SQL] Incorrect analysis for aggregation(use semanticEqu…
cloud-fan May 18, 2015
530397b
[SPARK-7567] [SQL] [follow-up] Use a new flag to set output committer…
yhuai May 18, 2015
9dadf01
[SPARK-7673] [SQL] WIP: HadoopFsRelation and ParquetRelation2 perform…
liancheng May 18, 2015
32fbd29
[SPARK-6216] [PYSPARK] check python version of worker with driver
May 18, 2015
0b6f503
[SPARK-7658] [STREAMING] [WEBUI] Update the mouse behaviors for the t…
zsxwing May 18, 2015
2 changes: 1 addition & 1 deletion LICENSE
@@ -861,7 +861,7 @@ The following components are provided under a BSD-style license. See project lin

(BSD 3 Clause) core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model)
(BSD 3-clause style license) jblas (org.jblas:jblas:1.2.3 - http://jblas.org/)
(BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
(BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org)
(BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
29 changes: 27 additions & 2 deletions R/pkg/NAMESPACE
Expand Up @@ -37,7 +37,7 @@ exportMethods("arrange",
"registerTempTable",
"rename",
"repartition",
"sampleDF",
"sample",
"sample_frac",
"saveAsParquetFile",
"saveAsTable",
@@ -53,38 +53,62 @@ exportMethods("arrange",
"unpersist",
"where",
"withColumn",
"withColumnRenamed")
"withColumnRenamed",
"write.df")

exportClasses("Column")

exportMethods("abs",
"acos",
"alias",
"approxCountDistinct",
"asc",
"asin",
"atan",
"atan2",
"avg",
"cast",
"cbrt",
"ceiling",
"contains",
"cos",
"cosh",
"countDistinct",
"desc",
"endsWith",
"exp",
"expm1",
"floor",
"getField",
"getItem",
"hypot",
"isNotNull",
"isNull",
"last",
"like",
"log",
"log10",
"log1p",
"lower",
"max",
"mean",
"min",
"n",
"n_distinct",
"rint",
"rlike",
"sign",
"sin",
"sinh",
"sqrt",
"startsWith",
"substr",
"sum",
"sumDistinct",
"tan",
"tanh",
"toDegrees",
"toRadians",
"upper")

exportClasses("GroupedData")
@@ -101,6 +125,7 @@ export("cacheTable",
"jsonFile",
"loadDF",
"parquetFile",
"read.df",
"sql",
"table",
"tableNames",
35 changes: 22 additions & 13 deletions R/pkg/R/DataFrame.R
@@ -294,8 +294,8 @@ setMethod("registerTempTable",
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' df <- loadDF(sqlCtx, path, "parquet")
#' df2 <- loadDF(sqlCtx, path2, "parquet")
#' df <- read.df(sqlCtx, path, "parquet")
#' df2 <- read.df(sqlCtx, path2, "parquet")
#' registerTempTable(df, "table1")
#' insertInto(df2, "table1", overwrite = TRUE)
#'}
@@ -473,14 +473,14 @@ setMethod("distinct",
dataFrame(sdf)
})

#' SampleDF
#' Sample
#'
#' Return a sampled subset of this DataFrame using a random seed.
#'
#' @param x A SparkSQL DataFrame
#' @param withReplacement Sampling with replacement or not
#' @param fraction The (rough) sample target fraction
#' @rdname sampleDF
#' @rdname sample
#' @aliases sample_frac
#' @export
#' @examples
@@ -489,10 +489,10 @@ setMethod("distinct",
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' collect(sampleDF(df, FALSE, 0.5))
#' collect(sampleDF(df, TRUE, 0.5))
#' collect(sample(df, FALSE, 0.5))
#' collect(sample(df, TRUE, 0.5))
#'}
setMethod("sampleDF",
setMethod("sample",
# TODO : Figure out how to send integer as java.lang.Long to JVM so
# we can send seed as an argument through callJMethod
signature(x = "DataFrame", withReplacement = "logical",
@@ -503,13 +503,13 @@ setMethod("sampleDF",
dataFrame(sdf)
})

#' @rdname sampleDF
#' @aliases sampleDF
#' @rdname sample
#' @aliases sample
setMethod("sample_frac",
signature(x = "DataFrame", withReplacement = "logical",
fraction = "numeric"),
function(x, withReplacement, fraction) {
sampleDF(x, withReplacement, fraction)
sample(x, withReplacement, fraction)
})

#' Count
@@ -1303,17 +1303,17 @@ setMethod("except",
#' @param source A name for external data source
#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
#'
#' @rdname saveAsTable
#' @rdname write.df
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlCtx, path)
#' saveAsTable(df, "myfile")
#' write.df(df, "myfile", "parquet", "overwrite")
#' }
setMethod("saveDF",
setMethod("write.df",
signature(df = "DataFrame", path = 'character', source = 'character',
mode = 'character'),
function(df, path = NULL, source = NULL, mode = "append", ...){
@@ -1334,6 +1334,15 @@ setMethod("saveDF",
callJMethod(df@sdf, "save", source, jmode, options)
})

#' @rdname write.df
#' @aliases saveDF
#' @export
setMethod("saveDF",
signature(df = "DataFrame", path = 'character', source = 'character',
mode = 'character'),
function(df, path = NULL, source = NULL, mode = "append", ...){
write.df(df, path, source, mode, ...)
})

#' saveAsTable
#'
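The DataFrame.R hunks above rename sampleDF to sample (sample_frac now just delegates to it) and introduce write.df, keeping saveDF as a forwarding alias. A minimal usage sketch of the renamed API, assuming a working SparkR session; the context objects and paths are placeholders mirroring the roxygen examples, not part of this diff:

# Sketch only: sc, sqlCtx, and the paths are stand-ins.
sc <- sparkR.init()
sqlCtx <- sparkRSQL.init(sc)
df <- jsonFile(sqlCtx, "path/to/file.json")

half <- sample(df, FALSE, 0.5)        # formerly sampleDF
also <- sample_frac(df, FALSE, 0.5)   # unchanged name, now calls sample()

write.df(half, "path/to/out", "parquet", "overwrite")   # new writer entry point
saveDF(also, "path/to/out2", "parquet", "overwrite")    # old name, forwards to write.df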
4 changes: 2 additions & 2 deletions R/pkg/R/RDD.R
@@ -927,7 +927,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical",
MAXINT)))))

# TODO(zongheng): investigate if this call is an in-place shuffle?
sample(samples)[1:total]
base::sample(samples)[1:total]
})

# Creates tuples of the elements in this RDD by applying a function.
@@ -996,7 +996,7 @@ setMethod("coalesce",
if (shuffle || numPartitions > SparkR:::numPartitions(x)) {
func <- function(partIndex, part) {
set.seed(partIndex) # partIndex as seed
start <- as.integer(sample(numPartitions, 1) - 1)
start <- as.integer(base::sample(numPartitions, 1) - 1)
lapply(seq_along(part),
function(i) {
pos <- (start + i) %% numPartitions
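The RDD.R change above namespace-qualifies the shuffling calls because SparkR now defines its own sample generic for DataFrames; inside package code, base::sample ensures the base R sampler is used regardless of that generic. Illustration only, plain R with no Spark objects involved:

elems <- 1:10
shuffled <- base::sample(elems)               # full shuffle, base R semantics
start <- as.integer(base::sample(5, 1) - 1)   # pick a 0-based starting partition, as in coalesce()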
13 changes: 10 additions & 3 deletions R/pkg/R/SQLContext.R
@@ -421,7 +421,7 @@ clearCache <- function(sqlCtx) {
#' \dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' df <- loadDF(sqlCtx, path, "parquet")
#' df <- read.df(sqlCtx, path, "parquet")
#' registerTempTable(df, "table")
#' dropTempTable(sqlCtx, "table")
#' }
@@ -450,10 +450,10 @@ dropTempTable <- function(sqlCtx, tableName) {
#'\dontrun{
#' sc <- sparkR.init()
#' sqlCtx <- sparkRSQL.init(sc)
#' df <- load(sqlCtx, "path/to/file.json", source = "json")
#' df <- read.df(sqlCtx, "path/to/file.json", source = "json")
#' }

loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) {
read.df <- function(sqlCtx, path = NULL, source = NULL, ...) {
options <- varargsToEnv(...)
if (!is.null(path)) {
options[['path']] <- path
@@ -462,6 +462,13 @@ loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) {
dataFrame(sdf)
}

#' @aliases loadDF
#' @export

loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) {
read.df(sqlCtx, path, source, ...)
}

#' Create an external table
#'
#' Creates an external table based on the dataset in a data source,
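SQLContext.R renames loadDF to read.df and keeps loadDF as a thin wrapper. A short sketch, assuming the same placeholder context and JSON path used in the roxygen examples:

# Sketch only: the context and path are stand-ins.
sqlCtx <- sparkRSQL.init(sparkR.init())
df  <- read.df(sqlCtx, "path/to/file.json", source = "json")   # new reader entry point
df2 <- loadDF(sqlCtx, "path/to/file.json", source = "json")    # old name, forwards to read.df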
36 changes: 33 additions & 3 deletions R/pkg/R/column.R
@@ -55,12 +55,17 @@ operators <- list(
"+" = "plus", "-" = "minus", "*" = "multiply", "/" = "divide", "%%" = "mod",
"==" = "equalTo", ">" = "gt", "<" = "lt", "!=" = "notEqual", "<=" = "leq", ">=" = "geq",
# we can not override `&&` and `||`, so use `&` and `|` instead
"&" = "and", "|" = "or" #, "!" = "unary_$bang"
"&" = "and", "|" = "or", #, "!" = "unary_$bang"
"^" = "pow"
)
column_functions1 <- c("asc", "desc", "isNull", "isNotNull")
column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains")
functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt",
"first", "last", "lower", "upper", "sumDistinct")
"first", "last", "lower", "upper", "sumDistinct",
"acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp",
"expm1", "floor", "log", "log10", "log1p", "rint", "sign",
"sin", "sinh", "tan", "tanh", "toDegrees", "toRadians")
binary_mathfunctions<- c("atan2", "hypot")

createOperator <- function(op) {
setMethod(op,
@@ -76,7 +81,11 @@ createOperator <- function(op) {
if (class(e2) == "Column") {
e2 <- e2@jc
}
callJMethod(e1@jc, operators[[op]], e2)
if (op == "^") {
jc <- callJStatic("org.apache.spark.sql.functions", operators[[op]], e1@jc, e2)
} else {
callJMethod(e1@jc, operators[[op]], e2)
}
}
column(jc)
})
@@ -106,11 +115,29 @@ createStaticFunction <- function(name) {
setMethod(name,
signature(x = "Column"),
function(x) {
if (name == "ceiling") {
name <- "ceil"
}
if (name == "sign") {
name <- "signum"
}
jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc)
column(jc)
})
}

createBinaryMathfunctions <- function(name) {
setMethod(name,
signature(y = "Column"),
function(y, x) {
if (class(x) == "Column") {
x <- x@jc
}
jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x)
column(jc)
})
}

createMethods <- function() {
for (op in names(operators)) {
createOperator(op)
@@ -124,6 +151,9 @@ createMethods <- function() {
for (x in functions) {
createStaticFunction(x)
}
for (name in binary_mathfunctions) {
createBinaryMathfunctions(name)
}
}

createMethods()
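column.R wires the newly exported math helpers onto Column: unary functions dispatch to org.apache.spark.sql.functions (with ceiling mapped to ceil and sign to signum), atan2 and hypot become two-argument math functions, and "^" is routed to functions.pow. A hedged sketch of how these read on a Column; the data frame and its numeric columns x and y are invented for illustration:

# Sketch only: sqlCtx and the toy data are stand-ins.
sqlCtx <- sparkRSQL.init(sparkR.init())
df <- createDataFrame(sqlCtx, data.frame(x = c(1.2, 2.7), y = c(3.0, 4.5)))

ceiling(df$x)            # dispatched to functions.ceil on the JVM side
sign(df$y)               # dispatched to functions.signum
atan2(df$y, df$x)        # binary math function taking two Columns
hypot(df$y, df$x)
pow2 <- df$x ^ 2         # "^" now routed to functions.pow
head(select(df, pow2))   # materialize the derived column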