Changes from all commits
514 commits
b14bfc3
[SPARK-19993][SQL] Caching logical plans containing subquery expressi…
dilipbiswal Apr 12, 2017
b938438
[MINOR][DOCS] Fix spacings in Structured Streaming Programming Guide
dongjinleekr Apr 12, 2017
bca4259
[MINOR][DOCS] JSON APIs related documentation fixes
HyukjinKwon Apr 12, 2017
044f7ec
[SPARK-20298][SPARKR][MINOR] fixed spelling mistake "charactor"
bdwyer2 Apr 12, 2017
ffc57b0
[SPARK-20302][SQL] Short circuit cast when from and to types are stru…
rxin Apr 12, 2017
2e1fd46
[SPARK-20296][TRIVIAL][DOCS] Count distinct error message for streaming
jtoka Apr 12, 2017
ceaf77a
[SPARK-18692][BUILD][DOCS] Test Java 8 unidoc build on Jenkins
HyukjinKwon Apr 12, 2017
504e62e
[SPARK-20303][SQL] Rename createTempFunction to registerFunction
gatorsmile Apr 12, 2017
5408553
[SPARK-20304][SQL] AssertNotNull should not include path in string re…
rxin Apr 12, 2017
99a9473
[SPARK-19570][PYSPARK] Allow to disable hive in pyspark shell
zjffdu Apr 12, 2017
924c424
[SPARK-20301][FLAKY-TEST] Fix Hadoop Shell.runCommand flakiness in St…
brkyvz Apr 12, 2017
a7b430b
[SPARK-15354][FLAKY-TEST] TopologyAwareBlockReplicationPolicyBehavior…
cloud-fan Apr 13, 2017
c5f1cc3
[SPARK-20131][CORE] Don't use `this` lock in StandaloneSchedulerBacke…
zsxwing Apr 13, 2017
ec68d8f
[SPARK-20189][DSTREAM] Fix spark kinesis testcases to remove deprecat…
yashs360 Apr 13, 2017
095d1cb
[SPARK-20265][MLLIB] Improve Prefix'span pre-processing efficiency
Syrux Apr 13, 2017
a4293c2
[SPARK-20284][CORE] Make {Des,S}erializationStream extend Closeable
Apr 13, 2017
fbe4216
[SPARK-20233][SQL] Apply star-join filter heuristics to dynamic progr…
ioana-delaney Apr 13, 2017
8ddf0d2
[SPARK-20232][PYTHON] Improve combineByKey docs
Apr 13, 2017
7536e28
[SPARK-20038][SQL] FileFormatWriter.ExecuteWriteTask.releaseResources…
steveloughran Apr 13, 2017
fb036c4
[SPARK-20318][SQL] Use Catalyst type for min/max in ColumnStat for ea…
Apr 14, 2017
98b41ec
[SPARK-20316][SQL] Val and Var should strictly follow the Scala syntax
Apr 15, 2017
35e5ae4
[SPARK-19716][SQL][FOLLOW-UP] UnresolvedMapObjects should always be s…
cloud-fan Apr 16, 2017
e090f3c
[SPARK-20335][SQL] Children expressions of Hive UDF impacts the deter…
gatorsmile Apr 16, 2017
a888fed
[SPARK-19740][MESOS] Add support in Spark to pass arbitrary parameter…
Apr 16, 2017
ad935f5
[SPARK-20343][BUILD] Add avro dependency in core POM to resolve build…
HyukjinKwon Apr 16, 2017
86d251c
[SPARK-20278][R] Disable 'multiple_dots_linter' lint rule that is aga…
HyukjinKwon Apr 16, 2017
24f09b3
[SPARK-19828][R][FOLLOWUP] Rename asJsonArray to as.json.array in fro…
HyukjinKwon Apr 17, 2017
01ff035
[SPARK-20349][SQL] ListFunctions returns duplicate functions after us…
gatorsmile Apr 17, 2017
e5fee3e
[SPARK-17647][SQL] Fix backslash escaping in 'LIKE' patterns.
jodersky Apr 17, 2017
0075562
Typo fix: distitrbuted -> distributed
ash211 Apr 18, 2017
33ea908
[TEST][MINOR] Replace repartitionBy with distribute in CollapseRepart…
jaceklaskowski Apr 18, 2017
b0a1e93
[SPARK-17647][SQL][FOLLOWUP][MINOR] fix typo
felixcheung Apr 18, 2017
07fd94e
[SPARK-20344][SCHEDULER] Duplicate call in FairSchedulableBuilder.add…
snazy Apr 18, 2017
d4f10cb
[SPARK-20343][BUILD] Force Avro 1.7.7 in sbt build to resolve build f…
HyukjinKwon Apr 18, 2017
321b4f0
[SPARK-20366][SQL] Fix recursive join reordering: inside joins are no…
Apr 18, 2017
1f81dda
[SPARK-20354][CORE][REST-API] When I request access to the 'http: //i…
Apr 18, 2017
f654b39
[SPARK-20360][PYTHON] reprs for interpreters
rgbkrk Apr 18, 2017
74aa0df
[SPARK-20377][SS] Fix JavaStructuredSessionization example
tdas Apr 18, 2017
e468a96
[SPARK-20254][SQL] Remove unnecessary data conversion for Dataset wit…
kiszk Apr 19, 2017
702d85a
[SPARK-20208][R][DOCS] Document R fpGrowth support
zero323 Apr 19, 2017
608bf30
[SPARK-20359][SQL] Avoid unnecessary execution in EliminateOuterJoin …
koertkuipers Apr 19, 2017
773754b
[SPARK-20356][SQL] Pruned InMemoryTableScanExec should have correct o…
viirya Apr 19, 2017
3537876
[SPARK-20343][BUILD] Avoid Unidoc build only if Hadoop 2.6 is explici…
HyukjinKwon Apr 19, 2017
71a8e9d
[SPARK-20036][DOC] Note incompatible dependencies on org.apache.kafka…
koeninger Apr 19, 2017
4fea784
[SPARK-20397][SPARKR][SS] Fix flaky test: test_streaming.R.Terminated…
zsxwing Apr 19, 2017
63824b2
[SPARK-20350] Add optimization rules to apply Complementation Laws.
ptkool Apr 20, 2017
39e303a
[MINOR][SS] Fix a missing space in UnsupportedOperationChecker error …
zsxwing Apr 20, 2017
dd6d55d
[SPARK-20398][SQL] range() operator should include cancellation reaso…
ericl Apr 20, 2017
bdc6056
Fixed typos in docs
Apr 20, 2017
46c5749
[SPARK-20375][R] R wrappers for array and map
zero323 Apr 20, 2017
55bea56
[SPARK-20156][SQL][FOLLOW-UP] Java String toLowerCase "Turkish locale…
gatorsmile Apr 20, 2017
c6f62c5
[SPARK-20405][SQL] Dataset.withNewExecutionId should be private
rxin Apr 20, 2017
b91873d
[SPARK-20409][SQL] fail early if aggregate function in GROUP BY
cloud-fan Apr 20, 2017
c5a31d1
[SPARK-20407][TESTS] ParquetQuerySuite 'Enabling/disabling ignoreCorr…
bogdanrdc Apr 20, 2017
b2ebadf
[SPARK-20358][CORE] Executors failing stage on interrupted exception …
ericl Apr 20, 2017
d95e4d9
[SPARK-20334][SQL] Return a better error message when correlated pred…
dilipbiswal Apr 20, 2017
0332063
[SPARK-20410][SQL] Make sparkConf a def in SharedSQLContext
hvanhovell Apr 20, 2017
592f5c8
[SPARK-20172][CORE] Add file permission check when listing files in F…
jerryshao Apr 20, 2017
0368eb9
[SPARK-20367] Properly unescape column names of partitioning columns …
juliuszsompolski Apr 21, 2017
760c8d0
[SPARK-20329][SQL] Make timezone aware expression without timezone un…
hvanhovell Apr 21, 2017
48d760d
[SPARK-20281][SQL] Print the identical Range parameters of SparkConte…
maropu Apr 21, 2017
e2b3d23
[SPARK-20420][SQL] Add events to the external catalog
hvanhovell Apr 21, 2017
3476799
Small rewording about history server use case
dud225 Apr 21, 2017
c9e6035
[SPARK-20412] Throw ParseException from visitNonOptionalPartitionSpec…
juliuszsompolski Apr 21, 2017
a750a59
[SPARK-20341][SQL] Support BigInt's value that does not fit in long v…
kiszk Apr 21, 2017
eb00378
[SPARK-20423][ML] fix MLOR coeffs centering when reg == 0
WeichenXu123 Apr 21, 2017
fd648bf
[SPARK-20371][R] Add wrappers for collect_list and collect_set
zero323 Apr 21, 2017
ad29040
[SPARK-20401][DOC] In the spark official configuration document, the …
Apr 21, 2017
05a4514
[SPARK-20386][SPARK CORE] modify the log info if the block exists on …
eatoncys Apr 22, 2017
b3c572a
[SPARK-20430][SQL] Initialise RangeExec parameters in a driver side
maropu Apr 22, 2017
8765bc1
[SPARK-20132][DOCS] Add documentation for column string functions
map222 Apr 23, 2017
2eaf4f3
[SPARK-20385][WEB-UI] Submitted Time' field, the date format needs to…
Apr 23, 2017
e9f9715
[BUILD] Close stale PRs
maropu Apr 24, 2017
776a2c0
[SPARK-20439][SQL] Fix Catalog API listTables and getTable when faile…
gatorsmile Apr 24, 2017
90264ac
[SPARK-18901][ML] Require in LR LogisticAggregator is redundant
wangmiao1981 Apr 24, 2017
8a272dd
[SPARK-20438][R] SparkR wrappers for split and repeat
zero323 Apr 24, 2017
5280d93
[SPARK-20239][CORE] Improve HistoryServer's ACL mechanism
jerryshao Apr 25, 2017
f44c8a8
[SPARK-20453] Bump master branch version to 2.3.0-SNAPSHOT
JoshRosen Apr 25, 2017
31345fd
[SPARK-20451] Filter out nested mapType datatypes from sort order in …
sameeragarwal Apr 25, 2017
c8f1219
[SPARK-20455][DOCS] Fix Broken Docker IT Docs
original-brownbear Apr 25, 2017
0bc7a90
[SPARK-20404][CORE] Using Option(name) instead of Some(name)
szhem Apr 25, 2017
387565c
[SPARK-18901][FOLLOWUP][ML] Require in LR LogisticAggregator is redun…
wangmiao1981 Apr 25, 2017
67eef47
[SPARK-20449][ML] Upgrade breeze version to 0.13.1
yanboliang Apr 25, 2017
0a7f5f2
[SPARK-5484][GRAPHX] Periodically do checkpoint in Pregel
Apr 25, 2017
caf3920
[SPARK-18127] Add hooks and extension points to Spark
sameeragarwal Apr 26, 2017
57e1da3
[SPARK-16548][SQL] Inconsistent error handling in JSON parsing SQL fu…
Apr 26, 2017
df58a95
[SPARK-20437][R] R wrappers for rollup and cube
zero323 Apr 26, 2017
7a36525
[SPARK-20400][DOCS] Remove References to 3rd Party Vendor Tools
Apr 26, 2017
7fecf51
[SPARK-19812] YARN shuffle service fails to relocate recovery DB acro…
tgravescs Apr 26, 2017
dbb06c6
[MINOR][ML] Fix some PySpark & SparkR flaky tests
yanboliang Apr 26, 2017
66dd5b8
[SPARK-20391][CORE] Rename memory related fields in ExecutorSummay
jerryshao Apr 26, 2017
99c6cf9
[SPARK-20473] Enabling missing types in ColumnVector.Array
michal-databricks Apr 26, 2017
a277ae8
[SPARK-20474] Fixing OnHeapColumnVector reallocation
michal-databricks Apr 26, 2017
2ba1eba
[SPARK-12868][SQL] Allow adding jars from hdfs
weiqingy Apr 26, 2017
66636ef
[SPARK-20435][CORE] More thorough redaction of sensitive information
markgrover Apr 27, 2017
b4724db
[SPARK-20425][SQL] Support a vertical display mode for Dataset.show
maropu Apr 27, 2017
b58cf77
[DOCS][MINOR] Add missing since to SparkR repeat_string note.
zero323 Apr 27, 2017
ba76662
[SPARK-20208][DOCS][FOLLOW-UP] Add FP-Growth to SparkR programming guide
zero323 Apr 27, 2017
7633933
[SPARK-20483] Mesos Coarse mode may starve other Mesos frameworks
dgshep Apr 27, 2017
561e9cc
[SPARK-20421][CORE] Mark internal listeners as deprecated.
Apr 27, 2017
85c6ce6
[SPARK-20426] Lazy initialization of FileSegmentManagedBuffer for shu…
Apr 27, 2017
26ac2ce
[SPARK-20482][SQL] Resolving Casts is too strict on having time zone set
rednaxelafx Apr 27, 2017
a4aa466
[SPARK-20487][SQL] `HiveTableScan` node is quite verbose in explained…
tejasapatil Apr 27, 2017
039e32c
[SPARK-20483][MINOR] Test for Mesos Coarse mode may starve other Meso…
dgshep Apr 27, 2017
606432a
[SPARK-20047][ML] Constrained Logistic Regression
yanboliang Apr 27, 2017
01c999e
[SPARK-20461][CORE][SS] Use UninterruptibleThread for Executor and fi…
zsxwing Apr 27, 2017
823baca
[SPARK-20452][SS][KAFKA] Fix a potential ConcurrentModificationExcept…
zsxwing Apr 27, 2017
b90bf52
[SPARK-12837][CORE] Do not send the name of internal accumulator to e…
cloud-fan Apr 28, 2017
7fe8249
[SPARKR][DOC] Document LinearSVC in R programming guide
wangmiao1981 Apr 28, 2017
e3c8160
[SPARK-20476][SQL] Block users to create a table that use commas in t…
gatorsmile Apr 28, 2017
59e3a56
[SPARK-14471][SQL] Aliases in SELECT could be used in GROUP BY
maropu Apr 28, 2017
8c911ad
[SPARK-20465][CORE] Throws a proper exception when any temp directory…
HyukjinKwon Apr 28, 2017
733b81b
[SPARK-20496][SS] Bug in KafkaWriter Looks at Unanalyzed Plans
Apr 28, 2017
5d71f3d
[SPARK-20514][CORE] Upgrade Jetty to 9.3.11.v20160721
markgrover Apr 28, 2017
ebff519
[SPARK-20471] Remove AggregateBenchmark testsuite warning: Two level …
heary-cao Apr 28, 2017
77bcd77
[SPARK-19525][CORE] Add RDD checkpoint compression support
Apr 28, 2017
814a61a
[SPARK-20487][SQL] Display `serde` for `HiveTableScan` node in explai…
tejasapatil Apr 29, 2017
b28c3bc
[SPARK-20477][SPARKR][DOC] Document R bisecting k-means in R programm…
wangmiao1981 Apr 29, 2017
add9d1b
[SPARK-19791][ML] Add doc and example for fpgrowth
YY-OnCall Apr 29, 2017
ee694cd
[SPARK-20533][SPARKR] SparkR Wrappers Model should be private and val…
wangmiao1981 Apr 29, 2017
70f1bcd
[SPARK-20493][R] De-duplicate parse logics for DDL-like type strings …
HyukjinKwon Apr 29, 2017
d228cd0
[SPARK-20442][PYTHON][DOCS] Fill up documentations for functions in C…
HyukjinKwon Apr 29, 2017
4d99b95
[SPARK-20521][DOC][CORE] The default of 'spark.worker.cleanup.appData…
Apr 30, 2017
1ee494d
[SPARK-20492][SQL] Do not print empty parentheses for invalid primiti…
HyukjinKwon Apr 30, 2017
ae3df4e
[SPARK-20535][SPARKR] R wrappers for explode_outer and posexplode_outer
zero323 Apr 30, 2017
6613046
[MINOR][DOCS][PYTHON] Adding missing boolean type for replacement val…
May 1, 2017
80e9cf1
[SPARK-20490][SPARKR] Add R wrappers for eqNullSafe and ! / not
zero323 May 1, 2017
a355b66
[SPARK-20541][SPARKR][SS] support awaitTermination without timeout
felixcheung May 1, 2017
f0169a1
[SPARK-20290][MINOR][PYTHON][SQL] Add PySpark wrapper for eqNullSafe
zero323 May 1, 2017
6b44c4d
[SPARK-20534][SQL] Make outer generate exec return empty rows
hvanhovell May 1, 2017
ab30590
[SPARK-20517][UI] Fix broken history UI download link
jerryshao May 1, 2017
6fc6cf8
[SPARK-20464][SS] Add a job group and description for streaming queri…
kunalkhamar May 1, 2017
2b2dd08
[SPARK-20540][CORE] Fix unstable executor requests.
rdblue May 1, 2017
af726cd
[SPARK-20459][SQL] JdbcUtils throws IllegalStateException: Cause alre…
srowen May 2, 2017
259860d
[SPARK-20463] Add support for IS [NOT] DISTINCT FROM.
ptkool May 2, 2017
943a684
[SPARK-20548] Disable ReplSuite.newProductSeqEncoder with REPL define…
sameeragarwal May 2, 2017
d20a976
[SPARK-20192][SPARKR][DOC] SparkR migration guide to 2.2.0
felixcheung May 2, 2017
90d77e9
[SPARK-20532][SPARKR] Implement grouping and grouping_id
zero323 May 2, 2017
afb21bf
[SPARK-20537][CORE] Fixing OffHeapColumnVector reallocation
kiszk May 2, 2017
86174ea
[SPARK-20549] java.io.CharConversionException: Invalid UTF-32' in Jso…
brkyvz May 2, 2017
e300a5a
[SPARK-20300][ML][PYSPARK] Python API for ALSModel.recommendForAllUse…
May 2, 2017
b1e639a
[SPARK-19235][SQL][TEST][FOLLOW-UP] Enable Test Cases in DDLSuite wit…
gatorsmile May 2, 2017
13f47dc
[SPARK-20490][SPARKR][DOC] add family tag for not function
felixcheung May 2, 2017
ef3df91
[SPARK-20421][CORE] Add a missing deprecation tag.
May 2, 2017
b946f31
[SPARK-20558][CORE] clear InheritableThreadLocal variables in SparkCo…
cloud-fan May 3, 2017
6235132
[SPARK-20567] Lazily bind in GenerateExec
marmbrus May 3, 2017
db2fb84
[SPARK-6227][MLLIB][PYSPARK] Implement PySpark wrappers for SVD and P…
MechCoder May 3, 2017
16fab6b
[SPARK-20523][BUILD] Clean up build warnings for 2.2.0 release
srowen May 3, 2017
7f96f2d
[SPARK-16957][MLLIB] Use midpoints for split values.
facaiy May 3, 2017
27f543b
[SPARK-20441][SPARK-20432][SS] Within the same streaming query, one S…
lw-lin May 3, 2017
527fc5d
[SPARK-20576][SQL] Support generic hint function in Dataset/DataFrame
rxin May 3, 2017
6b9e49d
[SPARK-19965][SS] DataFrame batch reader may fail to infer partitions…
lw-lin May 3, 2017
13eb37c
[MINOR][SQL] Fix the test title from =!= to <=>, remove a duplicated …
HyukjinKwon May 3, 2017
02bbe73
[SPARK-20584][PYSPARK][SQL] Python generic hint support
zero323 May 4, 2017
fc472bd
[SPARK-20543][SPARKR] skip tests when running on CRAN
felixcheung May 4, 2017
b8302cc
[SPARK-20015][SPARKR][SS][DOC][EXAMPLE] Document R Structured Streami…
felixcheung May 4, 2017
9c36aa2
[SPARK-20585][SPARKR] R generic hint support
zero323 May 4, 2017
f21897f
[SPARK-20544][SPARKR] R wrapper for input_file_name
zero323 May 4, 2017
57b6470
[SPARK-20571][SPARKR][SS] Flaky Structured Streaming tests
felixcheung May 4, 2017
c5dceb8
[SPARK-20047][FOLLOWUP][ML] Constrained Logistic Regression follow up
yanboliang May 4, 2017
bfc8c79
[SPARK-20566][SQL] ColumnVector should support `appendFloats` for array
dongjoon-hyun May 4, 2017
0d16faa
[SPARK-20574][ML] Allow Bucketizer to handle non-Double numeric column
May 5, 2017
4411ac7
[INFRA] Close stale PRs
HyukjinKwon May 5, 2017
37cdf07
[SPARK-19660][SQL] Replace the deprecated property name fs.default.na…
wangyum May 5, 2017
5773ab1
[SPARK-20546][DEPLOY] spark-class gets syntax error in posix mode
jyu00 May 5, 2017
9064f1b
[SPARK-20495][SQL][CORE] Add StorageLevel to cacheTable API
phatak-dev May 5, 2017
b9ad2d1
[SPARK-20613] Remove excess quotes in Windows executable
jarrettmeyer May 5, 2017
41439fd
[SPARK-20381][SQL] Add SQL metrics of numOutputRows for ObjectHashAgg…
May 5, 2017
bd57882
[SPARK-20603][SS][TEST] Set default number of topic partitions to 1 t…
zsxwing May 5, 2017
b31648c
[SPARK-20557][SQL] Support for db column type TIMESTAMP WITH TIME ZONE
JannikArndt May 5, 2017
5d75b14
[SPARK-20616] RuleExecutor logDebug of batch results should show diff…
juliuszsompolski May 5, 2017
b433aca
[SPARK-20614][PROJECT INFRA] Use the same log4j configuration with Je…
HyukjinKwon May 6, 2017
cafca54
[SPARK-20557][SQL] Support JDBC data type Time with Time Zone
gatorsmile May 7, 2017
63d90e7
[SPARK-18777][PYTHON][SQL] Return UDF from udf.register
zero323 May 7, 2017
37f963a
[SPARK-20518][CORE] Supplement the new blockidsuite unit tests
heary-cao May 7, 2017
88e6d75
[SPARK-20484][MLLIB] Add documentation to ALS code
danielyli May 7, 2017
2cf83c4
[SPARK-7481][BUILD] Add spark-hadoop-cloud module to pull in object s…
steveloughran May 7, 2017
7087e01
[SPARK-20543][SPARKR][FOLLOWUP] Don't skip tests on AppVeyor
felixcheung May 7, 2017
500436b
[MINOR][SQL][DOCS] Improve unix_timestamp's scaladoc (and typo hunting)
jaceklaskowski May 7, 2017
1f73d35
[SPARK-20550][SPARKR] R wrapper for Dataset.alias
zero323 May 7, 2017
f53a820
[SPARK-16931][PYTHON][SQL] Add Python wrapper for bucketBy
zero323 May 8, 2017
2269155
[SPARK-12297][SQL] Hive compatibility for Parquet Timestamps
squito May 8, 2017
c24bdaa
[SPARK-20626][SPARKR] address date test warning with timezone on windows
felixcheung May 8, 2017
42cc6d1
[SPARK-20380][SQL] Unable to set/unset table comment property using A…
sujith71955 May 8, 2017
2fdaeb5
[SPARKR][DOC] fix typo in vignettes
May 8, 2017
0f820e2
[SPARK-20519][SQL][CORE] Modify to prevent some possible runtime exce…
10110346 May 8, 2017
1552665
[SPARK-19956][CORE] Optimize a location order of blocks with topology…
ConeyLiu May 8, 2017
58518d0
[SPARK-20596][ML][TEST] Consolidate and improve ALS recommendAll test…
May 8, 2017
aeb2ecc
[SPARK-20621][DEPLOY] Delete deprecated config parameter in 'spark-en…
ConeyLiu May 8, 2017
829cd7b
[SPARK-20605][CORE][YARN][MESOS] Deprecate not used AM and executor p…
jerryshao May 8, 2017
2abfee1
[SPARK-20661][SPARKR][TEST] SparkR tableNames() test fails
falaki May 8, 2017
b952b44
[SPARK-20661][SPARKR][TEST][FOLLOWUP] SparkR tableNames() test fails
felixcheung May 9, 2017
8079424
[SPARK-11968][MLLIB] Optimize MLLIB ALS recommendForAll
May 9, 2017
10b00ab
[SPARK-20587][ML] Improve performance of ML ALS recommendForAll
May 9, 2017
be53a78
[SPARK-20615][ML][TEST] SparseVector.argmax throws IndexOutOfBoundsEx…
May 9, 2017
b8733e0
[SPARK-20606][ML] ML 2.2 QA: Remove deprecated methods for ML
yanboliang May 9, 2017
0d00c76
[SPARK-20667][SQL][TESTS] Cleanup the cataloged metadata after comple…
gatorsmile May 9, 2017
714811d
[SPARK-20311][SQL] Support aliases for table value functions
maropu May 9, 2017
181261a
[SPARK-20355] Add per application spark version on the history server…
May 9, 2017
f561a76
[SPARK-20548][FLAKY-TEST] share one REPL instance among REPL test cases
cloud-fan May 9, 2017
d099f41
[SPARK-20674][SQL] Support registering UserDefinedFunction as named UDF
rxin May 9, 2017
25ee816
[SPARK-19876][BUILD] Move Trigger.java to java source hierarchy
srowen May 9, 2017
1b85bcd
[SPARK-20627][PYSPARK] Drop the hadoop distirbution name from the Pyt…
holdenk May 9, 2017
ac1ab6b
Revert "[SPARK-12297][SQL] Hive compatibility for Parquet Timestamps"
rxin May 9, 2017
f79aa28
Revert "[SPARK-20311][SQL] Support aliases for table value functions"
yhuai May 9, 2017
c0189ab
[SPARK-20373][SQL][SS] Batch queries with 'Dataset/DataFrame.withWate…
uncleGen May 9, 2017
771abeb
[SPARK-17685][SQL] Make SortMergeJoinExec's currentVars is null when …
wangyum May 10, 2017
3d2131a
[SPARK-20590][SQL] Use Spark internal datasource if multiples are fou…
HyukjinKwon May 10, 2017
a90c5cd
[SPARK-20686][SQL] PropagateEmptyRelation incorrectly handles aggrega…
JoshRosen May 10, 2017
a819dab
[SPARK-20670][ML] Simplify FPGrowth transform
YY-OnCall May 10, 2017
0ef16bd
[SPARK-20668][SQL] Modify ScalaUDF to handle nullability.
ueshin May 10, 2017
804949c
[SPARK-20631][PYTHON][ML] LogisticRegression._checkThresholdConsisten…
zero323 May 10, 2017
ca4625e
[SPARK-20630][WEB UI] Fixed column visibility in Executor Tab
ajbozarth May 10, 2017
a4cbf26
[SPARK-20637][CORE] Remove mention of old RDD classes from comments
michaelmior May 10, 2017
b512233
[SPARK-20393][WEBU UI] Strengthen Spark to prevent XSS vulnerabilities
n-marion May 10, 2017
789bdbe
[SPARK-20688][SQL] correctly check analysis for scalar sub-queries
cloud-fan May 10, 2017
76e4a55
[SPARK-20678][SQL] Ndv for columns not in filter condition should als…
May 10, 2017
fcb88f9
[MINOR][BUILD] Fix lint-java breaks.
ConeyLiu May 10, 2017
5c2c4dc
[SPARK-19447] Remove remaining references to generated rows metric
ala May 10, 2017
af8b6cc
[SPARK-20689][PYSPARK] python doctest leaking bucketed table
felixcheung May 10, 2017
8ddbc43
[SPARK-20685] Fix BatchPythonEvaluation bug in case of single UDF w/ …
JoshRosen May 10, 2017
0698e6c
[SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for…
yanboliang May 11, 2017
65accb8
[SPARK-17029] make toJSON not go through rdd form but operate on data…
May 11, 2017
b4c99f4
[SPARK-20569][SQL] RuntimeReplaceable functions should not take extra…
cloud-fan May 11, 2017
8c67aa7
[SPARK-20311][SQL] Support aliases for table value functions
maropu May 11, 2017
3aa4e46
[SPARK-20416][SQL] Print UDF names in EXPLAIN
maropu May 11, 2017
7144b51
[SPARK-20600][SS] KafkaRelation should be pretty printed in web UI
jaceklaskowski May 11, 2017
04901dd
[SPARK-20431][SQL] Specify a schema by using a DDL-formatted string
maropu May 11, 2017
609ba5f
[SPARK-20399][SQL] Add a config to fallback string literal parsing co…
viirya May 12, 2017
2b36eb6
[SPARK-20665][SQL] Bround" and "Round" function return NULL
10110346 May 12, 2017
c8da535
[SPARK-20718][SQL] FileSourceScanExec with different filter orders sh…
May 12, 2017
888b84a
[SPARK-20704][SPARKR] change CRAN test to run single thread
felixcheung May 12, 2017
af40bb1
[SPARK-20619][ML] StringIndexer supports multiple ways to order label
May 12, 2017
720708c
[SPARK-20639][SQL] Add single argument support for to_timestamp in SQ…
HyukjinKwon May 12, 2017
fc8a2b6
[SPARK-20554][BUILD] Remove usage of scala.language.reflectiveCalls
srowen May 12, 2017
b236933
[SPARK-17424] Fix unsound substitution bug in ScalaReflection.
rdblue May 12, 2017
54b4f2a
[SPARK-20718][SQL][FOLLOWUP] Fix canonicalization for HiveTableScanExec
May 12, 2017
92ea7fd
[SPARK-20710][SQL] Support aliases in CUBE/ROLLUP/GROUPING SETS
maropu May 12, 2017
b526f70
[SPARK-19951][SQL] Add string concatenate operator || to Spark SQL
maropu May 12, 2017
7d6ff39
[SPARK-20702][CORE] TaskContextImpl.markTaskCompleted should not hide…
zsxwing May 12, 2017
0d3a631
[SPARK-20714][SS] Fix match error when watermark is set with timeout …
tdas May 12, 2017
e3d2022
[SPARK-20594][SQL] The staging directory should be a child directory …
May 12, 2017
b84ff7e
[SPARK-20719][SQL] Support LIMIT ALL
gatorsmile May 12, 2017
3f98375
[SPARK-18772][SQL] Avoid unnecessary conversion try for special float…
HyukjinKwon May 13, 2017
c2c1c5b
respect both gpu and maxgpu
Mar 10, 2017
c5c5c37
Merge branch 'ji/hard_limit_on_gpu' of https://github.com/yanji84/spa…
May 13, 2017
ba87b35
fix syntax
May 13, 2017
5ef2881
fix gpu offer
May 14, 2017
c301f3d
syntax fix
May 14, 2017
7a07742
pass all tests
May 15, 2017
1 change: 1 addition & 0 deletions LICENSE
@@ -297,3 +297,4 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
(MIT License) RowsGroup (http://datatables.net/license/mit)
(MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
(MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
(MIT License) machinist (https://github.com/typelevel/machinist)
20 changes: 10 additions & 10 deletions R/check-cran.sh
@@ -20,18 +20,18 @@
set -o pipefail
set -e

FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
pushd $FWDIR > /dev/null
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
pushd "$FWDIR" > /dev/null

. $FWDIR/find-r.sh
. "$FWDIR/find-r.sh"

# Install the package (this is required for code in vignettes to run when building it later)
# Build the latest docs, but not vignettes, which is built with the package next
. $FWDIR/install-dev.sh
. "$FWDIR/install-dev.sh"

# Build source package with vignettes
SPARK_HOME="$(cd "${FWDIR}"/..; pwd)"
. "${SPARK_HOME}"/bin/load-spark-env.sh
. "${SPARK_HOME}/bin/load-spark-env.sh"
if [ -f "${SPARK_HOME}/RELEASE" ]; then
SPARK_JARS_DIR="${SPARK_HOME}/jars"
else
@@ -40,16 +40,16 @@ fi

if [ -d "$SPARK_JARS_DIR" ]; then
# Build a zip file containing the source package with vignettes
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD build "$FWDIR/pkg"

find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete
else
echo "Error Spark JARs not found in $SPARK_HOME"
echo "Error Spark JARs not found in '$SPARK_HOME'"
exit 1
fi

# Run check as-cran.
VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`

CRAN_CHECK_OPTIONS="--as-cran"

@@ -67,10 +67,10 @@ echo "Running CRAN check with $CRAN_CHECK_OPTIONS options"

if [ -n "$NO_TESTS" ] && [ -n "$NO_MANUAL" ]
then
"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
"$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
else
# This will run tests and/or build vignettes, and require SPARK_HOME
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
fi

popd > /dev/null
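
The check-cran.sh edits above, like the analogous changes to create-docs.sh, create-rd.sh, install-dev.sh and install-source-package.sh further down, all apply the same fix: every path expansion ($FWDIR, $SPARK_HOME, $LIB_DIR, the SparkR tarball name) is wrapped in double quotes, which keeps the scripts working when the checkout path contains spaces or other shell metacharacters. A minimal sketch of the behaviour the quoting guards against (illustrative only; the path with a space in it is hypothetical, not taken from the diff):

# Assume the repository sits at "/tmp/spark checkout/R" (hypothetical path).
FWDIR="$(cd "`dirname "$0"`"; pwd)"

# Unquoted: the expansion word-splits on the space, so pushd receives two arguments and fails.
pushd $FWDIR > /dev/null

# Quoted: the whole path is passed as a single argument.
pushd "$FWDIR" > /dev/null
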
10 changes: 5 additions & 5 deletions R/create-docs.sh
@@ -33,23 +33,23 @@ export FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
export SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)"

# Required for setting SPARK_SCALA_VERSION
. "${SPARK_HOME}"/bin/load-spark-env.sh
. "${SPARK_HOME}/bin/load-spark-env.sh"

echo "Using Scala $SPARK_SCALA_VERSION"

pushd $FWDIR > /dev/null
. $FWDIR/find-r.sh
pushd "$FWDIR" > /dev/null
. "$FWDIR/find-r.sh"

# Install the package (this will also generate the Rd files)
. $FWDIR/install-dev.sh
. "$FWDIR/install-dev.sh"

# Now create HTML files

# knit_rd puts html in current working directory
mkdir -p pkg/html
pushd pkg/html

"$R_SCRIPT_PATH/"Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
"$R_SCRIPT_PATH/Rscript" -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'

popd

8 changes: 4 additions & 4 deletions R/create-rd.sh
@@ -29,9 +29,9 @@
set -o pipefail
set -e

FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
pushd $FWDIR > /dev/null
. $FWDIR/find-r.sh
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
pushd "$FWDIR" > /dev/null
. "$FWDIR/find-r.sh"

# Generate Rd files if devtools is installed
"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
"$R_SCRIPT_PATH/Rscript" -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
14 changes: 7 additions & 7 deletions R/install-dev.sh
@@ -29,21 +29,21 @@
set -o pipefail
set -e

FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
LIB_DIR="$FWDIR/lib"

mkdir -p $LIB_DIR
mkdir -p "$LIB_DIR"

pushd $FWDIR > /dev/null
. $FWDIR/find-r.sh
pushd "$FWDIR" > /dev/null
. "$FWDIR/find-r.sh"

. $FWDIR/create-rd.sh
. "$FWDIR/create-rd.sh"

# Install SparkR to $LIB_DIR
"$R_SCRIPT_PATH/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
"$R_SCRIPT_PATH/R" CMD INSTALL --library="$LIB_DIR" "$FWDIR/pkg/"

# Zip the SparkR package so that it can be distributed to worker nodes on YARN
cd $LIB_DIR
cd "$LIB_DIR"
jar cfM "$LIB_DIR/sparkr.zip" SparkR

popd > /dev/null
20 changes: 10 additions & 10 deletions R/install-source-package.sh
@@ -29,28 +29,28 @@
set -o pipefail
set -e

FWDIR="$(cd `dirname "${BASH_SOURCE[0]}"`; pwd)"
pushd $FWDIR > /dev/null
. $FWDIR/find-r.sh
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
pushd "$FWDIR" > /dev/null
. "$FWDIR/find-r.sh"

if [ -z "$VERSION" ]; then
VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`
fi

if [ ! -f "$FWDIR"/SparkR_"$VERSION".tar.gz ]; then
echo -e "R source package file $FWDIR/SparkR_$VERSION.tar.gz is not found."
if [ ! -f "$FWDIR/SparkR_$VERSION.tar.gz" ]; then
echo -e "R source package file '$FWDIR/SparkR_$VERSION.tar.gz' is not found."
echo -e "Please build R source package with check-cran.sh"
exit -1;
fi

echo "Removing lib path and installing from source package"
LIB_DIR="$FWDIR/lib"
rm -rf $LIB_DIR
mkdir -p $LIB_DIR
"$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR
rm -rf "$LIB_DIR"
mkdir -p "$LIB_DIR"
"$R_SCRIPT_PATH/R" CMD INSTALL "SparkR_$VERSION.tar.gz" --library="$LIB_DIR"

# Zip the SparkR package so that it can be distributed to worker nodes on YARN
pushd $LIB_DIR > /dev/null
pushd "$LIB_DIR" > /dev/null
jar cfM "$LIB_DIR/sparkr.zip" SparkR
popd > /dev/null

2 changes: 1 addition & 1 deletion R/pkg/.lintr
@@ -1,2 +1,2 @@
linters: with_defaults(line_length_linter(100), camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
linters: with_defaults(line_length_linter(100), multiple_dots_linter = NULL, camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
3 changes: 3 additions & 0 deletions R/pkg/DESCRIPTION
@@ -35,6 +35,7 @@ Collate:
'WindowSpec.R'
'backend.R'
'broadcast.R'
'catalog.R'
'client.R'
'context.R'
'deserialize.R'
@@ -43,6 +44,7 @@ Collate:
'jvm.R'
'mllib_classification.R'
'mllib_clustering.R'
'mllib_fpm.R'
'mllib_recommendation.R'
'mllib_regression.R'
'mllib_stat.R'
@@ -51,6 +53,7 @@
'serialize.R'
'sparkR.R'
'stats.R'
'streaming.R'
'types.R'
'utils.R'
'window.R'
48 changes: 46 additions & 2 deletions R/pkg/NAMESPACE
@@ -66,7 +66,10 @@ exportMethods("glm",
"spark.randomForest",
"spark.gbt",
"spark.bisectingKmeans",
"spark.svmLinear")
"spark.svmLinear",
"spark.fpGrowth",
"spark.freqItemsets",
"spark.associationRules")

# Job group lifecycle management methods
export("setJobGroup",
@@ -82,6 +85,7 @@ exportMethods("arrange",
"as.data.frame",
"attach",
"cache",
"checkpoint",
"coalesce",
"collect",
"colnames",
@@ -97,6 +101,7 @@
"createOrReplaceTempView",
"crossJoin",
"crosstab",
"cube",
"dapply",
"dapplyCollect",
"describe",
@@ -118,9 +123,11 @@
"group_by",
"groupBy",
"head",
"hint",
"insertInto",
"intersect",
"isLocal",
"isStreaming",
"join",
"limit",
"merge",
@@ -138,6 +145,7 @@
"registerTempTable",
"rename",
"repartition",
"rollup",
"sample",
"sample_frac",
"sampleBy",
@@ -169,12 +177,14 @@
"write.json",
"write.orc",
"write.parquet",
"write.stream",
"write.text",
"write.ml")

exportClasses("Column")

exportMethods("%in%",
exportMethods("%<=>%",
"%in%",
"abs",
"acos",
"add_months",
@@ -197,6 +207,8 @@ exportMethods("%in%",
"cbrt",
"ceil",
"ceiling",
"collect_list",
"collect_set",
"column",
"concat",
"concat_ws",
@@ -207,6 +219,8 @@ exportMethods("%in%",
"count",
"countDistinct",
"crc32",
"create_array",
"create_map",
"hash",
"cume_dist",
"date_add",
@@ -222,6 +236,7 @@ exportMethods("%in%",
"endsWith",
"exp",
"explode",
"explode_outer",
"expm1",
"expr",
"factorial",
@@ -235,12 +250,15 @@ exportMethods("%in%",
"getField",
"getItem",
"greatest",
"grouping_bit",
"grouping_id",
"hex",
"histogram",
"hour",
"hypot",
"ifelse",
"initcap",
"input_file_name",
"instr",
"isNaN",
"isNotNull",
@@ -278,18 +296,21 @@ exportMethods("%in%",
"nanvl",
"negate",
"next_day",
"not",
"ntile",
"otherwise",
"over",
"percent_rank",
"pmod",
"posexplode",
"posexplode_outer",
"quarter",
"rand",
"randn",
"rank",
"regexp_extract",
"regexp_replace",
"repeat_string",
"reverse",
"rint",
"rlike",
@@ -313,6 +334,7 @@ exportMethods("%in%",
"sort_array",
"soundex",
"spark_partition_id",
"split_string",
"stddev",
"stddev_pop",
"stddev_samp",
@@ -355,17 +377,29 @@ export("as.DataFrame",
"clearCache",
"createDataFrame",
"createExternalTable",
"createTable",
"currentDatabase",
"dropTempTable",
"dropTempView",
"jsonFile",
"listColumns",
"listDatabases",
"listFunctions",
"listTables",
"loadDF",
"parquetFile",
"read.df",
"read.jdbc",
"read.json",
"read.orc",
"read.parquet",
"read.stream",
"read.text",
"recoverPartitions",
"refreshByPath",
"refreshTable",
"setCheckpointDir",
"setCurrentDatabase",
"spark.lapply",
"spark.addFile",
"spark.getSparkFilesRootDirectory",
@@ -402,6 +436,16 @@ export("partitionBy",
export("windowPartitionBy",
"windowOrderBy")

exportClasses("StreamingQuery")

export("awaitTermination",
"isActive",
"lastProgress",
"queryName",
"status",
"stopQuery")


S3method(print, jobj)
S3method(print, structField)
S3method(print, structType)
Expand Down