
Commit 488bbb2

felixcheung authored and shivaram committed
[SPARK-12232][SPARKR] New R API for read.table to avoid name conflict
shivaram sorry it took longer to fix some conflicts, this is the change to add an alias for `table`.

Author: felixcheung <[email protected]>

Closes #10406 from felixcheung/readtable.
1 parent beda901 commit 488bbb2

File tree

5 files changed: 21 additions & 27 deletions


R/pkg/NAMESPACE

Lines changed: 1 addition & 1 deletion
@@ -280,7 +280,7 @@ export("as.DataFrame",
        "read.text",
        "sql",
        "str",
-       "table",
+       "tableToDF",
        "tableNames",
        "tables",
        "uncacheTable")

R/pkg/R/SQLContext.R

Lines changed: 4 additions & 3 deletions
@@ -352,6 +352,8 @@ sql <- function(sqlContext, sqlQuery) {
 #' @param sqlContext SQLContext to use
 #' @param tableName The SparkSQL Table to convert to a DataFrame.
 #' @return DataFrame
+#' @rdname tableToDF
+#' @name tableToDF
 #' @export
 #' @examples
 #'\dontrun{
@@ -360,15 +362,14 @@ sql <- function(sqlContext, sqlQuery) {
 #' path <- "path/to/file.json"
 #' df <- read.json(sqlContext, path)
 #' registerTempTable(df, "table")
-#' new_df <- table(sqlContext, "table")
+#' new_df <- tableToDF(sqlContext, "table")
 #' }

-table <- function(sqlContext, tableName) {
+tableToDF <- function(sqlContext, tableName) {
   sdf <- callJMethod(sqlContext, "table", tableName)
   dataFrame(sdf)
 }

-
 #' Tables
 #'
 #' Returns a DataFrame containing names of tables in the given database.
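For reference, a minimal usage sketch of the renamed API in a SparkR 1.6-style session; the JSON path and table name below are placeholders, not taken from the patch.

# Sketch only: illustrates the rename from table() to tableToDF().
library(SparkR)
sc <- sparkR.init()                    # SparkR 1.x-style initialization
sqlContext <- sparkRSQL.init(sc)

df <- read.json(sqlContext, "path/to/file.json")   # placeholder path
registerTempTable(df, "people")                    # placeholder table name

# Before this change: new_df <- table(sqlContext, "people")
new_df <- tableToDF(sqlContext, "people")
head(new_df)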

R/pkg/inst/tests/testthat/test_context.R

Lines changed: 8 additions & 7 deletions
@@ -24,20 +24,21 @@ test_that("Check masked functions", {
   func <- lapply(masked, function(x) { capture.output(showMethods(x))[[1]] })
   funcSparkROrEmpty <- grepl("\\(package SparkR\\)$|^$", func)
   maskedBySparkR <- masked[funcSparkROrEmpty]
-  expect_equal(length(maskedBySparkR), 18)
-  expect_equal(sort(maskedBySparkR), sort(c("describe", "cov", "filter", "lag", "na.omit",
-                                            "predict", "sd", "var", "colnames", "colnames<-",
-                                            "intersect", "rank", "rbind", "sample", "subset",
-                                            "summary", "table", "transform")))
+  namesOfMasked <- c("describe", "cov", "filter", "lag", "na.omit", "predict", "sd", "var",
+                     "colnames", "colnames<-", "intersect", "rank", "rbind", "sample", "subset",
+                     "summary", "transform")
+  expect_equal(length(maskedBySparkR), length(namesOfMasked))
+  expect_equal(sort(maskedBySparkR), sort(namesOfMasked))
   # above are those reported as masked when `library(SparkR)`
   # note that many of these methods are still callable without base:: or stats:: prefix
   # there should be a test for each of these, except followings, which are currently "broken"
   funcHasAny <- unlist(lapply(masked, function(x) {
     any(grepl("=\"ANY\"", capture.output(showMethods(x)[-1])))
   }))
   maskedCompletely <- masked[!funcHasAny]
-  expect_equal(length(maskedCompletely), 4)
-  expect_equal(sort(maskedCompletely), sort(c("cov", "filter", "sample", "table")))
+  namesOfMaskedCompletely <- c("cov", "filter", "sample")
+  expect_equal(length(maskedCompletely), length(namesOfMaskedCompletely))
+  expect_equal(sort(maskedCompletely), sort(namesOfMaskedCompletely))
 })

 test_that("repeatedly starting and stopping SparkR", {

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 4 additions & 9 deletions
@@ -335,7 +335,6 @@ writeLines(mockLinesMapType, mapTypeJsonPath)
 test_that("Collect DataFrame with complex types", {
   # ArrayType
   df <- read.json(sqlContext, complexTypeJsonPath)
-
   ldf <- collect(df)
   expect_equal(nrow(ldf), 3)
   expect_equal(ncol(ldf), 3)
@@ -490,19 +489,15 @@ test_that("insertInto() on a registered table", {
   unlink(parquetPath2)
 })

-test_that("table() returns a new DataFrame", {
+test_that("tableToDF() returns a new DataFrame", {
   df <- read.json(sqlContext, jsonPath)
   registerTempTable(df, "table1")
-  tabledf <- table(sqlContext, "table1")
+  tabledf <- tableToDF(sqlContext, "table1")
   expect_is(tabledf, "DataFrame")
   expect_equal(count(tabledf), 3)
+  tabledf2 <- tableToDF(sqlContext, "table1")
+  expect_equal(count(tabledf2), 3)
   dropTempTable(sqlContext, "table1")
-
-  # nolint start
-  # Test base::table is working
-  #a <- letters[1:3]
-  #expect_equal(class(table(a, sample(a))), "table")
-  # nolint end
 })

 test_that("toRDD() returns an RRDD", {

docs/sparkr.md

Lines changed: 4 additions & 7 deletions
@@ -375,13 +375,6 @@ The following functions are masked by the SparkR package:
   <td><code>sample</code> in <code>package:base</code></td>
   <td><code>base::sample(x, size, replace = FALSE, prob = NULL)</code></td>
 </tr>
-<tr>
-  <td><code>table</code> in <code>package:base</code></td>
-  <td><code><pre>base::table(...,
-              exclude = if (useNA == "no") c(NA, NaN),
-              useNA = c("no", "ifany", "always"),
-              dnn = list.names(...), deparse.level = 1)</pre></code></td>
-</tr>
 </table>

 Since part of SparkR is modeled on the `dplyr` package, certain functions in SparkR share the same names with those in `dplyr`. Depending on the load order of the two packages, some functions from the package loaded first are masked by those in the package loaded after. In such case, prefix such calls with the package name, for instance, `SparkR::cume_dist(x)` or `dplyr::cume_dist(x)`.
@@ -394,3 +387,7 @@ You can inspect the search path in R with [`search()`](https://stat.ethz.ch/R-ma
 ## Upgrading From SparkR 1.5.x to 1.6

 - Before Spark 1.6, the default mode for writes was `append`. It was changed in Spark 1.6.0 to `error` to match the Scala API.
+
+## Upgrading From SparkR 1.6.x to 2.0
+
+- The method `table` has been removed and replaced by `tableToDF`.
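Because SparkR no longer exports `table`, base R's contingency-table function can again be called without a prefix. The small check below mirrors the test that had previously been commented out as broken and is illustrative only.

# Sketch: base::table is no longer masked once SparkR drops its "table" export.
library(SparkR)
a <- letters[1:3]
class(table(a, sample(a)))   # expected "table", i.e. base R's function is dispatched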
