File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -20,7 +20,7 @@ runScript <- function() {
sparkHome <- Sys.getenv("SPARK_HOME")
sparkTestJarPath <- "R/lib/SparkR/test_support/sparktestjar_2.10-1.0.jar"
jarPath <- paste("--jars", shQuote(file.path(sparkHome, sparkTestJarPath)))
-scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/jarTest.R")
+scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/testthat/jarTest.R")
submitPath <- file.path(sparkHome, "bin/spark-submit")
res <- system2(command = submitPath,
args = c(jarPath, scriptPath),
@@ -26,7 +26,7 @@ sc <- sparkR.init()
sqlContext <- sparkRSQL.init(sc)

test_that("glm and predict", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
Member: Could you add a note in JIRA SPARK-11976 that once it is fixed we should remove the suppressWarnings around the DataFrames created from iris?

Contributor Author: There are a number of suppressWarnings calls in this PR. Instead of adding notes for each of them here, I'd like to add a note in SPARK-11976 saying that cleaning up these warnings is part of that job. Is that OK?

Member: Right, that's what I mean: add a note in SPARK-11976 to remove these. Alternatively, we could open another JIRA to change all these tests to use a different data frame that doesn't have column names containing ".".

Contributor Author: Added a note in SPARK-11976.
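For context, a minimal sketch of the warning being suppressed (a hypothetical standalone snippet assuming an initialized sqlContext, not part of the diff): iris has column names containing ".", which createDataFrame() replaces with "_" and warns about; since run-all.R now sets options(warn = 2), that warning would otherwise fail the test.

# Sketch only, assuming an initialized sqlContext.
# iris has names like "Sepal.Length"; "." is replaced with "_" in Spark
# column names, and createDataFrame() emits a warning when it renames.
training <- suppressWarnings(createDataFrame(sqlContext, iris))
columns(training)
# [1] "Sepal_Length" "Sepal_Width"  "Petal_Length" "Petal_Width"  "Species"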

test <- select(training, "Sepal_Length")
model <- glm(Sepal_Width ~ Sepal_Length, training, family = "gaussian")
prediction <- predict(model, test)
@@ -39,7 +39,7 @@ test_that("glm and predict", {
})

test_that("glm should work with long formula", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
training$LongLongLongLongLongName <- training$Sepal_Width
training$VeryLongLongLongLonLongName <- training$Sepal_Length
training$AnotherLongLongLongLongName <- training$Species
@@ -51,31 +51,31 @@ test_that("glm should work with long formula", {
})

test_that("predictions match with native glm", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
})

test_that("dot minus and intercept vs native glm", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ . - Species + 0, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ . - Species + 0, data = iris), iris)
expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
})

test_that("feature interaction vs native glm", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ Species:Sepal_Length, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ Species:Sepal.Length, data = iris), iris)
expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
})

test_that("summary coefficients match with native glm", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
stats <- summary(glm(Sepal_Width ~ Sepal_Length + Species, data = training, solver = "normal"))
coefs <- unlist(stats$coefficients)
devianceResiduals <- unlist(stats$devianceResiduals)
@@ -92,7 +92,7 @@ test_that("summary coefficients match with native glm", {
})

test_that("summary coefficients match with native glm of family 'binomial'", {
-df <- createDataFrame(sqlContext, iris)
+df <- suppressWarnings(createDataFrame(sqlContext, iris))
training <- filter(df, df$Species != "setosa")
stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width, data = training,
family = "binomial"))
File renamed without changes.
@@ -133,38 +133,45 @@ test_that("create DataFrame from RDD", {
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))

-df <- jsonFile(sqlContext, jsonPathNa)
-hiveCtx <- tryCatch({
-  newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
-},
-error = function(err) {
-  skip("Hive is not build with SparkSQL, skipped")
-})
-sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
-insertInto(df, "people")
-expect_equal(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"), c(16))
-expect_equal(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"), c(176.5))

schema <- structType(structField("name", "string"), structField("age", "integer"),
structField("height", "float"))
-df2 <- createDataFrame(sqlContext, df.toRDD, schema)
-df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
+df <- read.df(sqlContext, jsonPathNa, "json", schema)
+df2 <- createDataFrame(sqlContext, toRDD(df), schema)
+df2AsDF <- as.DataFrame(sqlContext, toRDD(df), schema)
expect_equal(columns(df2), c("name", "age", "height"))
expect_equal(columns(df2AsDF), c("name", "age", "height"))
expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
-expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
-expect_equal(collect(where(df2AsDF, df2$name == "Bob")), c("Bob", 16, 176.5))
+expect_equal(as.list(collect(where(df2, df2$name == "Bob"))),
+             list(name = "Bob", age = 16, height = 176.5))
+expect_equal(as.list(collect(where(df2AsDF, df2AsDF$name == "Bob"))),
+             list(name = "Bob", age = 16, height = 176.5))

localDF <- data.frame(name=c("John", "Smith", "Sarah"),
-                      age=c(19, 23, 18),
-                      height=c(164.10, 181.4, 173.7))
+                      age=c(19L, 23L, 18L),
+                      height=c(176.5, 181.4, 173.7))
df <- createDataFrame(sqlContext, localDF, schema)
expect_is(df, "DataFrame")
expect_equal(count(df), 3)
expect_equal(columns(df), c("name", "age", "height"))
expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
-expect_equal(collect(where(df, df$name == "John")), c("John", 19, 164.10))
+expect_equal(as.list(collect(where(df, df$name == "John"))),
+             list(name = "John", age = 19L, height = 176.5))

+ssc <- callJMethod(sc, "sc")
+hiveCtx <- tryCatch({
+  newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+},
+error = function(err) {
+  skip("Hive is not build with SparkSQL, skipped")
+})
+sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
+df <- read.df(hiveCtx, jsonPathNa, "json", schema)
+invisible(insertInto(df, "people"))
+expect_equal(collect(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"))$age,
+             c(16))
+expect_equal(collect(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"))$height,
+             c(176.5))
})

test_that("convert NAs to null type in DataFrames", {
@@ -250,7 +257,7 @@ test_that("create DataFrame from list or data.frame", {
ldf2 <- collect(df)
expect_equal(ldf$a, ldf2$a)

-irisdf <- createDataFrame(sqlContext, iris)
+irisdf <- suppressWarnings(createDataFrame(sqlContext, iris))
iris_collected <- collect(irisdf)
expect_equivalent(iris_collected[,-5], iris[,-5])
expect_equal(iris_collected$Species, as.character(iris$Species))
@@ -463,7 +470,7 @@ test_that("union on two RDDs created from DataFrames returns an RRDD", {
RDD2 <- toRDD(df)
unioned <- unionRDD(RDD1, RDD2)
expect_is(unioned, "RDD")
-expect_equal(SparkR:::getSerializedMode(unioned), "byte")
+expect_equal(getSerializedMode(unioned), "byte")
expect_equal(collect(unioned)[[2]]$name, "Andy")
})

@@ -485,13 +492,13 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", {

unionByte <- unionRDD(rdd, dfRDD)
expect_is(unionByte, "RDD")
-expect_equal(SparkR:::getSerializedMode(unionByte), "byte")
+expect_equal(getSerializedMode(unionByte), "byte")
expect_equal(collect(unionByte)[[1]], 1)
expect_equal(collect(unionByte)[[12]]$name, "Andy")

unionString <- unionRDD(textRDD, dfRDD)
expect_is(unionString, "RDD")
-expect_equal(SparkR:::getSerializedMode(unionString), "byte")
+expect_equal(getSerializedMode(unionString), "byte")
expect_equal(collect(unionString)[[1]], "Michael")
expect_equal(collect(unionString)[[5]]$name, "Andy")
})
@@ -504,7 +511,7 @@ test_that("objectFile() works with row serialization", {
objectIn <- objectFile(sc, objectPath)

expect_is(objectIn, "RDD")
-expect_equal(SparkR:::getSerializedMode(objectIn), "byte")
+expect_equal(getSerializedMode(objectIn), "byte")
expect_equal(collect(objectIn)[[2]]$age, 30)
})

@@ -849,6 +856,7 @@ test_that("write.df() as parquet file", {
})

test_that("test HiveContext", {
+ssc <- callJMethod(sc, "sc")
hiveCtx <- tryCatch({
newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
},
@@ -863,10 +871,10 @@ test_that("test HiveContext", {
expect_equal(count(df2), 3)

jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp")
-saveAsTable(df, "json", "json", "append", path = jsonPath2)
-df3 <- sql(hiveCtx, "select * from json")
+invisible(saveAsTable(df, "json2", "json", "append", path = jsonPath2))
Member: Why is invisible necessary here?

Contributor Author: It is there to avoid printing NULL in the console. It may not be necessary within testthat, but I think it does not hurt here.
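To illustrate the point (plain R, not part of the diff): a visible NULL returned at the R top level is auto-printed, and invisible() suppresses that.

# Sketch: saveAsTable() and insertInto() return NULL; without invisible()
# that NULL would be auto-printed into the test output.
f <- function() NULL
f()             # prints: NULL
invisible(f())  # prints nothing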

+df3 <- sql(hiveCtx, "select * from json2")
expect_is(df3, "DataFrame")
-expect_equal(count(df3), 6)
+expect_equal(count(df3), 3)
})

test_that("column operators", {
@@ -1311,7 +1319,7 @@ test_that("toJSON() returns an RDD of the correct values", {
df <- jsonFile(sqlContext, jsonPath)
testRDD <- toJSON(df)
expect_is(testRDD, "RDD")
-expect_equal(SparkR:::getSerializedMode(testRDD), "string")
+expect_equal(getSerializedMode(testRDD), "string")
expect_equal(collect(testRDD)[[1]], mockLines[1])
})

@@ -1641,7 +1649,7 @@ test_that("SQL error message is returned from JVM", {
expect_equal(grepl("Table not found: blah", retError), TRUE)
})

-irisDF <- createDataFrame(sqlContext, iris)
+irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))

test_that("Method as.data.frame as a synonym for collect()", {
expect_equal(as.data.frame(irisDF), collect(irisDF))
@@ -1670,7 +1678,7 @@ test_that("attach() on a DataFrame", {
})

test_that("with() on a DataFrame", {
-df <- createDataFrame(sqlContext, iris)
+df <- suppressWarnings(createDataFrame(sqlContext, iris))
expect_error(Sepal_Length)
sum1 <- with(df, list(summary(Sepal_Length), summary(Sepal_Width)))
expect_equal(collect(sum1[[1]])[1, "Sepal_Length"], "150")
File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions R/pkg/tests/run-all.R
@@ -18,4 +18,7 @@
library(testthat)
library(SparkR)

+# Turn all warnings into errors
+options("warn" = 2)

test_package("SparkR")
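As a quick illustration of what the new option does (plain R, not from the PR): warn = 2 promotes every warning to an error, so a stray warning now fails the test run instead of scrolling past in the log.

# Sketch: a call that would normally only warn ("NAs introduced by
# coercion") is signaled as an error once warn = 2 is set.
options(warn = 2)
res <- tryCatch(as.integer("abc"),
                error = function(e) "warning promoted to error")
print(res)  # "warning promoted to error"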
2 changes: 1 addition & 1 deletion R/run-tests.sh
@@ -23,7 +23,7 @@ FAILED=0
LOGFILE=$FWDIR/unit-tests.out
rm -f $LOGFILE

-SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
FAILED=$((PIPESTATUS[0]||$FAILED))

if [[ $FAILED != 0 ]]; then
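For reference, a hedged sketch of setting the same property from R instead of on the spark-submit command line (assuming the sparkR.init() entry point used elsewhere in these tests; Spark forwards spark.hadoop.* entries to the Hadoop configuration, so this pins the default filesystem to the local one and keeps test paths from resolving against HDFS):

# Sketch only: rough equivalent of --conf spark.hadoop.fs.default.name="file:///"
# when initializing SparkR directly.
sc <- sparkR.init(master = "local[2]",
                  sparkEnvir = list(spark.hadoop.fs.default.name = "file:///"))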