File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -20,7 +20,7 @@ runScript <- function() {
sparkHome <- Sys.getenv("SPARK_HOME")
sparkTestJarPath <- "R/lib/SparkR/test_support/sparktestjar_2.10-1.0.jar"
jarPath <- paste("--jars", shQuote(file.path(sparkHome, sparkTestJarPath)))
-scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/jarTest.R")
+scriptPath <- file.path(sparkHome, "R/lib/SparkR/tests/testthat/jarTest.R")
submitPath <- file.path(sparkHome, "bin/spark-submit")
res <- system2(command = submitPath,
args = c(jarPath, scriptPath),
@@ -26,7 +26,7 @@ sc <- sparkR.init()
sqlContext <- sparkRSQL.init(sc)

test_that("glm and predict", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
Member: Could you add a note in JIRA SPARK-11976 that once it is fixed we should remove the suppressWarnings around the DataFrames created from iris?

Contributor Author: There are a number of suppressWarnings calls in this PR. Instead of adding notes for each of them here, I'd like to add a note in SPARK-11976 saying that cleaning up these warnings is part of that job. Is that OK?

Member: Right, that's what I mean: add a note in SPARK-11976 to remove these. Alternatively, we could open another JIRA to change all these tests to use a different data frame that doesn't have column names containing ".".

Contributor Author: Added a note in SPARK-11976.
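For context, a minimal sketch of the warning being suppressed (a hypothetical standalone snippet assuming an initialized sqlContext, not part of the diff): iris has column names containing ".", which createDataFrame() replaces with "_" and warns about; since run-all.R now sets options(warn = 2), that warning would otherwise fail the test.

# Sketch only, assuming an initialized sqlContext.
# iris has names like "Sepal.Length"; "." is replaced with "_" in Spark
# column names, and createDataFrame() emits a warning when it renames.
training <- suppressWarnings(createDataFrame(sqlContext, iris))
columns(training)
# [1] "Sepal_Length" "Sepal_Width"  "Petal_Length" "Petal_Width"  "Species"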

test <- select(training, "Sepal_Length")
model <- glm(Sepal_Width ~ Sepal_Length, training, family = "gaussian")
prediction <- predict(model, test)
@@ -39,7 +39,7 @@ test_that("glm and predict", {
})

test_that("glm should work with long formula", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
training$LongLongLongLongLongName <- training$Sepal_Width
training$VeryLongLongLongLonLongName <- training$Sepal_Length
training$AnotherLongLongLongLongName <- training$Species
@@ -51,31 +51,31 @@ test_that("glm should work with long formula", {
})

test_that("predictions match with native glm", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
})

test_that("dot minus and intercept vs native glm", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ . - Species + 0, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ . - Species + 0, data = iris), iris)
expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
})

test_that("feature interaction vs native glm", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
model <- glm(Sepal_Width ~ Species:Sepal_Length, data = training)
vals <- collect(select(predict(model, training), "prediction"))
rVals <- predict(glm(Sepal.Width ~ Species:Sepal.Length, data = iris), iris)
expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
})

test_that("summary coefficients match with native glm", {
-training <- createDataFrame(sqlContext, iris)
+training <- suppressWarnings(createDataFrame(sqlContext, iris))
stats <- summary(glm(Sepal_Width ~ Sepal_Length + Species, data = training, solver = "normal"))
coefs <- unlist(stats$coefficients)
devianceResiduals <- unlist(stats$devianceResiduals)
@@ -92,7 +92,7 @@ test_that("summary coefficients match with native glm", {
})

test_that("summary coefficients match with native glm of family 'binomial'", {
-df <- createDataFrame(sqlContext, iris)
+df <- suppressWarnings(createDataFrame(sqlContext, iris))
training <- filter(df, df$Species != "setosa")
stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width, data = training,
family = "binomial"))
File renamed without changes.
@@ -133,38 +133,45 @@ test_that("create DataFrame from RDD", {
expect_equal(columns(df), c("a", "b"))
expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))

-df <- jsonFile(sqlContext, jsonPathNa)
-hiveCtx <- tryCatch({
-  newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
-},
-error = function(err) {
-  skip("Hive is not build with SparkSQL, skipped")
-})
-sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
-insertInto(df, "people")
-expect_equal(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"), c(16))
-expect_equal(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"), c(176.5))

schema <- structType(structField("name", "string"), structField("age", "integer"),
structField("height", "float"))
-df2 <- createDataFrame(sqlContext, df.toRDD, schema)
-df2AsDF <- as.DataFrame(sqlContext, df.toRDD, schema)
+df <- read.df(sqlContext, jsonPathNa, "json", schema)
+df2 <- createDataFrame(sqlContext, toRDD(df), schema)
+df2AsDF <- as.DataFrame(sqlContext, toRDD(df), schema)
expect_equal(columns(df2), c("name", "age", "height"))
expect_equal(columns(df2AsDF), c("name", "age", "height"))
expect_equal(dtypes(df2), list(c("name", "string"), c("age", "int"), c("height", "float")))
expect_equal(dtypes(df2AsDF), list(c("name", "string"), c("age", "int"), c("height", "float")))
-expect_equal(collect(where(df2, df2$name == "Bob")), c("Bob", 16, 176.5))
-expect_equal(collect(where(df2AsDF, df2$name == "Bob")), c("Bob", 16, 176.5))
+expect_equal(as.list(collect(where(df2, df2$name == "Bob"))),
+             list(name = "Bob", age = 16, height = 176.5))
+expect_equal(as.list(collect(where(df2AsDF, df2AsDF$name == "Bob"))),
+             list(name = "Bob", age = 16, height = 176.5))

localDF <- data.frame(name=c("John", "Smith", "Sarah"),
-                      age=c(19, 23, 18),
-                      height=c(164.10, 181.4, 173.7))
+                      age=c(19L, 23L, 18L),
+                      height=c(176.5, 181.4, 173.7))
df <- createDataFrame(sqlContext, localDF, schema)
expect_is(df, "DataFrame")
expect_equal(count(df), 3)
expect_equal(columns(df), c("name", "age", "height"))
expect_equal(dtypes(df), list(c("name", "string"), c("age", "int"), c("height", "float")))
-expect_equal(collect(where(df, df$name == "John")), c("John", 19, 164.10))
+expect_equal(as.list(collect(where(df, df$name == "John"))),
+             list(name = "John", age = 19L, height = 176.5))

+ssc <- callJMethod(sc, "sc")
+hiveCtx <- tryCatch({
+  newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
+},
+error = function(err) {
+  skip("Hive is not build with SparkSQL, skipped")
+})
+sql(hiveCtx, "CREATE TABLE people (name string, age double, height float)")
+df <- read.df(hiveCtx, jsonPathNa, "json", schema)
+invisible(insertInto(df, "people"))
+expect_equal(collect(sql(hiveCtx, "SELECT age from people WHERE name = 'Bob'"))$age,
+             c(16))
+expect_equal(collect(sql(hiveCtx, "SELECT height from people WHERE name ='Bob'"))$height,
+             c(176.5))
})

test_that("convert NAs to null type in DataFrames", {
@@ -250,7 +257,7 @@ test_that("create DataFrame from list or data.frame", {
ldf2 <- collect(df)
expect_equal(ldf$a, ldf2$a)

-irisdf <- createDataFrame(sqlContext, iris)
+irisdf <- suppressWarnings(createDataFrame(sqlContext, iris))
iris_collected <- collect(irisdf)
expect_equivalent(iris_collected[,-5], iris[,-5])
expect_equal(iris_collected$Species, as.character(iris$Species))
@@ -463,7 +470,7 @@ test_that("union on two RDDs created from DataFrames returns an RRDD", {
RDD2 <- toRDD(df)
unioned <- unionRDD(RDD1, RDD2)
expect_is(unioned, "RDD")
-expect_equal(SparkR:::getSerializedMode(unioned), "byte")
+expect_equal(getSerializedMode(unioned), "byte")
expect_equal(collect(unioned)[[2]]$name, "Andy")
})

@@ -485,13 +492,13 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", {

unionByte <- unionRDD(rdd, dfRDD)
expect_is(unionByte, "RDD")
-expect_equal(SparkR:::getSerializedMode(unionByte), "byte")
+expect_equal(getSerializedMode(unionByte), "byte")
expect_equal(collect(unionByte)[[1]], 1)
expect_equal(collect(unionByte)[[12]]$name, "Andy")

unionString <- unionRDD(textRDD, dfRDD)
expect_is(unionString, "RDD")
-expect_equal(SparkR:::getSerializedMode(unionString), "byte")
+expect_equal(getSerializedMode(unionString), "byte")
expect_equal(collect(unionString)[[1]], "Michael")
expect_equal(collect(unionString)[[5]]$name, "Andy")
})
@@ -504,7 +511,7 @@ test_that("objectFile() works with row serialization", {
objectIn <- objectFile(sc, objectPath)

expect_is(objectIn, "RDD")
-expect_equal(SparkR:::getSerializedMode(objectIn), "byte")
+expect_equal(getSerializedMode(objectIn), "byte")
expect_equal(collect(objectIn)[[2]]$age, 30)
})

@@ -849,6 +856,7 @@ test_that("write.df() as parquet file", {
})

test_that("test HiveContext", {
+ssc <- callJMethod(sc, "sc")
hiveCtx <- tryCatch({
newJObject("org.apache.spark.sql.hive.test.TestHiveContext", ssc)
},
@@ -863,10 +871,10 @@ test_that("test HiveContext", {
expect_equal(count(df2), 3)

jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp")
-saveAsTable(df, "json", "json", "append", path = jsonPath2)
-df3 <- sql(hiveCtx, "select * from json")
+invisible(saveAsTable(df, "json2", "json", "append", path = jsonPath2))
Member: Why is invisible necessary here?

Contributor Author: It is there to avoid printing NULL in the console. It may not be necessary within testthat, but I think it does not hurt here.
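To illustrate the point (plain R, not part of the diff): a visible NULL returned at the R top level is auto-printed, and invisible() suppresses that.

# Sketch: saveAsTable() and insertInto() return NULL; without invisible()
# that NULL would be auto-printed into the test output.
f <- function() NULL
f()             # prints: NULL
invisible(f())  # prints nothing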

+df3 <- sql(hiveCtx, "select * from json2")
expect_is(df3, "DataFrame")
-expect_equal(count(df3), 6)
+expect_equal(count(df3), 3)
})

test_that("column operators", {
@@ -1311,7 +1319,7 @@ test_that("toJSON() returns an RDD of the correct values", {
df <- jsonFile(sqlContext, jsonPath)
testRDD <- toJSON(df)
expect_is(testRDD, "RDD")
-expect_equal(SparkR:::getSerializedMode(testRDD), "string")
+expect_equal(getSerializedMode(testRDD), "string")
expect_equal(collect(testRDD)[[1]], mockLines[1])
})

@@ -1641,7 +1649,7 @@ test_that("SQL error message is returned from JVM", {
expect_equal(grepl("Table not found: blah", retError), TRUE)
})

-irisDF <- createDataFrame(sqlContext, iris)
+irisDF <- suppressWarnings(createDataFrame(sqlContext, iris))

test_that("Method as.data.frame as a synonym for collect()", {
expect_equal(as.data.frame(irisDF), collect(irisDF))
@@ -1670,7 +1678,7 @@ test_that("attach() on a DataFrame", {
})

test_that("with() on a DataFrame", {
-df <- createDataFrame(sqlContext, iris)
+df <- suppressWarnings(createDataFrame(sqlContext, iris))
expect_error(Sepal_Length)
sum1 <- with(df, list(summary(Sepal_Length), summary(Sepal_Width)))
expect_equal(collect(sum1[[1]])[1, "Sepal_Length"], "150")
File renamed without changes.
File renamed without changes.
3 changes: 3 additions & 0 deletions R/pkg/tests/run-all.R
@@ -18,4 +18,7 @@
library(testthat)
library(SparkR)

+# Turn all warnings into errors
+options("warn" = 2)

test_package("SparkR")
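As a quick illustration of what the new option does (plain R, not from the PR): warn = 2 promotes every warning to an error, so a stray warning now fails the test run instead of scrolling past in the log.

# Sketch: a call that would normally only warn ("NAs introduced by
# coercion") is signaled as an error once warn = 2 is set.
options(warn = 2)
res <- tryCatch(as.integer("abc"),
                error = function(e) "warning promoted to error")
print(res)  # "warning promoted to error"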
2 changes: 1 addition & 1 deletion R/run-tests.sh
@@ -23,7 +23,7 @@ FAILED=0
LOGFILE=$FWDIR/unit-tests.out
rm -f $LOGFILE

-SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+SPARK_TESTING=1 $FWDIR/../bin/sparkR --driver-java-options "-Dlog4j.configuration=file:$FWDIR/log4j.properties" --conf spark.hadoop.fs.default.name="file:///" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
FAILED=$((PIPESTATUS[0]||$FAILED))

if [[ $FAILED != 0 ]]; then
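For reference, a hedged sketch of setting the same property from R instead of on the spark-submit command line (assuming the sparkR.init() entry point used elsewhere in these tests; Spark forwards spark.hadoop.* entries to the Hadoop configuration, so this pins the default filesystem to the local one and keeps test paths from resolving against HDFS):

# Sketch only: rough equivalent of --conf spark.hadoop.fs.default.name="file:///"
# when initializing SparkR directly.
sc <- sparkR.init(master = "local[2]",
                  sparkEnvir = list(spark.hadoop.fs.default.name = "file:///"))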