@@ -61,14 +61,22 @@ test_that("spark.glm and predict", {
6161
6262 # poisson family
6363 model <- spark.glm(training , Sepal_Width ~ Sepal_Length + Species ,
64- family = poisson(link = identity ))
64+ family = poisson(link = identity ))
6565 prediction <- predict(model , training )
6666 expect_equal(typeof(take(select(prediction , " prediction" ), 1 )$ prediction ), " double" )
6767 vals <- collect(select(prediction , " prediction" ))
6868 rVals <- suppressWarnings(predict(glm(Sepal.Width ~ Sepal.Length + Species ,
69- data = iris , family = poisson(link = identity )), iris ))
69+ data = iris , family = poisson(link = identity )), iris ))
7070 expect_true(all(abs(rVals - vals ) < 1e-6 ), rVals - vals )
7171
72+ # Gamma family: y is simulated with shape 10 and mean exp(0.5 + 1.2 * x)
73+ x <- runif(100 , - 1 , 1 )
74+ y <- rgamma(100 , rate = 10 / exp(0.5 + 1.2 * x ), shape = 10 )
75+ df <- as.DataFrame(as.data.frame(list (x = x , y = y )))
76+ model <- glm(y ~ x , family = Gamma , df )
77+ out <- capture.output(print(summary(model )))
78+ expect_true(any(grepl(" Dispersion parameter for gamma family" , out )))
79+
7280 # Test stats::predict is working
7381 x <- rnorm(15 )
7482 y <- x + rnorm(15 )
@@ -103,11 +111,11 @@ test_that("spark.glm summary", {
103111 df <- suppressWarnings(createDataFrame(iris ))
104112 training <- df [df $ Species %in% c(" versicolor" , " virginica" ), ]
105113 stats <- summary(spark.glm(training , Species ~ Sepal_Length + Sepal_Width ,
106- family = binomial(link = " logit" )))
114+ family = binomial(link = " logit" )))
107115
108116 rTraining <- iris [iris $ Species %in% c(" versicolor" , " virginica" ), ]
109117 rStats <- summary(glm(Species ~ Sepal.Length + Sepal.Width , data = rTraining ,
110- family = binomial(link = " logit" )))
118+ family = binomial(link = " logit" )))
111119
112120 coefs <- unlist(stats $ coefficients )
113121 rCoefs <- unlist(rStats $ coefficients )
@@ -222,7 +230,7 @@ test_that("glm and predict", {
222230 training <- suppressWarnings(createDataFrame(iris ))
223231 # gaussian family
224232 model <- glm(Sepal_Width ~ Sepal_Length + Species , data = training )
225- prediction <- predict(model , training )
233+ prediction <- predict(model , training )
226234 expect_equal(typeof(take(select(prediction , " prediction" ), 1 )$ prediction ), " double" )
227235 vals <- collect(select(prediction , " prediction" ))
228236 rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species , data = iris ), iris )
@@ -235,7 +243,7 @@ test_that("glm and predict", {
235243 expect_equal(typeof(take(select(prediction , " prediction" ), 1 )$ prediction ), " double" )
236244 vals <- collect(select(prediction , " prediction" ))
237245 rVals <- suppressWarnings(predict(glm(Sepal.Width ~ Sepal.Length + Species ,
238- data = iris , family = poisson(link = identity )), iris ))
246+ data = iris , family = poisson(link = identity )), iris ))
239247 expect_true(all(abs(rVals - vals ) < 1e-6 ), rVals - vals )
240248
241249 # Test stats::predict is working
@@ -268,11 +276,11 @@ test_that("glm summary", {
268276 df <- suppressWarnings(createDataFrame(iris ))
269277 training <- df [df $ Species %in% c(" versicolor" , " virginica" ), ]
270278 stats <- summary(glm(Species ~ Sepal_Length + Sepal_Width , data = training ,
271- family = binomial(link = " logit" )))
279+ family = binomial(link = " logit" )))
272280
273281 rTraining <- iris [iris $ Species %in% c(" versicolor" , " virginica" ), ]
274282 rStats <- summary(glm(Species ~ Sepal.Length + Sepal.Width , data = rTraining ,
275- family = binomial(link = " logit" )))
283+ family = binomial(link = " logit" )))
276284
277285 coefs <- unlist(stats $ coefficients )
278286 rCoefs <- unlist(rStats $ coefficients )
@@ -409,7 +417,7 @@ test_that("spark.survreg", {
409417 x = c(0 , 2 , 1 , 1 , 1 , 0 , 0 ), sex = c(0 , 0 , 0 , 0 , 1 , 1 , 1 ))
410418 expect_error(
411419 model <- survival :: survreg(formula = survival :: Surv(time , status ) ~ x + sex , data = rData ),
412- NA )
420+ NA )
413421 expect_equal(predict(model , rData )[[1 ]], 3.724591 , tolerance = 1e-4 )
414422 }
415423})
0 commit comments