diff --git a/NEWS.md b/NEWS.md index 4dcde5106..e0c9193ac 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,21 +1,33 @@ # parsnip (development version) -* `generics::required_pkgs()` was extended for `parsnip` objects. - -* The `liquidSVM` engine for `svm_rbf()` was deprecated due to that package's removal from CRAN. (#425) +## Model Specification Changes * A new linear SVM model `svm_linear()` is now available with the `LiblineaR` engine (#424) and the `kernlab` engine (#438), and the `LiblineaR` engine is available for `logistic_reg()` as well (#429). These models can use sparse matrices via `fit_xy()` (#447) and have a `tidy` method (#474). +* For models with `glmnet` engines: + + - A single value is required for `penalty` (either a single numeric value or a value of `tune()`) (#481). + - A special argument called `path_values` can be used to set the `lambda` path as a specific set of numbers (independent of the value of `penalty`). A pure ridge regression model (i.e., `mixture = 0`) will generate incorrect values if the path does not include zero. See issue #431 for discussion (#486). + +* The `liquidSVM` engine for `svm_rbf()` was deprecated due to that package's removal from CRAN. (#425) + * New model specification `survival_reg()` for the new mode `"censored regression"` (#444). `surv_reg()` is now soft-deprecated (#448). * New model specification `proportional_hazards()` for the `"censored regression"` mode (#451). +## Other Changes + * Re-licensed package from GPL-2 to MIT. See [consent from copyright holders here](https://github.com/tidymodels/parsnip/issues/462). * `set_mode()` now checks if `mode` is compatible with the model class, similar to `new_model_spec()` (@jtlandis, #467). * Re-organized model documentation for `update` methods (#479). + + +* `generics::required_pkgs()` was extended for `parsnip` objects. 
+ + # parsnip 0.1.5 diff --git a/R/linear_reg.R b/R/linear_reg.R index 372c7f503..187c52fcc 100644 --- a/R/linear_reg.R +++ b/R/linear_reg.R @@ -107,14 +107,23 @@ translate.linear_reg <- function(x, engine = x$engine, ...) { x <- translate.default(x, engine, ...) if (engine == "glmnet") { - # See discussion in https://github.com/tidymodels/parsnip/issues/195 - x$method$fit$args$lambda <- NULL + check_glmnet_penalty(x) + if (any(names(x$eng_args) == "path_values")) { + # Since we decouple the parsnip `penalty` argument from being the same + # as the glmnet `lambda` value, `path_values` allows users to set the + # path differently from the default that glmnet uses. See + # https://github.com/tidymodels/parsnip/issues/431 + x$method$fit$args$lambda <- x$eng_args$path_values + x$eng_args$path_values <- NULL + x$method$fit$args$path_values <- NULL + } else { + # See discussion in https://github.com/tidymodels/parsnip/issues/195 + x$method$fit$args$lambda <- NULL + } # Since the `fit` information is gone for the penalty, we need to have an # evaluated value for the parameter. x$args$penalty <- rlang::eval_tidy(x$args$penalty) - check_glmnet_penalty(x) } - x } diff --git a/R/logistic_reg.R b/R/logistic_reg.R index 279e48002..44d70f3e6 100644 --- a/R/logistic_reg.R +++ b/R/logistic_reg.R @@ -108,14 +108,23 @@ translate.logistic_reg <- function(x, engine = x$engine, ...) { arg_vals <- x$method$fit$args arg_names <- names(arg_vals) - if (engine == "glmnet") { - # See discussion in https://github.com/tidymodels/parsnip/issues/195 - arg_vals$lambda <- NULL + check_glmnet_penalty(x) + if (any(names(x$eng_args) == "path_values")) { + # Since we decouple the parsnip `penalty` argument from being the same + # as the glmnet `lambda` value, `path_values` allows users to set the + # path differently from the default that glmnet uses. 
See + # https://github.com/tidymodels/parsnip/issues/431 + x$method$fit$args$lambda <- x$eng_args$path_values + x$eng_args$path_values <- NULL + x$method$fit$args$path_values <- NULL + } else { + # See discussion in https://github.com/tidymodels/parsnip/issues/195 + x$method$fit$args$lambda <- NULL + } # Since the `fit` information is gone for the penalty, we need to have an # evaluated value for the parameter. x$args$penalty <- rlang::eval_tidy(x$args$penalty) - check_glmnet_penalty(x) } if (engine == "LiblineaR") { @@ -134,11 +143,8 @@ translate.logistic_reg <- function(x, engine = x$engine, ...) { rlang::abort("For the LiblineaR engine, mixture must be 0 or 1.") } } - + x$method$fit$args <- arg_vals } - - x$method$fit$args <- arg_vals - x } diff --git a/R/misc.R b/R/misc.R index 2332e6afc..be7fc42f4 100644 --- a/R/misc.R +++ b/R/misc.R @@ -324,10 +324,12 @@ stan_conf_int <- function(object, newdata) { } check_glmnet_penalty <- function(x) { - if (length(x$args$penalty) != 1) { + pen <- rlang::eval_tidy(x$args$penalty) + + if (length(pen) != 1) { rlang::abort(c( "For the glmnet engine, `penalty` must be a single number (or a value of `tune()`).", - glue::glue("There are {length(x$args$penalty)} values for `penalty`."), + glue::glue("There are {length(pen)} values for `penalty`."), "To try multiple values for total regularization, use the tune package.", "To predict multiple penalties, use `multi_predict()`" )) diff --git a/man/linear_reg.Rd b/man/linear_reg.Rd index b11544e60..29c7cefb7 100644 --- a/man/linear_reg.Rd +++ b/man/linear_reg.Rd @@ -100,15 +100,38 @@ call. For this type of model, the template of the fit calls are below. ## family = "gaussian") } -For \code{glmnet} models, the full regularization path is always fit -regardless of the value given to \code{penalty}. Also, there is the option to -pass multiple values (or no values) to the \code{penalty} argument. 
When -using the \code{predict()} method in these cases, the return value depends on -the value of \code{penalty}. When using \code{predict()}, only a single value of -the penalty can be used. When predicting on multiple penalties, the -\code{multi_predict()} function can be used. It returns a tibble with a list -column called \code{.pred} that contains a tibble with all of the penalty -results. +The glmnet engine requires a single value for the \code{penalty} argument (a +number or \code{tune()}), but the full regularization path is always fit +regardless of the value given to \code{penalty}. To pass in a custom sequence +of values for glmnet’s \code{lambda}, use the argument \code{path_values} in +\code{set_engine()}. This will assign the value of the glmnet \code{lambda} +parameter without disturbing the value given to \code{linear_reg(penalty)}. +For example:\if{html}{\out{
}}\preformatted{linear_reg(penalty = .1) \%>\% + set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (regression) +## +## Main Arguments: +## penalty = 0.1 +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "gaussian") +} + +When fitting a pure ridge regression model (i.e., \code{mixture = 0}), we +\emph{strongly suggest} that you pass in a vector for \code{path_values} that +includes zero. See \href{https://github.com/tidymodels/parsnip/issues/431}{issue #431} for a +discussion. + +When using \code{predict()}, the single \code{penalty} value used for prediction +is the one specified in \code{linear_reg()}. + +To predict on multiple penalties, use the \code{multi_predict()} function. +This function returns a tibble with a list column called \code{.pred} +containing all of the penalty results. } \subsection{stan}{\if{html}{\out{
}}\preformatted{linear_reg() \%>\% diff --git a/man/logistic_reg.Rd b/man/logistic_reg.Rd index 6dff2100d..2859d9f9f 100644 --- a/man/logistic_reg.Rd +++ b/man/logistic_reg.Rd @@ -100,15 +100,38 @@ call. For this type of model, the template of the fit calls are below. ## family = "binomial") } -For \code{glmnet} models, the full regularization path is always fit -regardless of the value given to \code{penalty}. Also, there is the option to -pass multiple values (or no values) to the \code{penalty} argument. When -using the \code{predict()} method in these cases, the return value depends on -the value of \code{penalty}. When using \code{predict()}, only a single value of -the penalty can be used. When predicting on multiple penalties, the -\code{multi_predict()} function can be used. It returns a tibble with a list -column called \code{.pred} that contains a tibble with all of the penalty -results. +The glmnet engine requires a single value for the \code{penalty} argument (a +number or \code{tune()}), but the full regularization path is always fit +regardless of the value given to \code{penalty}. To pass in a custom sequence +of values for glmnet’s \code{lambda}, use the argument \code{path_values} in +\code{set_engine()}. This will assign the value of the glmnet \code{lambda} +parameter without disturbing the value given to \code{logistic_reg(penalty)}. +For example:\if{html}{\out{
}}\preformatted{logistic_reg(penalty = .1) \%>\% + set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Logistic Regression Model Specification (classification) +## +## Main Arguments: +## penalty = 0.1 +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "binomial") +} + +When fitting a pure ridge regression model (i.e., \code{mixture = 0}), we +\emph{strongly suggest} that you pass in a vector for \code{path_values} that +includes zero. See \href{https://github.com/tidymodels/parsnip/issues/431}{issue #431} for a +discussion. + +When using \code{predict()}, the single \code{penalty} value used for prediction +is the one specified in \code{logistic_reg()}. + +To predict on multiple penalties, use the \code{multi_predict()} function. +This function returns a tibble with a list column called \code{.pred} +containing all of the penalty results. } \subsection{LiblineaR}{\if{html}{\out{
}}\preformatted{logistic_reg() \%>\% diff --git a/man/multinom_reg.Rd b/man/multinom_reg.Rd index 8dc8b2e5c..6d9ba5fcc 100644 --- a/man/multinom_reg.Rd +++ b/man/multinom_reg.Rd @@ -82,15 +82,38 @@ call. For this type of model, the template of the fit calls are below. ## family = "multinomial") } -For \code{glmnet} models, the full regularization path is always fit -regardless of the value given to \code{penalty}. Also, there is the option to -pass multiple values (or no values) to the \code{penalty} argument. When -using the \code{predict()} method in these cases, the return value depends on -the value of \code{penalty}. When using \code{predict()}, only a single value of -the penalty can be used. When predicting on multiple penalties, the -\code{multi_predict()} function can be used. It returns a tibble with a list -column called \code{.pred} that contains a tibble with all of the penalty -results. +The glmnet engine requires a single value for the \code{penalty} argument (a +number or \code{tune()}), but the full regularization path is always fit +regardless of the value given to \code{penalty}. To pass in a custom sequence +of values for glmnet’s \code{lambda}, use the argument \code{path_values} in +\code{set_engine()}. This will assign the value of the glmnet \code{lambda} +parameter without disturbing the value given to \code{multinom_reg(penalty)}. +For example:\if{html}{\out{
}}\preformatted{multinom_reg(penalty = .1) \%>\% + set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) \%>\% + translate() +}\if{html}{\out{
}}\preformatted{## Multinomial Regression Model Specification (classification) +## +## Main Arguments: +## penalty = 0.1 +## +## Computational engine: glmnet +## +## Model fit template: +## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(), +## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "multinomial") +} + +When fitting a pure ridge regression model (i.e., \code{mixture = 0}), we +\emph{strongly suggest} that you pass in a vector for \code{path_values} that +includes zero. See \href{https://github.com/tidymodels/parsnip/issues/431}{issue #431} for a +discussion. + +When using \code{predict()}, the single \code{penalty} value used for prediction +is the one specified in \code{multinom_reg()}. + +To predict on multiple penalties, use the \code{multi_predict()} function. +This function returns a tibble with a list column called \code{.pred} +containing all of the penalty results. } \subsection{nnet}{\if{html}{\out{
}}\preformatted{multinom_reg() \%>\% diff --git a/man/rmd/linear-reg.Rmd b/man/rmd/linear-reg.Rmd index 378f71db8..dbef43a45 100644 --- a/man/rmd/linear-reg.Rmd +++ b/man/rmd/linear-reg.Rmd @@ -21,14 +21,29 @@ linear_reg(penalty = 0.1) %>% translate() ``` -For `glmnet` models, the full regularization path is always fit regardless of the -value given to `penalty`. Also, there is the option to pass multiple values (or -no values) to the `penalty` argument. When using the `predict()` method in these -cases, the return value depends on the value of `penalty`. When using -`predict()`, only a single value of the penalty can be used. When predicting on -multiple penalties, the `multi_predict()` function can be used. It returns a -tibble with a list column called `.pred` that contains a tibble with all of the -penalty results. +The glmnet engine requires a single value for the `penalty` argument (a number +or `tune()`), but the full regularization path is always fit +regardless of the value given to `penalty`. To pass in a custom sequence of +values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`. +This will assign the value of the glmnet `lambda` parameter without disturbing +the value given to `linear_reg(penalty)`. For example: + +```{r glmnet-path} +linear_reg(penalty = .1) %>% + set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>% + translate() +``` + +When fitting a pure ridge regression model (i.e., `mixture = 0`), we _strongly +suggest_ that you pass in a vector for `path_values` that includes zero. See +[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion. + +When using `predict()`, the single `penalty` value used for prediction is the +one specified in `linear_reg()`. + +To predict on multiple penalties, use the `multi_predict()` function. +This function returns a tibble with a list column called `.pred` containing +all of the penalty results. 
## stan diff --git a/man/rmd/logistic-reg.Rmd b/man/rmd/logistic-reg.Rmd index b28b44391..174183546 100644 --- a/man/rmd/logistic-reg.Rmd +++ b/man/rmd/logistic-reg.Rmd @@ -22,14 +22,30 @@ logistic_reg(penalty = 0.1) %>% translate() ``` -For `glmnet` models, the full regularization path is always fit regardless of the -value given to `penalty`. Also, there is the option to pass multiple values (or -no values) to the `penalty` argument. When using the `predict()` method in these -cases, the return value depends on the value of `penalty`. When using -`predict()`, only a single value of the penalty can be used. When predicting on -multiple penalties, the `multi_predict()` function can be used. It returns a -tibble with a list column called `.pred` that contains a tibble with all of the -penalty results. +The glmnet engine requires a single value for the `penalty` argument (a number +or `tune()`), but the full regularization path is always fit +regardless of the value given to `penalty`. To pass in a custom sequence of +values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`. +This will assign the value of the glmnet `lambda` parameter without disturbing +the value given to `logistic_reg(penalty)`. For example: + +```{r glmnet-path} +logistic_reg(penalty = .1) %>% + set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>% + translate() +``` + +When fitting a pure ridge regression model (i.e., `mixture = 0`), we _strongly +suggest_ that you pass in a vector for `path_values` that includes zero. See +[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion. + +When using `predict()`, the single `penalty` value used for prediction is the +one specified in `logistic_reg()`. + +To predict on multiple penalties, use the `multi_predict()` function. +This function returns a tibble with a list column called `.pred` containing +all of the penalty results. 
+ ## LiblineaR diff --git a/man/rmd/multinom-reg.Rmd b/man/rmd/multinom-reg.Rmd index 2071db327..878c1eb4c 100644 --- a/man/rmd/multinom-reg.Rmd +++ b/man/rmd/multinom-reg.Rmd @@ -14,14 +14,31 @@ multinom_reg(penalty = 0.1) %>% translate() ``` -For `glmnet` models, the full regularization path is always fit regardless of the -value given to `penalty`. Also, there is the option to pass multiple values (or -no values) to the `penalty` argument. When using the `predict()` method in these -cases, the return value depends on the value of `penalty`. When using -`predict()`, only a single value of the penalty can be used. When predicting on -multiple penalties, the `multi_predict()` function can be used. It returns a -tibble with a list column called `.pred` that contains a tibble with all of the -penalty results. +The glmnet engine requires a single value for the `penalty` argument (a number +or `tune()`), but the full regularization path is always fit +regardless of the value given to `penalty`. To pass in a custom sequence of +values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`. +This will assign the value of the glmnet `lambda` parameter without disturbing +the value given to `multinom_reg(penalty)`. For example: + + +```{r glmnet-path} +multinom_reg(penalty = .1) %>% + set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>% + translate() +``` + +When fitting a pure ridge regression model (i.e., `mixture = 0`), we _strongly +suggest_ that you pass in a vector for `path_values` that includes zero. See +[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion. + +When using `predict()`, the single `penalty` value used for prediction is the +one specified in `multinom_reg()`. + +To predict on multiple penalties, use the `multi_predict()` function. +This function returns a tibble with a list column called `.pred` containing +all of the penalty results. 
+ ## nnet diff --git a/tests/testthat/test_linear_reg.R b/tests/testthat/test_linear_reg.R index b839572e8..efcf0138c 100644 --- a/tests/testthat/test_linear_reg.R +++ b/tests/testthat/test_linear_reg.R @@ -142,6 +142,19 @@ test_that('engine arguments', { ) ) + # For issue #431 + with_path <- + linear_reg(penalty = 1) %>% + set_engine("glmnet", path_values = 4:2) %>% + translate() + expect_equal( + names(with_path$method$fit$args), + c("x", "y", "weights", "lambda", "family") + ) + expect_equal( + rlang::eval_tidy(with_path$method$fit$args$lambda), + 4:2 + ) }) diff --git a/tests/testthat/test_logistic_reg.R b/tests/testthat/test_logistic_reg.R index 5b2c9df1b..25cbcccd8 100644 --- a/tests/testthat/test_logistic_reg.R +++ b/tests/testthat/test_logistic_reg.R @@ -130,7 +130,7 @@ test_that('primary arguments', { ) ) - penalty_v <- logistic_reg(penalty = varying()) + penalty_v <- logistic_reg(penalty = 1) penalty_v_glmnet <- translate(penalty_v %>% set_engine("glmnet")) penalty_v_liblinear <- translate(penalty_v %>% set_engine("LiblineaR")) penalty_v_spark <- translate(penalty_v %>% set_engine("spark")) @@ -147,7 +147,7 @@ test_that('primary arguments', { x = expr(missing_arg()), y = expr(missing_arg()), wi = expr(missing_arg()), - cost = new_empty_quosure(varying()), + cost = new_empty_quosure(1), verbose = FALSE ) ) @@ -156,7 +156,7 @@ test_that('primary arguments', { x = expr(missing_arg()), formula = expr(missing_arg()), weight_col = expr(missing_arg()), - reg_param = new_empty_quosure(varying()), + reg_param = new_empty_quosure(1), family = "binomial" ) ) @@ -228,6 +228,19 @@ test_that('engine arguments', { ) ) + # For issue #431 + with_path <- + logistic_reg(penalty = 1) %>% + set_engine("glmnet", path_values = 4:2) %>% + translate() + expect_equal( + names(with_path$method$fit$args), + c("x", "y", "weights", "lambda", "family") + ) + expect_equal( + rlang::eval_tidy(with_path$method$fit$args$lambda), + 4:2 + ) }) diff --git a/tests/testthat/test_multinom_reg.R 
b/tests/testthat/test_multinom_reg.R index 6a1b0037d..6687d0e60 100644 --- a/tests/testthat/test_multinom_reg.R +++ b/tests/testthat/test_multinom_reg.R @@ -67,6 +67,19 @@ test_that('engine arguments', { ) ) + # For issue #431 + with_path <- + multinom_reg(penalty = 1) %>% + set_engine("glmnet", path_values = 4:2) %>% + translate() + expect_equal( + names(with_path$method$fit$args), + c("x", "y", "weights", "lambda", "family") + ) + expect_equal( + rlang::eval_tidy(with_path$method$fit$args$lambda), + 4:2 + ) })