tidymodels · topepo · Jun 24, 2021 · Jun 15, 2021 · Jun 15, 2021 · Jun 17, 2021
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,9 @@
 # parsnip (development version)
 
+* Each model now has a default engine that is used when the model is defined. The default for each model is listed in the help documents. This also adds functionality to declare an engine in the model specification function. `set_engine()` is still required if engine-specific arguments need to be added. (#513)
+
+* The default engine for `multinom_reg()` was changed from `glmnet` to `nnet`.
+
 * The helper functions `.convert_form_to_xy_fit()`, `.convert_form_to_xy_new()`, `.convert_xy_to_form_fit()`, and  `.convert_xy_to_form_new()` for converting between formula and matrix interface are now exported for developer use (#508).
 
 * Fix bug in `augment()` when non-predictor, non-outcome variables are included in data (#510).

diff --git a/R/boost_tree.R b/R/boost_tree.R
@@ -29,9 +29,12 @@
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
 #'
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  Possible values for this model are "unknown", "regression", or
 #'  "classification".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"xgboost"`.
 #' @param mtry A number for the number (or proportion) of predictors that will
 #'  be randomly sampled at each split when creating the tree models (`xgboost`
 #'  only).
@@ -92,6 +95,7 @@
 
 boost_tree <-
   function(mode = "unknown",
+           engine = "xgboost",
            mtry = NULL, trees = NULL, min_n = NULL,
            tree_depth = NULL, learn_rate = NULL,
            loss_reduction = NULL,
@@ -114,7 +118,7 @@ boost_tree <-
       eng_args = NULL,
       mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/decision_tree.R b/R/decision_tree.R
@@ -21,9 +21,12 @@
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
 #'
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  Possible values for this model are "unknown", "regression", or
 #'  "classification".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"rpart"`.
 #' @param cost_complexity A positive number for the the cost/complexity
 #'   parameter (a.k.a. `Cp`) used by CART models (`rpart` only).
 #' @param tree_depth An integer for maximum depth of the tree.
@@ -69,7 +72,8 @@
 #' @export
 
 decision_tree <-
-  function(mode = "unknown", cost_complexity = NULL, tree_depth = NULL, min_n = NULL) {
+  function(mode = "unknown", engine = "rpart", cost_complexity = NULL,
+           tree_depth = NULL, min_n = NULL) {
 
     args <- list(
       cost_complexity   = enquo(cost_complexity),
@@ -83,7 +87,7 @@ decision_tree <-
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/linear_reg.R b/R/linear_reg.R
@@ -16,8 +16,11 @@
 #'  here (`NULL`), the values are taken from the underlying model
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  The only possible value for this model is "regression".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"lm"`.
 #' @param penalty A non-negative number representing the total
 #'  amount of regularization (`glmnet`, `keras`, and `spark` only).
 #'  For `keras` models, this corresponds to purely L2 regularization
@@ -70,6 +73,7 @@
 #' @importFrom purrr map_lgl
 linear_reg <-
   function(mode = "regression",
+           engine = "lm",
            penalty = NULL,
            mixture = NULL) {
 
@@ -84,7 +88,7 @@ linear_reg <-
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/logistic_reg.R b/R/logistic_reg.R
@@ -16,8 +16,11 @@
 #'  here (`NULL`), the values are taken from the underlying model
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  The only possible value for this model is "classification".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"glm"`.
 #' @param penalty A non-negative number representing the total
 #'  amount of regularization (`glmnet`, `LiblineaR`, `keras`, and `spark` only).
 #'  For `keras` models, this corresponds to purely L2 regularization
@@ -69,6 +72,7 @@
 #' @importFrom purrr map_lgl
 logistic_reg <-
   function(mode = "classification",
+           engine = "glm",
            penalty = NULL,
            mixture = NULL) {
 
@@ -83,7 +87,7 @@ logistic_reg <-
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/mars.R b/R/mars.R
@@ -22,9 +22,12 @@
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
 #'
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  Possible values for this model are "unknown", "regression", or
 #'  "classification".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"earth"`.
 #' @param num_terms The number of features that will be retained in the
 #'    final model, including the intercept.
 #' @param prod_degree The highest possible interaction degree.
@@ -45,7 +48,7 @@
 #' mars(mode = "regression", num_terms = 5)
 #' @export
 mars <-
-  function(mode = "unknown",
+  function(mode = "unknown", engine = "earth",
            num_terms = NULL, prod_degree = NULL, prune_method = NULL) {
 
     args <- list(
@@ -60,7 +63,7 @@ mars <-
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/mlp.R b/R/mlp.R
@@ -27,9 +27,12 @@
 #'  If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
 #'
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  Possible values for this model are "unknown", "regression", or
 #'  "classification".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"nnet"`.
 #' @param hidden_units An integer for the number of units in the hidden model.
 #' @param penalty A non-negative numeric value for the amount of weight
 #'  decay.
@@ -63,7 +66,7 @@
 #' @export
 
 mlp <-
-  function(mode = "unknown",
+  function(mode = "unknown", engine = "nnet",
            hidden_units = NULL, penalty = NULL, dropout = NULL, epochs = NULL,
            activation = NULL) {
 
@@ -81,7 +84,7 @@ mlp <-
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/multinom_reg.R b/R/multinom_reg.R
@@ -16,8 +16,11 @@
 #'  here (`NULL`), the values are taken from the underlying model
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  The only possible value for this model is "classification".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"nnet"`.
 #' @param penalty A non-negative number representing the total
 #'  amount of regularization (`glmnet`, `keras`, and `spark` only).
 #'  For `keras` models, this corresponds to purely L2 regularization
@@ -33,7 +36,7 @@
 #' The model can be created using the `fit()` function using the
 #'  following _engines_:
 #' \itemize{
-#' \item \pkg{R}:   `"glmnet"`  (the default), `"nnet"`
+#' \item \pkg{R}:   `"nnet"` (the default), `"glmnet"`
 #' \item \pkg{Spark}: `"spark"`
 #' \item \pkg{keras}: `"keras"`
 #' }
@@ -64,6 +67,7 @@
 #' @importFrom purrr map_lgl
 multinom_reg <-
   function(mode = "classification",
+           engine = "nnet",
            penalty = NULL,
            mixture = NULL) {
 
@@ -78,7 +82,7 @@ multinom_reg <-
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/nearest_neighbor.R b/R/nearest_neighbor.R
@@ -23,10 +23,12 @@
 #'  here (`NULL`), the values are taken from the underlying model
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #' Possible values for this model are `"unknown"`, `"regression"`, or
 #' `"classification"`.
-#'
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"kknn"`.
 #' @param neighbors A single integer for the number of neighbors
 #' to consider (often called `k`). For \pkg{kknn}, a value of 5
 #' is used if `neighbors` is not specified.
@@ -57,6 +59,7 @@
 #'
 #' @export
 nearest_neighbor <- function(mode = "unknown",
+                             engine = "kknn",
                              neighbors = NULL,
                              weight_func = NULL,
                              dist_power = NULL) {
@@ -72,7 +75,7 @@ nearest_neighbor <- function(mode = "unknown",
     eng_args = NULL,
     mode = mode,
     method = NULL,
-    engine = NULL
+    engine = engine
   )
 }
 

diff --git a/R/proportional_hazards.R b/R/proportional_hazards.R
@@ -16,8 +16,11 @@
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
 #'
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  Possible values for this model are "unknown", or "censored regression".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"survival"`.
 #' @inheritParams linear_reg
 #'
 #' @details
@@ -29,9 +32,11 @@
 #' show_engines("proportional_hazards")
 #' @keywords internal
 #' @export
-proportional_hazards <- function(mode = "censored regression",
-                    penalty = NULL,
-                    mixture = NULL) {
+proportional_hazards <- function(
+  mode = "censored regression",
+  engine = "survival",
+  penalty = NULL,
+  mixture = NULL) {
 
     args <- list(
       penalty = enquo(penalty),
@@ -44,7 +49,7 @@ proportional_hazards <- function(mode = "censored regression",
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/rand_forest.R b/R/rand_forest.R
@@ -20,9 +20,12 @@
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
 #'
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  Possible values for this model are "unknown", "regression", or
 #'  "classification".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"ranger"`.
 #' @param mtry An integer for the number of predictors that will
 #'  be randomly sampled at each split when creating the tree models.
 #' @param trees An integer for the number of trees contained in
@@ -63,7 +66,7 @@
 #' @export
 
 rand_forest <-
-  function(mode = "unknown", mtry = NULL, trees = NULL, min_n = NULL) {
+  function(mode = "unknown", engine = "ranger", mtry = NULL, trees = NULL, min_n = NULL) {
 
     args <- list(
       mtry   = enquo(mtry),
@@ -77,7 +80,7 @@ rand_forest <-
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/surv_reg.R b/R/surv_reg.R
@@ -31,8 +31,11 @@
 #'  `strata` function cannot be used. To achieve the same effect,
 #'  the extra parameter roles can be used (as described above).
 #'
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  The only possible value for this model is "regression".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"survival"`.
 #' @param dist A character string for the outcome distribution. "weibull" is
 #'  the default.
 #' @details
@@ -65,7 +68,7 @@
 #'
 #' @keywords internal
 #' @export
-surv_reg <- function(mode = "regression", dist = NULL) {
+surv_reg <- function(mode = "regression", engine = "survival", dist = NULL) {
 
   lifecycle::deprecate_soft("0.1.6", "surv_reg()", "survival_reg()")
 
@@ -79,7 +82,7 @@ surv_reg <- function(mode = "regression", dist = NULL) {
       eng_args = NULL,
       mode = mode,
       method = NULL,
-      engine = NULL
+      engine = engine
     )
   }
 

diff --git a/R/survival_reg.R b/R/survival_reg.R
@@ -14,8 +14,11 @@
 #'  functions. If parameters need to be modified, `update()` can be used
 #'  in lieu of recreating the object from scratch.
 #'
-#' @param mode A single character string for the type of model.
+#' @param mode A single character string for the prediction outcome mode.
 #'  The only possible value for this model is "censored regression".
+#' @param engine A single character string specifying what computational engine
+#'  to use for fitting. Possible engines are listed below. The default for this
+#'  model is `"survival"`.
 #' @param dist A character string for the outcome distribution. "weibull" is
 #'  the default.
 #' @details
@@ -34,7 +37,7 @@
 #' survival_reg(dist = varying())
 #' @keywords internal
 #' @export
-survival_reg <- function(mode = "censored regression", dist = NULL) {
+survival_reg <- function(mode = "censored regression", engine = "survival", dist = NULL) {
 
   args <- list(
     dist = enquo(dist)
@@ -46,7 +49,7 @@ survival_reg <- function(mode = "censored regression", dist = NULL) {
     eng_args = NULL,
     mode = mode,
     method = NULL,
-    engine = NULL
+    engine = engine
   )
 }