diff --git a/.Rbuildignore b/.Rbuildignore
index bbdc57513..d9fec7561 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -19,3 +19,4 @@ derby.log
^README\.html$
^codecov\.yml$
^LICENSE\.md$
+^man-roxygen$
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index f8ddf47ba..5bf28c747 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -65,6 +65,8 @@ jobs:
run: |
pak::local_system_requirements(execute = TRUE)
pak::pkg_system_requirements("rcmdcheck", execute = TRUE)
+ pak::pkg_system_requirements("textshaping", execute = TRUE)
+ pak::pkg_system_requirements("gert", execute = TRUE)
shell: Rscript {0}
- name: Install dependencies
diff --git a/DESCRIPTION b/DESCRIPTION
index f3c07f7db..b8a008f26 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -55,6 +55,8 @@ Suggests:
modeldata,
LiblineaR,
Matrix,
- mgcv
-Remotes:
+ mgcv,
+ dials (>= 0.0.9.9000)
+Remotes:
+ tidymodels/dials,
topepo/C5.0
diff --git a/NAMESPACE b/NAMESPACE
index a7e92493f..d3b33bb6a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -138,6 +138,7 @@ export(control_parsnip)
export(convert_stan_interval)
export(decision_tree)
export(eval_args)
+export(find_engine_files)
export(fit)
export(fit.model_spec)
export(fit_control)
@@ -158,6 +159,8 @@ export(linear_reg)
export(logistic_reg)
export(make_call)
export(make_classes)
+export(make_engine_list)
+export(make_seealso_list)
export(mars)
export(maybe_data_frame)
export(maybe_matrix)
@@ -230,6 +233,7 @@ export(update_main_parameters)
export(varying)
export(varying_args)
export(xgb_train)
+importFrom(dplyr,"%>%")
importFrom(dplyr,arrange)
importFrom(dplyr,as_tibble)
importFrom(dplyr,bind_cols)
diff --git a/NEWS.md b/NEWS.md
index abf0668b2..54eaa7b9c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -33,7 +33,11 @@
* `set_mode()` now checks if `mode` is compatible with the model class, similar to `new_model_spec()` (@jtlandis, #467). Both `set_mode()` and `set_engine()` now error for `NULL` or missing arguments (#503).
-* Re-organized model documentation for `update` methods (#479).
+* Re-organized model documentation:
+
+ * `update` methods were moved out of the model help files (#479).
+ * Each model/engine combination has its own help page.
+ * The model help page has a dynamic bulleted list of the engines with links to the individual help pages.
* `generics::required_pkgs()` was extended for `parsnip` objects.
diff --git a/R/aaa_models.R b/R/aaa_models.R
index f7f1b3824..b29c62467 100644
--- a/R/aaa_models.R
+++ b/R/aaa_models.R
@@ -65,7 +65,7 @@ get_model_env <- function() {
#' @export
get_from_env <- function(items) {
mod_env <- get_model_env()
- rlang::env_get(mod_env, items)
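+ # Defaulting to NULL means unregistered items return NULL instead of
+ # raising an error; callers such as show_engines() check for this.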
+ rlang::env_get(mod_env, items, default = NULL)
}
#' @rdname get_model_env
@@ -497,6 +497,7 @@ set_model_mode <- function(model, mode) {
#' @rdname set_new_model
#' @keywords internal
+#' @importFrom dplyr %>%
#' @export
set_model_engine <- function(model, mode, eng) {
check_model_exists(model)
@@ -951,3 +952,151 @@ get_encoding <- function(model) {
}
res
}
+
+#' Tools for dynamically documenting packages
+#'
+#' @description
+#' These are functions used to create dynamic documentation in Rd files
+#' based on which parsnip-related packages are loaded by the user.
+#'
+#' These functions can be used to make dynamic lists of documentation help
+#' files. \pkg{parsnip} uses these, along with files in `man/rmd` that
+#' contain expanded documentation, for specific model/engine combinations.
+#' [find_engine_files()] looks for help topics that match the pattern
+#' `details_{model}_{engine}` to link to. These topics are generated from
+#' files named `man/rmd/{model}_{engine}.Rmd`. `make_engine_list()` creates a
+#' list seen at the top of the model Rd files while `make_seealso_list()`
+#' populates the list seen in "See Also" below. See the details section.
+#'
+#' @param mod A character string for the model function (e.g. "linear_reg").
+#' @return
+#' `make_engine_list()` returns a character string that creates a
+#' bulleted list of links to more specific help files.
+#'
+#' `make_seealso_list()` returns a formatted character string of links.
+#'
+#' `find_engine_files()` returns a tibble with one row per engine that has an
+#' installed help topic (or a zero-length character vector if there are none).
+#' @details
+#' The \pkg{parsnip} documentation is generated _dynamically_. Part of the Rd
+#' file populates a list of engines that depends on what packages are loaded
+#' *at the time that the man file is loaded*. For example, if
+#' another package has a new engine for `linear_reg()`, the
+#' `parsnip::linear_reg()` help can show a link to a detailed help page in the
+#' other package.
+#'
+#' To enable this, the process for a package developer is to:
+#'
+#' 1. Create an engine-specific R file in the `R` directory with the name
+#' `{model}_{engine}.R` (e.g. `boost_tree_C5.0.R`). This has a small amount of
+#' documentation, as well as the directive
+#' "`@includeRmd man/rmd/{model}_{engine}.Rmd details`".
+#'
+#' 1. Copy \pkg{parsnip}'s `man/rmd/setup.Rmd` file into the same location
+#' in your package.
+#'
+#' 1. Write your own `man/rmd/{model}_{engine}.Rmd` file. This can include
+#' packages that are not listed in the DESCRIPTION file. Those are only
+#' required when the documentation file is created locally (probably using
+#' [devtools::document()]).
+#'
+#' 1. Run [devtools::document()] so that the Rmd content is included in the
+#' Rd file.
+#'
+#' The examples in \pkg{parsnip} can provide guidance for how to organize
+#' technical information about the models.
+#' @name doc-tools
+#' @keywords internal
+#' @export
+#' @examples
+#' find_engine_files("linear_reg")
+#' cat(make_engine_list("linear_reg"))
+find_engine_files <- function(mod) {
+
+ # Get available topics
+ topic_names <- search_for_engine_docs(mod)
+ if (length(topic_names) == 0) {
+ return(character(0))
+ }
+
+ # Extract the engine from each topic name (the token after the last "_")
+ eng <- strsplit(topic_names, "_")
+ eng <- purrr::map_chr(eng, ~ .x[length(.x)])
+ eng <- tibble::tibble(engine = eng, topic = topic_names)
+
+ # Combine them to keep the order in which they were registered
+ all_eng <- get_from_env(mod) %>% dplyr::distinct(engine)
+ all_eng$.order <- seq_len(nrow(all_eng))
+ eng <- dplyr::left_join(eng, all_eng, by = "engine")
+ eng <- eng[order(eng$.order),]
+
+ # Determine and label default engine
+ default <- get_default_engine(mod)
+ eng$default <- ifelse(eng$engine == default, " (default)", "")
+
+ eng
+}
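+
+# For illustration only (a sketch, assuming the usual linear_reg() engines
+# are installed), find_engine_files("linear_reg") returns a tibble with one
+# row per documented engine, e.g.:
+#   engine  topic                       .order  default
+#   lm      details_linear_reg_lm       1       " (default)"
+#   glmnet  details_linear_reg_glmnet   2       ""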
+
+#' @export
+#' @rdname doc-tools
+make_engine_list <- function(mod) {
+ eng <- find_engine_files(mod)
+
+ # If no engine help topics are installed, return an empty string rather
+ # than failing on `eng$topic` below.
+ if (length(eng) == 0) {
+ return("")
+ }
+
+ res <-
+ glue::glue(" \\item \\code{\\link[=|eng$topic|]{|eng$engine|}|eng$default|}",
+ .open = "|", .close = "|")
+
+ res <- paste0("\\itemize{\n", paste0(res, collapse = "\n"), "\n}")
+ res
+}
+
+get_default_engine <- function(mod) {
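+ # Build and evaluate a call such as parsnip::linear_reg(), then read the
+ # default engine from the resulting model specification.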
+ cl <- rlang::call2(mod, .ns = "parsnip")
+ rlang::eval_tidy(cl)$engine
+}
+
+#' @export
+#' @rdname doc-tools
+make_seealso_list <- function(mod) {
+ eng <- find_engine_files(mod)
+
+ main <- c("\\code{\\link[=fit.model_spec]{fit.model_spec()}}",
+ "\\code{\\link[=set_engine]{set_engine()}}",
+ "\\code{\\link[=update]{update()}}"
+ )
+
+ # With no engine help topics installed, link only to the general methods.
+ if (length(eng) == 0) {
+ return(paste0(main, collapse = ", "))
+ }
+
+ res <-
+ glue::glue("\\code{\\link[=|eng$topic|]{|eng$engine| engine details}}",
+ .open = "|", .close = "|")
+
+ paste0(c(main, res), collapse = ", ")
+}
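+
+# A sketch of how these helpers are invoked from the model Rd files; this is
+# the pattern used in, e.g., the linear_reg() documentation:
+#   \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("linear_reg")}
+#   \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("linear_reg")}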
+
+# These will never have documentation and we can avoid searching them.
+excl_pkgs <-
+ c("C50", "Cubist", "earth", "flexsurv", "forecast", "glmnet",
+ "keras", "kernlab", "kknn", "klaR", "LiblineaR", "liquidSVM",
+ "magrittr", "MASS", "mda", "mixOmics", "naivebayes", "nnet",
+ "prophet", "pscl", "randomForest", "ranger", "rpart", "rstanarm",
+ "sparklyr", "stats", "survival", "xgboost", "xrf")
+
+search_for_engine_docs <- function(mod) {
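+ # get_from_env("{mod}_pkgs") returns a tibble whose `pkg` list-column holds
+ # the packages declared for each engine.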
+ all_deps <- get_from_env(paste0(mod, "_pkgs"))
+ all_deps <- unlist(all_deps$pkg)
+ all_deps <- unique(c("parsnip", all_deps))
+
+ all_deps <- all_deps[!(all_deps %in% excl_pkgs)]
+ res <- purrr::map(all_deps, find_details_topics, mod = mod)
+ res <- unique(unlist(res))
+ res
+}
+
+find_details_topics <- function(pkg, mod) {
+ meta_loc <- system.file("Meta/Rd.rds", package = pkg)
+ meta_loc <- meta_loc[meta_loc != ""]
+ if (length(meta_loc) > 0) {
+ topic_names <- readRDS(meta_loc)$Name
+ res <- grep(paste0("details_", mod), topic_names, value = TRUE)
+ } else {
+ res <- character(0)
+ }
+ res
+}
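+
+# For reference, "Meta/Rd.rds" is the help index that R installs with every
+# package; reading it gives the package's Rd topic names, e.g.
+#   readRDS(system.file("Meta/Rd.rds", package = "parsnip"))$Name
+# lists every help topic installed with parsnip.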
diff --git a/R/augment.R b/R/augment.R
index 4095a9ebf..0e32b2062 100644
--- a/R/augment.R
+++ b/R/augment.R
@@ -3,13 +3,14 @@
#' `augment()` will add column(s) for predictions to the given data.
#'
#' For regression models, a `.pred` column is added. If `x` was created using
-#' [fit()] and `new_data` contains the outcome column, a `.resid` column is
+#' [fit.model_spec()] and `new_data` contains the outcome column, a `.resid` column is
#' also added.
#'
#' For classification models, the results can include a column called
#' `.pred_class` as well as class probability columns named `.pred_{level}`.
#' This depends on what type of prediction types are available for the model.
-#' @param x A `model_fit` object produced by [fit()] or [fit_xy()].
+#' @param x A `model_fit` object produced by [fit.model_spec()] or
+#' [fit_xy.model_spec()].
#' @param new_data A data frame or matrix.
#' @param ... Not currently used.
#' @rdname augment
diff --git a/R/boost_tree.R b/R/boost_tree.R
index 42cd9ce73..b1ded38ac 100644
--- a/R/boost_tree.R
+++ b/R/boost_tree.R
@@ -1,98 +1,58 @@
# Prototype parsnip code for boosted trees
-#' General Interface for Boosted Trees
+#' Boosted trees
#'
-#' `boost_tree()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R or via Spark. The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{mtry}: The number of predictors that will be
-#' randomly sampled at each split when creating the tree models.
-#' \item \code{trees}: The number of trees contained in the ensemble.
-#' \item \code{min_n}: The minimum number of data points in a node
-#' that is required for the node to be split further.
-#' \item \code{tree_depth}: The maximum depth of the tree (i.e. number of
-#' splits).
-#' \item \code{learn_rate}: The rate at which the boosting algorithm adapts
-#' from iteration-to-iteration.
-#' \item \code{loss_reduction}: The reduction in the loss function required
-#' to split further.
-#' \item \code{sample_size}: The amount of data exposed to the fitting routine.
-#' \item \code{stop_iter}: The number of iterations without improvement before
-#' stopping.
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using the `set_engine()` function. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
+#' @description
+#'
+#' `boost_tree()` defines a model that creates a series of decision trees
+#' forming an ensemble. Each tree depends on the results of previous trees.
+#' All trees in the ensemble are combined to produce a final prediction.
+#'
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("boost_tree")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
#'
#' @param mode A single character string for the prediction outcome mode.
#' Possible values for this model are "unknown", "regression", or
#' "classification".
#' @param engine A single character string specifying what computational engine
-#' to use for fitting. Possible engines are listed below. The default for this
-#' model is `"xgboost"`.
+#' to use for fitting.
#' @param mtry A number for the number (or proportion) of predictors that will
-#' be randomly sampled at each split when creating the tree models (`xgboost`
-#' only).
+#' be randomly sampled at each split when creating the tree models
+#' (specific engines only).
#' @param trees An integer for the number of trees contained in
#' the ensemble.
#' @param min_n An integer for the minimum number of data points
#' in a node that is required for the node to be split further.
#' @param tree_depth An integer for the maximum depth of the tree (i.e. number
-#' of splits) (`xgboost` only).
+#' of splits) (specific engines only).
#' @param learn_rate A number for the rate at which the boosting algorithm adapts
-#' from iteration-to-iteration (`xgboost` only).
+#' from iteration-to-iteration (specific engines only).
#' @param loss_reduction A number for the reduction in the loss function required
-#' to split further (`xgboost` only).
+#' to split further (specific engines only).
#' @param sample_size A number for the number (or proportion) of data that is
#' exposed to the fitting routine. For `xgboost`, the sampling is done at
#' each iteration while `C5.0` samples once during training.
#' @param stop_iter The number of iterations without improvement before
-#' stopping (`xgboost` only).
-#' @details
-#' The data given to the function are not saved and are only used
-#' to determine the _mode_ of the model. For `boost_tree()`, the
-#' possible modes are "regression" and "classification".
+#' stopping (specific engines only).
#'
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"xgboost"` (the default), `"C5.0"`
-#' \item \pkg{Spark}: `"spark"`
-#' }
+#' @template spec-details
#'
-#' For this model, other packages may add additional engines. Use
-#' [show_engines()] to see the current set of engines.
+#' @template spec-references
#'
-#' @includeRmd man/rmd/boost-tree.Rmd details
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("boost_tree")},
+#' [xgb_train()], [C5.0_train()]
#'
-#' @note For models created using the spark engine, there are
-#' several differences to consider. First, only the formula
-#' interface to via `fit()` is available; using `fit_xy()` will
-#' generate an error. Second, the predictions will always be in a
-#' spark table format. The names will be the same as documented but
-#' without the dots. Third, there is no equivalent to factor
-#' columns in spark tables so class predictions are returned as
-#' character columns. Fourth, to retain the model object for a new
-#' R session (via `save()`), the `model$fit` element of the `parsnip`
-#' object should be serialized via `ml_save(object$fit)` and
-#' separately saved to disk. In a new session, the object can be
-#' reloaded and reattached to the `parsnip` object.
-#'
-#' @importFrom purrr map_lgl
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("boost_tree")
#'
#' boost_tree(mode = "classification", trees = 20)
-#' # Parameters can be represented by a placeholder:
-#' boost_tree(mode = "regression", mtry = varying())
#' @export
-
+#' @importFrom purrr map_lgl
boost_tree <-
function(mode = "unknown",
engine = "xgboost",
@@ -573,7 +533,8 @@ xgb_by_tree <- function(tree, object, new_data, type, ...) {
#' random proportion of the data should be used to train the model.
#' By default, all the samples are used for model training. Samples
#' not used for training are used to evaluate the accuracy of the
-#' model in the printed output.
+#' model in the printed output. A value of zero means that all the training
+#' data are used.
#' @param ... Other arguments to pass.
#' @return A fitted C5.0 model.
#' @keywords internal
diff --git a/R/boost_tree_C5.0.R b/R/boost_tree_C5.0.R
new file mode 100644
index 000000000..439d643f9
--- /dev/null
+++ b/R/boost_tree_C5.0.R
@@ -0,0 +1,11 @@
+#' Boosted trees via C5.0
+#'
+#' [C50::C5.0()] creates a series of classification trees forming an
+#' ensemble. Each tree depends on the results of previous trees. All trees in
+#' the ensemble are combined to produce a final prediction.
+#'
+#' @includeRmd man/rmd/boost_tree_C5.0.Rmd details
+#'
+#' @name details_boost_tree_C5.0
+#' @keywords internal
+NULL
diff --git a/R/boost_tree_spark.R b/R/boost_tree_spark.R
new file mode 100644
index 000000000..c66b04485
--- /dev/null
+++ b/R/boost_tree_spark.R
@@ -0,0 +1,11 @@
+#' Boosted trees via Spark
+#'
+#' [sparklyr::ml_gradient_boosted_trees()] creates a series of decision trees
+#' forming an ensemble. Each tree depends on the results of previous trees.
+#' All trees in the ensemble are combined to produce a final prediction.
+#'
+#' @includeRmd man/rmd/boost_tree_spark.Rmd details
+#'
+#' @name details_boost_tree_spark
+#' @keywords internal
+NULL
diff --git a/R/boost_tree_xgboost.R b/R/boost_tree_xgboost.R
new file mode 100644
index 000000000..6cb4c88c5
--- /dev/null
+++ b/R/boost_tree_xgboost.R
@@ -0,0 +1,11 @@
+#' Boosted trees via xgboost
+#'
+#' [xgboost::xgb.train()] creates a series of decision trees forming an
+#' ensemble. Each tree depends on the results of previous trees. All trees in
+#' the ensemble are combined to produce a final prediction.
+#'
+#' @includeRmd man/rmd/boost_tree_xgboost.Rmd details
+#'
+#' @name details_boost_tree_xgboost
+#' @keywords internal
+NULL
diff --git a/R/control_parsnip.R b/R/control_parsnip.R
index 0aff90eb0..a16983410 100644
--- a/R/control_parsnip.R
+++ b/R/control_parsnip.R
@@ -1,6 +1,6 @@
#' Control the fit function
#'
-#' Options can be passed to the [fit()] function that control the output and
+#' Options can be passed to the [fit.model_spec()] function that control the output and
#' computations
#'
#' @param verbosity An integer where a value of zero indicates
diff --git a/R/decision_tree.R b/R/decision_tree.R
index 57463c61a..0e4946c55 100644
--- a/R/decision_tree.R
+++ b/R/decision_tree.R
@@ -1,74 +1,36 @@
# Prototype parsnip code for decision trees
-#' General Interface for Decision Tree Models
+#' Decision trees
#'
-#' `decision_tree()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R or via Spark. The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{cost_complexity}: The cost/complexity parameter (a.k.a. `Cp`)
-#' used by CART models (`rpart` only).
-#' \item \code{tree_depth}: The _maximum_ depth of a tree (`rpart` and
-#' `spark` only).
-#' \item \code{min_n}: The minimum number of data points in a node
-#' that are required for the node to be split further.
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
+#' @description
+#' `decision_tree()` defines a model as a set of `if/then` statements that
+#' creates a tree-based structure.
#'
-#' @param mode A single character string for the prediction outcome mode.
-#' Possible values for this model are "unknown", "regression", or
-#' "classification".
-#' @param engine A single character string specifying what computational engine
-#' to use for fitting. Possible engines are listed below. The default for this
-#' model is `"rpart"`.
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("decision_tree")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @inheritParams boost_tree
#' @param cost_complexity A positive number for the cost/complexity
-#' parameter (a.k.a. `Cp`) used by CART models (`rpart` only).
+#' parameter (a.k.a. `Cp`) used by CART models (specific engines only).
#' @param tree_depth An integer for maximum depth of the tree.
#' @param min_n An integer for the minimum number of data points
#' in a node that are required for the node to be split further.
-#' @details
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"rpart"` (the default) or `"C5.0"` (classification only)
-#' \item \pkg{Spark}: `"spark"`
-#' }
#'
-#' Note that, for `rpart` models, but `cost_complexity` and
-#' `tree_depth` can be both be specified but the package will give
-#' precedence to `cost_complexity`. Also, `tree_depth` values
-#' greater than 30 `rpart` will give nonsense results on 32-bit
-#' machines.
+#' @template spec-details
#'
-#' @includeRmd man/rmd/decision-tree.Rmd details
+#' @template spec-references
#'
-#' @note For models created using the spark engine, there are
-#' several differences to consider. First, only the formula
-#' interface to via `fit()` is available; using `fit_xy()` will
-#' generate an error. Second, the predictions will always be in a
-#' spark table format. The names will be the same as documented but
-#' without the dots. Third, there is no equivalent to factor
-#' columns in spark tables so class predictions are returned as
-#' character columns. Fourth, to retain the model object for a new
-#' R session (via `save()`), the `model$fit` element of the `parsnip`
-#' object should be serialized via `ml_save(object$fit)` and
-#' separately saved to disk. In a new session, the object can be
-#' reloaded and reattached to the `parsnip` object.
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("decision_tree")}
#'
-#' @importFrom purrr map_lgl
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("decision_tree")
#'
#' decision_tree(mode = "classification", tree_depth = 5)
-#' # Parameters can be represented by a placeholder:
-#' decision_tree(mode = "regression", cost_complexity = varying())
#' @export
decision_tree <-
diff --git a/R/decision_tree_C5.0.R b/R/decision_tree_C5.0.R
new file mode 100644
index 000000000..b495f4e3a
--- /dev/null
+++ b/R/decision_tree_C5.0.R
@@ -0,0 +1,10 @@
+#' Decision trees via C5.0
+#'
+#' [C50::C5.0()] fits a model as a set of `if/then` statements that
+#' creates a tree-based structure.
+#'
+#' @includeRmd man/rmd/decision_tree_C5.0.Rmd details
+#'
+#' @name details_decision_tree_C5.0
+#' @keywords internal
+NULL
diff --git a/R/decision_tree_rpart.R b/R/decision_tree_rpart.R
new file mode 100644
index 000000000..0aec47624
--- /dev/null
+++ b/R/decision_tree_rpart.R
@@ -0,0 +1,10 @@
+#' Decision trees via CART
+#'
+#' [rpart::rpart()] fits a model as a set of `if/then` statements that
+#' creates a tree-based structure.
+#'
+#' @includeRmd man/rmd/decision_tree_rpart.Rmd details
+#'
+#' @name details_decision_tree_rpart
+#' @keywords internal
+NULL
diff --git a/R/decision_tree_spark.R b/R/decision_tree_spark.R
new file mode 100644
index 000000000..55bf23390
--- /dev/null
+++ b/R/decision_tree_spark.R
@@ -0,0 +1,10 @@
+#' Decision trees via Spark
+#'
+#' [sparklyr::ml_decision_tree()] fits a model as a set of `if/then`
+#' statements that creates a tree-based structure.
+#'
+#' @includeRmd man/rmd/decision_tree_spark.Rmd details
+#'
+#' @name details_decision_tree_spark
+#' @keywords internal
+NULL
diff --git a/R/engines.R b/R/engines.R
index e6000c184..2054259ab 100644
--- a/R/engines.R
+++ b/R/engines.R
@@ -134,7 +134,7 @@ show_engines <- function(x) {
rlang::abort("`show_engines()` takes a single character string as input.")
}
res <- try(get_from_env(x), silent = TRUE)
- if (inherits(res, "try-error")) {
+ if (inherits(res, "try-error") || is.null(res)) {
rlang::abort(
paste0("No results found for model function '", x, "'.")
)
diff --git a/R/glmnet_details.R b/R/glmnet_details.R
new file mode 100644
index 000000000..f1436df41
--- /dev/null
+++ b/R/glmnet_details.R
@@ -0,0 +1,10 @@
+#' Technical aspects of the glmnet model
+#'
+#' glmnet is a popular statistical model for regularized generalized linear
+#' models. These notes reflect common questions about this particular model.
+#'
+#' @includeRmd man/rmd/glmnet-details.Rmd details
+#'
+#' @name glmnet-details
+#' @keywords internal
+NULL
diff --git a/R/linear_reg.R b/R/linear_reg.R
index 67e6b73ab..574b4b1e1 100644
--- a/R/linear_reg.R
+++ b/R/linear_reg.R
@@ -1,74 +1,40 @@
-#' General Interface for Linear Regression Models
+#' Linear regression
#'
-#' `linear_reg()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R, Stan, keras, or via Spark. The main
-#' arguments for the model are:
-#' \itemize{
-#' \item \code{penalty}: The total amount of regularization
-#' in the model. Note that this must be zero for some engines.
-#' \item \code{mixture}: The mixture amounts of different types of
-#' regularization (see below). Note that this will be ignored for some engines.
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
-#' @param mode A single character string for the prediction outcome mode.
+#' @description
+#'
+#' `linear_reg()` defines a model that can predict numeric values from
+#' predictors using a linear function.
+#'
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("linear_reg")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @param mode A single character string for the type of model.
#' The only possible value for this model is "regression".
#' @param engine A single character string specifying what computational engine
#' to use for fitting. Possible engines are listed below. The default for this
#' model is `"lm"`.
#' @param penalty A non-negative number representing the total
-#' amount of regularization (`glmnet`, `keras`, and `spark` only).
-#' For `keras` models, this corresponds to purely L2 regularization
-#' (aka weight decay) while the other models can be a combination
-#' of L1 and L2 (depending on the value of `mixture`; see below).
+#' amount of regularization (specific engines only).
#' @param mixture A number between zero and one (inclusive) that is the
#' proportion of L1 regularization (i.e. lasso) in the model. When
#' `mixture = 1`, it is a pure lasso model while `mixture = 0` indicates that
-#' ridge regression is being used. (`glmnet` and `spark` only).
-#' @details
-#' The data given to the function are not saved and are only used
-#' to determine the _mode_ of the model. For `linear_reg()`, the
-#' mode will always be "regression".
-#'
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"lm"` (the default) or `"glmnet"`
-#' \item \pkg{Stan}: `"stan"`
-#' \item \pkg{Spark}: `"spark"`
-#' \item \pkg{keras}: `"keras"`
-#' }
+#' ridge regression is being used (specific engines only).
#'
-#' For this model, other packages may add additional engines. Use
-#' [show_engines()] to see the current set of engines.
+#' @template spec-details
#'
-#' @includeRmd man/rmd/linear-reg.Rmd details
+#' @template spec-references
#'
-#' @note For models created using the spark engine, there are
-#' several differences to consider. First, only the formula
-#' interface to via `fit()` is available; using `fit_xy()` will
-#' generate an error. Second, the predictions will always be in a
-#' spark table format. The names will be the same as documented but
-#' without the dots. Third, there is no equivalent to factor
-#' columns in spark tables so class predictions are returned as
-#' character columns. Fourth, to retain the model object for a new
-#' R session (via `save()`), the `model$fit` element of the `parsnip`
-#' object should be serialized via `ml_save(object$fit)` and
-#' separately saved to disk. In a new session, the object can be
-#' reloaded and reattached to the `parsnip` object.
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("linear_reg")}
#'
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("linear_reg")
#'
#' linear_reg()
-#' # Parameters can be represented by a placeholder:
-#' linear_reg(penalty = varying())
#' @export
#' @importFrom purrr map_lgl
linear_reg <-
diff --git a/R/linear_reg_glmnet.R b/R/linear_reg_glmnet.R
new file mode 100644
index 000000000..1b22b70bc
--- /dev/null
+++ b/R/linear_reg_glmnet.R
@@ -0,0 +1,9 @@
+#' Linear regression via glmnet
+#'
+#' [glmnet::glmnet()] uses regularized least squares to fit models with numeric outcomes.
+#'
+#' @includeRmd man/rmd/linear_reg_glmnet.Rmd details
+#'
+#' @name details_linear_reg_glmnet
+#' @keywords internal
+NULL
diff --git a/R/linear_reg_keras.R b/R/linear_reg_keras.R
new file mode 100644
index 000000000..4f05b859c
--- /dev/null
+++ b/R/linear_reg_keras.R
@@ -0,0 +1,9 @@
+#' Linear regression via keras/tensorflow
+#'
+#' This model uses regularized least squares to fit models with numeric outcomes.
+#'
+#' @includeRmd man/rmd/linear_reg_keras.Rmd details
+#'
+#' @name details_linear_reg_keras
+#' @keywords internal
+NULL
diff --git a/R/linear_reg_lm.R b/R/linear_reg_lm.R
new file mode 100644
index 000000000..bd5054e28
--- /dev/null
+++ b/R/linear_reg_lm.R
@@ -0,0 +1,9 @@
+#' Linear regression via lm
+#'
+#' [stats::lm()] uses ordinary least squares to fit models with numeric outcomes.
+#'
+#' @includeRmd man/rmd/linear_reg_lm.Rmd details
+#'
+#' @name details_linear_reg_lm
+#' @keywords internal
+NULL
diff --git a/R/linear_reg_spark.R b/R/linear_reg_spark.R
new file mode 100644
index 000000000..ab7dd2706
--- /dev/null
+++ b/R/linear_reg_spark.R
@@ -0,0 +1,10 @@
+#' Linear regression via Spark
+#'
+#' [sparklyr::ml_linear_regression()] uses regularized least squares to fit
+#' models with numeric outcomes.
+#'
+#' @includeRmd man/rmd/linear_reg_spark.Rmd details
+#'
+#' @name details_linear_reg_spark
+#' @keywords internal
+NULL
diff --git a/R/linear_reg_stan.R b/R/linear_reg_stan.R
new file mode 100644
index 000000000..f6ff4251d
--- /dev/null
+++ b/R/linear_reg_stan.R
@@ -0,0 +1,9 @@
+#' Linear regression via Bayesian methods
+#'
+#' The `"stan"` engine estimates regression parameters using Bayesian estimation.
+#'
+#' @includeRmd man/rmd/linear_reg_stan.Rmd details
+#'
+#' @name details_linear_reg_stan
+#' @keywords internal
+NULL
diff --git a/R/logistic_reg.R b/R/logistic_reg.R
index c6c053aa9..37f84d932 100644
--- a/R/logistic_reg.R
+++ b/R/logistic_reg.R
@@ -1,73 +1,44 @@
-#' General Interface for Logistic Regression Models
+#' Logistic regression
#'
-#' `logistic_reg()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R, Stan, keras, or via Spark. The main
-#' arguments for the model are:
-#' \itemize{
-#' \item \code{penalty}: The total amount of regularization
-#' in the model. Note that this must be zero for some engines.
-#' \item \code{mixture}: The mixture amounts of different types of
-#' regularization (see below). Note that this will be ignored for some engines.
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
-#' @param mode A single character string for the prediction outcome mode.
+#' @description
+#' [logistic_reg()] defines a generalized linear model for binary outcomes. A
+#' linear combination of the predictors is used to model the log odds of an
+#' event.
+#'
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("logistic_reg")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @param mode A single character string for the type of model.
#' The only possible value for this model is "classification".
#' @param engine A single character string specifying what computational engine
#' to use for fitting. Possible engines are listed below. The default for this
#' model is `"glm"`.
#' @param penalty A non-negative number representing the total
-#' amount of regularization (`glmnet`, `LiblineaR`, `keras`, and `spark` only).
+#' amount of regularization (specific engines only).
#' For `keras` models, this corresponds to purely L2 regularization
#' (aka weight decay) while the other models can use either one or a
#' combination of L1 and L2 (depending on the value of `mixture`).
#' @param mixture A number between zero and one (inclusive) that is the
#' proportion of L1 regularization (i.e. lasso) in the model. When
#' `mixture = 1`, it is a pure lasso model while `mixture = 0` indicates that
-#' ridge regression is being used. (`glmnet`, `LiblineaR`, and `spark` only).
+#' ridge regression is being used (specific engines only).
#' For `LiblineaR` models, `mixture` must be exactly 0 or 1.
-#' @details
-#' For `logistic_reg()`, the mode will always be "classification".
-#'
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"glm"` (the default), `"glmnet"`, or `"LiblineaR"`
-#' \item \pkg{Stan}: `"stan"`
-#' \item \pkg{Spark}: `"spark"`
-#' \item \pkg{keras}: `"keras"`
-#' }
#'
-#' For this model, other packages may add additional engines. Use
-#' [show_engines()] to see the current set of engines.
+#' @template spec-details
#'
-#' @includeRmd man/rmd/logistic-reg.Rmd details
+#' @template spec-references
#'
-#' @note For models created using the spark engine, there are
-#' several differences to consider. First, only the formula
-#' interface to via `fit()` is available; using `fit_xy()` will
-#' generate an error. Second, the predictions will always be in a
-#' spark table format. The names will be the same as documented but
-#' without the dots. Third, there is no equivalent to factor
-#' columns in spark tables so class predictions are returned as
-#' character columns. Fourth, to retain the model object for a new
-#' R session (via `save()`), the `model$fit` element of the `parsnip`
-#' object should be serialized via `ml_save(object$fit)` and
-#' separately saved to disk. In a new session, the object can be
-#' reloaded and reattached to the `parsnip` object.
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("logistic_reg")}
#'
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("logistic_reg")
#'
#' logistic_reg()
-#' # Parameters can be represented by a placeholder:
-#' logistic_reg(penalty = varying())
#' @export
#' @importFrom purrr map_lgl
logistic_reg <-
diff --git a/R/logistic_reg_LiblineaR.R b/R/logistic_reg_LiblineaR.R
new file mode 100644
index 000000000..b414e41f2
--- /dev/null
+++ b/R/logistic_reg_LiblineaR.R
@@ -0,0 +1,11 @@
+#' Logistic regression via LiblineaR
+#'
+#' [LiblineaR::LiblineaR()] fits a generalized linear model for binary outcomes. A
+#' linear combination of the predictors is used to model the log odds of an
+#' event.
+#'
+#' @includeRmd man/rmd/logistic_reg_LiblineaR.Rmd details
+#'
+#' @name details_logistic_reg_LiblineaR
+#' @keywords internal
+NULL
diff --git a/R/logistic_reg_glm.R b/R/logistic_reg_glm.R
new file mode 100644
index 000000000..b44a2fe00
--- /dev/null
+++ b/R/logistic_reg_glm.R
@@ -0,0 +1,11 @@
+#' Logistic regression via glm
+#'
+#' [stats::glm()] fits a generalized linear model for binary outcomes. A
+#' linear combination of the predictors is used to model the log odds of an
+#' event.
+#'
+#' @includeRmd man/rmd/logistic_reg_glm.Rmd details
+#'
+#' @name details_logistic_reg_glm
+#' @keywords internal
+NULL
diff --git a/R/logistic_reg_glmnet.R b/R/logistic_reg_glmnet.R
new file mode 100644
index 000000000..af927b7a6
--- /dev/null
+++ b/R/logistic_reg_glmnet.R
@@ -0,0 +1,11 @@
+#' Logistic regression via glmnet
+#'
+#' [glmnet::glmnet()] fits a generalized linear model for binary outcomes. A
+#' linear combination of the predictors is used to model the log odds of an
+#' event.
+#'
+#' @includeRmd man/rmd/logistic_reg_glmnet.Rmd details
+#'
+#' @name details_logistic_reg_glmnet
+#' @keywords internal
+NULL
diff --git a/R/logistic_reg_keras.R b/R/logistic_reg_keras.R
new file mode 100644
index 000000000..54ca11a52
--- /dev/null
+++ b/R/logistic_reg_keras.R
@@ -0,0 +1,11 @@
+#' Logistic regression via keras
+#'
+#' [keras_mlp()] fits a generalized linear model for binary outcomes. A
+#' linear combination of the predictors is used to model the log odds of an
+#' event.
+#'
+#' @includeRmd man/rmd/logistic_reg_keras.Rmd details
+#'
+#' @name details_logistic_reg_keras
+#' @keywords internal
+NULL
diff --git a/R/logistic_reg_spark.R b/R/logistic_reg_spark.R
new file mode 100644
index 000000000..316db1a65
--- /dev/null
+++ b/R/logistic_reg_spark.R
@@ -0,0 +1,11 @@
+#' Logistic regression via Spark
+#'
+#' [sparklyr::ml_logistic_regression()] fits a generalized linear model for
+#' binary outcomes. A linear combination of the predictors is used to model the
+#' log odds of an event.
+#'
+#' @includeRmd man/rmd/logistic_reg_spark.Rmd details
+#'
+#' @name details_logistic_reg_spark
+#' @keywords internal
+NULL
diff --git a/R/logistic_reg_stan.R b/R/logistic_reg_stan.R
new file mode 100644
index 000000000..08b07f6ef
--- /dev/null
+++ b/R/logistic_reg_stan.R
@@ -0,0 +1,11 @@
+#' Logistic regression via Stan
+#'
+#' [rstanarm::stan_glm()] fits a generalized linear model for binary outcomes.
+#' A linear combination of the predictors is used to model the log odds of an
+#' event.
+#'
+#' @includeRmd man/rmd/logistic_reg_stan.Rmd details
+#'
+#' @name details_logistic_reg_stan
+#' @keywords internal
+NULL
diff --git a/R/mars.R b/R/mars.R
index 768ca39c8..69edffea7 100644
--- a/R/mars.R
+++ b/R/mars.R
@@ -1,47 +1,31 @@
-# Prototype parsnip code for multivariate adaptive regression splines (MARS)
+#' Multivariate adaptive regression splines (MARS)
#'
-#' General Interface for MARS
+#' @description
#'
-#' `mars()` is a way to generate a _specification_ of a model before
-#' fitting and allows the model to be created using R. The main
-#' arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{num_terms}: The number of features that will be retained in the
-#' final model.
-#' \item \code{prod_degree}: The highest possible degree of interaction between
-#' features. A value of 1 indicates an additive model while a value of 2
-#' allows, but does not guarantee, two-way interactions between features.
-#' \item \code{prune_method}: The type of pruning. Possible values are listed
-#' in `?earth`.
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
+#' `mars()` defines a generalized linear model that uses artificial features for
+#' some predictors. These features resemble hinge functions and the result is
+#' a model that is a segmented regression in small dimensions.
#'
-#' @param mode A single character string for the prediction outcome mode.
-#' Possible values for this model are "unknown", "regression", or
-#' "classification".
-#' @param engine A single character string specifying what computational engine
-#' to use for fitting. Possible engines are listed below. The default for this
-#' model is `"earth"`.
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mars")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @inheritParams boost_tree
#' @param num_terms The number of features that will be retained in the
#' final model, including the intercept.
#' @param prod_degree The highest possible interaction degree.
#' @param prune_method The pruning method.
-#' @details The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"earth"` (the default)
-#' }
#'
-#' @includeRmd man/rmd/mars.Rmd details
+#' @template spec-details
+#'
+#' @template spec-references
+#'
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("mars")}
#'
-#' @importFrom purrr map_lgl
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("mars")
#'
diff --git a/R/mars_earth.R b/R/mars_earth.R
new file mode 100644
index 000000000..18ad8cd1c
--- /dev/null
+++ b/R/mars_earth.R
@@ -0,0 +1,11 @@
+#' Multivariate adaptive regression splines (MARS) via earth
+#'
+#' [earth::earth()] fits a generalized linear model that uses artificial features for
+#' some predictors. These features resemble hinge functions and the result is
+#' a model that is a segmented regression in small dimensions.
+#'
+#' @includeRmd man/rmd/mars_earth.Rmd details
+#'
+#' @name details_mars_earth
+#' @keywords internal
+NULL
diff --git a/R/mlp.R b/R/mlp.R
index d577897ab..469258d99 100644
--- a/R/mlp.R
+++ b/R/mlp.R
@@ -1,38 +1,18 @@
-#' General Interface for Single Layer Neural Network
+#' Single layer neural network
#'
-#' `mlp()`, for multilayer perceptron, is a way to generate a _specification_ of
-#' a model before fitting and allows the model to be created using
-#' different packages in R or via keras The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{hidden_units}: The number of units in the hidden layer
-#' (default: 5).
-#' \item \code{penalty}: The amount of L2 regularization (aka weight
-#' decay, default is zero).
-#' \item \code{dropout}: The proportion of parameters randomly dropped out of
-#' the model (`keras` only, default is zero).
-#' \item \code{epochs}: The number of training iterations (default: 20).
-#' \item \code{activation}: The type of function that connects the hidden
-#' layer and the input variables (`keras` only, default is softmax).
-#' }
+#' @description
+#' `mlp()` defines a multilayer perceptron model (a.k.a. a single layer,
+#' feed-forward neural network).
#'
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (see above), the values are taken from the underlying model
-#' functions. One exception is `hidden_units` when `nnet::nnet` is used; that
-#' function's `size` argument has no default so a value of 5 units will be
-#' used. Also, unless otherwise specified, the `linout` argument to
-#' `nnet::nnet()` will be set to `TRUE` when a regression model is created.
-#' If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
#'
-#' @param mode A single character string for the prediction outcome mode.
-#' Possible values for this model are "unknown", "regression", or
-#' "classification".
-#' @param engine A single character string specifying what computational engine
-#' to use for fitting. Possible engines are listed below. The default for this
-#' model is `"nnet"`.
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mlp")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @inheritParams boost_tree
#' @param hidden_units An integer for the number of units in the hidden model.
#' @param penalty A non-negative numeric value for the amount of weight
#' decay.
@@ -44,25 +24,17 @@
#' function between the hidden and output layers is automatically set to either
#' "linear" or "softmax" depending on the type of outcome. Possible values are:
#' "linear", "softmax", "relu", and "elu"
-#' @details
#'
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"nnet"` (the default)
-#' \item \pkg{keras}: `"keras"`
-#' }
+#' @template spec-details
+#'
+#' @template spec-references
#'
-#' @includeRmd man/rmd/mlp.Rmd details
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("mlp")}
#'
-#' @importFrom purrr map_lgl
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("mlp")
#'
#' mlp(mode = "classification", penalty = 0.01)
-#' # Parameters can be represented by a placeholder:
-#' mlp(mode = "regression", hidden_units = varying())
#' @export
mlp <-
diff --git a/R/mlp_keras.R b/R/mlp_keras.R
new file mode 100644
index 000000000..0c3917f1b
--- /dev/null
+++ b/R/mlp_keras.R
@@ -0,0 +1,9 @@
+#' Multilayer perceptron via keras
+#'
+#' [keras_mlp()] fits a single layer, feed-forward neural network.
+#'
+#' @includeRmd man/rmd/mlp_keras.Rmd details
+#'
+#' @name details_mlp_keras
+#' @keywords internal
+NULL
diff --git a/R/mlp_nnet.R b/R/mlp_nnet.R
new file mode 100644
index 000000000..774ae7f57
--- /dev/null
+++ b/R/mlp_nnet.R
@@ -0,0 +1,9 @@
+#' Multilayer perceptron via nnet
+#'
+#' [nnet::nnet()] fits a single layer, feed-forward neural network.
+#'
+#' @includeRmd man/rmd/mlp_nnet.Rmd details
+#'
+#' @name details_mlp_nnet
+#' @keywords internal
+NULL
diff --git a/R/model_object_docs.R b/R/model_object_docs.R
index aebf95730..805bd7a0b 100644
--- a/R/model_object_docs.R
+++ b/R/model_object_docs.R
@@ -69,8 +69,8 @@
#'
#' The model functions save the argument _expressions_ and their
#' associated environments (a.k.a. a quosure) to be evaluated later
-#' when either [fit()] or [fit_xy()] are called with the actual
-#' data.
+#' when either [fit.model_spec()] or [fit_xy.model_spec()] are
+#' called with the actual data.
#'
#' The consequence of this strategy is that any data required to
#' get the parameter values must be available when the model is
diff --git a/R/multinom_reg.R b/R/multinom_reg.R
index 54d774d70..603ddc6ef 100644
--- a/R/multinom_reg.R
+++ b/R/multinom_reg.R
@@ -1,68 +1,43 @@
-#' General Interface for Multinomial Regression Models
+#' Multinomial regression
#'
-#' `multinom_reg()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R, keras, or Spark. The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{penalty}: The total amount of regularization
-#' in the model. Note that this must be zero for some engines.
-#' \item \code{mixture}: The mixture amounts of different types of
-#' regularization (see below). Note that this will be ignored for some engines.
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
-#' @param mode A single character string for the prediction outcome mode.
+#' @description
+#'
+#' `multinom_reg()` defines a model that uses linear predictors to predict
+#' multiclass data using the multinomial distribution.
+#'
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("multinom_reg")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @param mode A single character string for the type of model.
#' The only possible value for this model is "classification".
#' @param engine A single character string specifying what computational engine
#' to use for fitting. Possible engines are listed below. The default for this
#' model is `"nnet"`.
#' @param penalty A non-negative number representing the total
-#' amount of regularization (`glmnet`, `keras`, and `spark` only).
+#' amount of regularization (specific engines only).
#' For `keras` models, this corresponds to purely L2 regularization
#' (aka weight decay) while the other models can be a combination
#' of L1 and L2 (depending on the value of `mixture`).
#' @param mixture A number between zero and one (inclusive) that is the
#' proportion of L1 regularization (i.e. lasso) in the model. When
#' `mixture = 1`, it is a pure lasso model while `mixture = 0` indicates that
-#' ridge regression is being used. (`glmnet` and `spark` only).
-#' @details
-#' For `multinom_reg()`, the mode will always be "classification".
+#' ridge regression is being used (specific engines only).
#'
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"nnet"` (the default), `"glmnet"`
-#' \item \pkg{Spark}: `"spark"`
-#' \item \pkg{keras}: `"keras"`
-#' }
+#' @template spec-details
#'
-#' @includeRmd man/rmd/multinom-reg.Rmd details
+#' @template spec-references
#'
-#' @note For models created using the spark engine, there are
-#' several differences to consider. First, only the formula
-#' interface to via `fit()` is available; using `fit_xy()` will
-#' generate an error. Second, the predictions will always be in a
-#' spark table format. The names will be the same as documented but
-#' without the dots. Third, there is no equivalent to factor
-#' columns in spark tables so class predictions are returned as
-#' character columns. Fourth, to retain the model object for a new
-#' R session (via `save()`), the `model$fit` element of the `parsnip`
-#' object should be serialized via `ml_save(object$fit)` and
-#' separately saved to disk. In a new session, the object can be
-#' reloaded and reattached to the `parsnip` object.
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("multinom_reg")}
#'
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("multinom_reg")
#'
#' multinom_reg()
-#' # Parameters can be represented by a placeholder:
-#' multinom_reg(penalty = varying())
#' @export
#' @importFrom purrr map_lgl
multinom_reg <-
diff --git a/R/multinom_reg_glmnet.R b/R/multinom_reg_glmnet.R
new file mode 100644
index 000000000..3ddb98120
--- /dev/null
+++ b/R/multinom_reg_glmnet.R
@@ -0,0 +1,10 @@
+#' Multinomial regression via glmnet
+#'
+#' [glmnet::glmnet()] fits a model that uses linear predictors to predict
+#' multiclass data using the multinomial distribution.
+#'
+#' @includeRmd man/rmd/multinom_reg_glmnet.Rmd details
+#'
+#' @name details_multinom_reg_glmnet
+#' @keywords internal
+NULL
diff --git a/R/multinom_reg_keras.R b/R/multinom_reg_keras.R
new file mode 100644
index 000000000..dfe0a396e
--- /dev/null
+++ b/R/multinom_reg_keras.R
@@ -0,0 +1,10 @@
+#' Multinomial regression via keras
+#'
+#' [keras_mlp()] fits a model that uses linear predictors to predict
+#' multiclass data using the multinomial distribution.
+#'
+#' @includeRmd man/rmd/multinom_reg_keras.Rmd details
+#'
+#' @name details_multinom_reg_keras
+#' @keywords internal
+NULL
diff --git a/R/multinom_reg_nnet.R b/R/multinom_reg_nnet.R
new file mode 100644
index 000000000..049f3d355
--- /dev/null
+++ b/R/multinom_reg_nnet.R
@@ -0,0 +1,10 @@
+#' Multinomial regression via nnet
+#'
+#' [nnet::multinom()] fits a model that uses linear predictors to predict
+#' multiclass data using the multinomial distribution.
+#'
+#' @includeRmd man/rmd/multinom_reg_nnet.Rmd details
+#'
+#' @name details_multinom_reg_nnet
+#' @keywords internal
+NULL
diff --git a/R/multinom_reg_spark.R b/R/multinom_reg_spark.R
new file mode 100644
index 000000000..2cb5adc4e
--- /dev/null
+++ b/R/multinom_reg_spark.R
@@ -0,0 +1,10 @@
+#' Multinomial regression via Spark
+#'
+#' [sparklyr::ml_logistic_regression()] fits a model that uses linear
+#' predictors to predict multiclass data using the multinomial distribution.
+#'
+#' @includeRmd man/rmd/multinom_reg_spark.Rmd details
+#'
+#' @name details_multinom_reg_spark
+#' @keywords internal
+NULL
diff --git a/R/nearest_neighbor.R b/R/nearest_neighbor.R
index 67eb91a3b..8bd04fa29 100644
--- a/R/nearest_neighbor.R
+++ b/R/nearest_neighbor.R
@@ -1,56 +1,34 @@
-# TODO) If implementing `class::knn()`, mention that it does not have
-# the distance param because it uses Euclidean distance. And no `weight_func`
-# param.
-
-#' General Interface for K-Nearest Neighbor Models
+#' K-nearest neighbors
+#'
+#' @description
+#'
+#' `nearest_neighbor()` defines a model that uses the `K` most similar data
+#' points from the training set to predict new samples.
+#'
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
#'
-#' `nearest_neighbor()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R. The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{neighbors}: The number of neighbors considered at
-#' each prediction.
-#' \item \code{weight_func}: The type of kernel function that weights the
-#' distances between samples.
-#' \item \code{dist_power}: The parameter used when calculating the Minkowski
-#' distance. This corresponds to the Manhattan distance with `dist_power = 1`
-#' and the Euclidean distance with `dist_power = 2`.
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
-#' @param mode A single character string for the prediction outcome mode.
-#' Possible values for this model are `"unknown"`, `"regression"`, or
-#' `"classification"`.
-#' @param engine A single character string specifying what computational engine
-#' to use for fitting. Possible engines are listed below. The default for this
-#' model is `"kknn"`.
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("nearest_neighbor")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @inheritParams boost_tree
#' @param neighbors A single integer for the number of neighbors
#' to consider (often called `k`). For \pkg{kknn}, a value of 5
#' is used if `neighbors` is not specified.
-#'
#' @param weight_func A *single* character for the type of kernel function used
#' to weight distances between samples. Valid choices are: `"rectangular"`,
#' `"triangular"`, `"epanechnikov"`, `"biweight"`, `"triweight"`,
#' `"cos"`, `"inv"`, `"gaussian"`, `"rank"`, or `"optimal"`.
-#'
#' @param dist_power A single number for the parameter used in
#' calculating Minkowski distance.
#'
-#' @details
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"kknn"` (the default)
-#' }
+#' @template spec-details
#'
-#' @includeRmd man/rmd/nearest-neighbor.Rmd details
+#' @template spec-references
#'
-#' @seealso [fit()], [set_engine()], [update()]
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("nearest_neighbor")}
#'
#' @examples
#' show_engines("nearest_neighbor")
diff --git a/R/nearest_neighbor_kknn.R b/R/nearest_neighbor_kknn.R
new file mode 100644
index 000000000..bd00e391c
--- /dev/null
+++ b/R/nearest_neighbor_kknn.R
@@ -0,0 +1,10 @@
+#' K-nearest neighbors via kknn
+#'
+#' [kknn::train.kknn()] fits a model that uses the `K` most similar data points
+#' from the training set to predict new samples.
+#'
+#' @includeRmd man/rmd/nearest_neighbor_kknn.Rmd details
+#'
+#' @name details_nearest_neighbor_kknn
+#' @keywords internal
+NULL
diff --git a/R/nullmodel.R b/R/nullmodel.R
index 83954cef0..522265bbc 100644
--- a/R/nullmodel.R
+++ b/R/nullmodel.R
@@ -125,15 +125,12 @@ predict.nullmodel <- function (object, new_data = NULL, type = NULL, ...) {
out
}
-#' General Interface for null models
+#' Null model
#'
-#' `null_model()` is a way to generate a _specification_ of a model before
-#' fitting and allows the model to be created using R. It doesn't have any
+#' `null_model()` defines a simple, non-informative model. It doesn't have any
#' main arguments.
#'
-#' @param mode A single character string for the type of model.
-#' Possible values for this model are "unknown", "regression", or
-#' "classification".
+#' @inheritParams boost_tree
#' @details The model can be created using the `fit()` function using the
#' following _engines_:
#' \itemize{
@@ -143,7 +140,7 @@ predict.nullmodel <- function (object, new_data = NULL, type = NULL, ...) {
#' @includeRmd man/rmd/null-model.Rmd details
#'
#' @importFrom purrr map_lgl
-#' @seealso [fit()]
+#' @seealso [fit.model_spec()]
#' @examples
#' null_model(mode = "regression")
#' @export
diff --git a/R/proportional_hazards.R b/R/proportional_hazards.R
index d2af62ae3..496b0242c 100644
--- a/R/proportional_hazards.R
+++ b/R/proportional_hazards.R
@@ -1,4 +1,4 @@
-#' General Interface for Proportional Hazards Models
+#' Proportional hazards regression
#'
#' `proportional_hazards()` is a way to generate a _specification_ of a model
#' before fitting and allows the model to be created using different packages
@@ -27,7 +27,7 @@
#' Proportional hazards models include the Cox model.
#' For `proportional_hazards()`, the mode will always be "censored regression".
#'
-#' @seealso [fit()], [set_engine()], [update()]
+#' @seealso [fit.model_spec()], [set_engine()], [update()]
#' @examples
#' show_engines("proportional_hazards")
#' @keywords internal
diff --git a/R/rand_forest.R b/R/rand_forest.R
index 5e62e3806..14bf0e76f 100644
--- a/R/rand_forest.R
+++ b/R/rand_forest.R
@@ -1,68 +1,37 @@
-# Prototype parsnip code for random forests
-
-#' General Interface for Random Forest Models
+#' Random forest
+#'
+#' @description
+#'
+#' `rand_forest()` defines a model that creates a large number of decision
+#' trees, each independent of the others. The final prediction uses all
+#' predictions from the individual trees and combines them.
#'
-#' `rand_forest()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R or via Spark. The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{mtry}: The number of predictors that will be
-#' randomly sampled at each split when creating the tree models.
-#' \item \code{trees}: The number of trees contained in the ensemble.
-#' \item \code{min_n}: The minimum number of data points in a node
-#' that are required for the node to be split further.
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and argument can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
#'
-#' @param mode A single character string for the prediction outcome mode.
-#' Possible values for this model are "unknown", "regression", or
-#' "classification".
-#' @param engine A single character string specifying what computational engine
-#' to use for fitting. Possible engines are listed below. The default for this
-#' model is `"ranger"`.
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rand_forest")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @inheritParams boost_tree
#' @param mtry An integer for the number of predictors that will
#' be randomly sampled at each split when creating the tree models.
#' @param trees An integer for the number of trees contained in
#' the ensemble.
#' @param min_n An integer for the minimum number of data points
#' in a node that are required for the node to be split further.
-#' @details
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"ranger"` (the default) or `"randomForest"`
-#' \item \pkg{Spark}: `"spark"`
-#' }
#'
-#' @includeRmd man/rmd/rand-forest.Rmd details
+#' @template spec-details
+#'
+#' @template spec-references
#'
-#' @note For models created using the spark engine, there are
-#' several differences to consider. First, only the formula
-#' interface to via `fit()` is available; using `fit_xy()` will
-#' generate an error. Second, the predictions will always be in a
-#' spark table format. The names will be the same as documented but
-#' without the dots. Third, there is no equivalent to factor
-#' columns in spark tables so class predictions are returned as
-#' character columns. Fourth, to retain the model object for a new
-#' R session (via `save`), the `model$fit` element of the `parsnip`
-#' object should be serialized via `ml_save(object$fit)` and
-#' separately saved to disk. In a new session, the object can be
-#' reloaded and reattached to the `parsnip` object.
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("rand_forest")}
#'
-#' @importFrom purrr map_lgl
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("rand_forest")
#'
#' rand_forest(mode = "classification", trees = 2000)
-#' # Parameters can be represented by a placeholder:
-#' rand_forest(mode = "regression", mtry = varying())
#' @export
rand_forest <-
diff --git a/R/rand_forest_randomForest.R b/R/rand_forest_randomForest.R
new file mode 100644
index 000000000..df3322c5e
--- /dev/null
+++ b/R/rand_forest_randomForest.R
@@ -0,0 +1,11 @@
+#' Random forests via randomForest
+#'
+#' [randomForest::randomForest()] fits a model that creates a large number of
+#' decision trees, each independent of the others. The final prediction uses all
+#' predictions from the individual trees and combines them.
+#'
+#' @includeRmd man/rmd/rand_forest_randomForest.Rmd details
+#'
+#' @name details_rand_forest_randomForest
+#' @keywords internal
+NULL
diff --git a/R/rand_forest_ranger.R b/R/rand_forest_ranger.R
new file mode 100644
index 000000000..44c8ca72a
--- /dev/null
+++ b/R/rand_forest_ranger.R
@@ -0,0 +1,11 @@
+#' Random forests via ranger
+#'
+#' [ranger::ranger()] fits a model that creates a large number of decision
+#' trees, each independent of the others. The final prediction uses all
+#' predictions from the individual trees and combines them.
+#'
+#' @includeRmd man/rmd/rand_forest_ranger.Rmd details
+#'
+#' @name details_rand_forest_ranger
+#' @keywords internal
+NULL
diff --git a/R/rand_forest_spark.R b/R/rand_forest_spark.R
new file mode 100644
index 000000000..0cb69438d
--- /dev/null
+++ b/R/rand_forest_spark.R
@@ -0,0 +1,11 @@
+#' Random forests via spark
+#'
+#' [sparklyr::ml_random_forest()] fits a model that creates a large number of
+#' decision trees, each independent of the others. The final prediction uses all
+#' predictions from the individual trees and combines them.
+#'
+#' @includeRmd man/rmd/rand_forest_spark.Rmd details
+#'
+#' @name details_rand_forest_spark
+#' @keywords internal
+NULL
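[Illustration] A hedged sketch of specifying one of these engines; it assumes the ranger package is installed, and `importance = "impurity"` is a ranger-specific option passed through `set_engine()`:

    library(parsnip)

    rf_spec <- rand_forest(mtry = 2, trees = 500, min_n = 5) %>%
      set_engine("ranger", importance = "impurity") %>%
      set_mode("regression")

    rf_fit <- fit(rf_spec, mpg ~ ., data = mtcars)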
diff --git a/R/surv_reg.R b/R/surv_reg.R
index fb378e06e..b03152b41 100644
--- a/R/surv_reg.R
+++ b/R/surv_reg.R
@@ -1,4 +1,4 @@
-#' General Interface for Parametric Survival Models
+#' Parametric survival regression
#'
#' @description
#' `r lifecycle::badge("deprecated")`
@@ -24,7 +24,7 @@
#' mode will always be "regression".
#'
#' Since survival models typically involve censoring (and require the use of
-#' [survival::Surv()] objects), the [fit()] function will require that the
+#' [survival::Surv()] objects), the [fit.model_spec()] function will require that the
#' survival model be specified via the formula interface.
#'
#' Also, for the `flexsurv::flexsurvfit` engine, the typical
@@ -49,22 +49,13 @@
#'
#' @includeRmd man/rmd/surv-reg.Rmd details
#'
-#' @seealso [fit()], [survival::Surv()], [set_engine()], [update()]
+#' @seealso [fit.model_spec()], [survival::Surv()], [set_engine()], [update()]
#' @references Jackson, C. (2016). `flexsurv`: A Platform for Parametric Survival
#' Modeling in R. _Journal of Statistical Software_, 70(8), 1 - 33.
#' @examples
#' show_engines("surv_reg")
#'
#' surv_reg()
-#' # Parameters can be represented by a placeholder:
-#' surv_reg(dist = varying())
-#'
-#' # ->
-#' show_engines("survival_reg")
-#'
-#' survival_reg()
-#' # Parameters can be represented by a placeholder:
-#' survival_reg(dist = varying())
#'
#' @keywords internal
#' @export
diff --git a/R/survival_reg.R b/R/survival_reg.R
index 04f93e507..5afce23ab 100644
--- a/R/survival_reg.R
+++ b/R/survival_reg.R
@@ -1,4 +1,4 @@
-#' General Interface for Parametric Survival Models
+#' Parametric survival regression
#'
#' `survival_reg()` is a way to generate a _specification_ of a model
#' before fitting and allows the model to be created using
@@ -27,14 +27,12 @@
#' mode will always be "censored regression".
#'
#' Since survival models typically involve censoring (and require the use of
-#' [survival::Surv()] objects), the [fit()] function will require that the
+#' [survival::Surv()] objects), the [fit.model_spec()] function will require that the
#' survival model be specified via the formula interface.
#'
-#' @seealso [fit()], [survival::Surv()], [set_engine()], [update()]
+#' @seealso [fit.model_spec()], [survival::Surv()], [set_engine()], [update()]
#' @examples
#' survival_reg()
-#' # Parameters can be represented by a placeholder:
-#' survival_reg(dist = varying())
#' @keywords internal
#' @export
survival_reg <- function(mode = "censored regression", engine = "survival", dist = NULL) {
diff --git a/R/svm_linear.R b/R/svm_linear.R
index e6da7416a..8d00a724a 100644
--- a/R/svm_linear.R
+++ b/R/svm_linear.R
@@ -1,50 +1,39 @@
-#' General interface for linear support vector machines
+#' Linear support vector machines
#'
-#' `svm_linear()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R or via Spark. The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{cost}: The cost of predicting a sample within or on the
-#' wrong side of the margin.
-#' \item \code{margin}: The epsilon in the SVM insensitive loss function
-#' (regression only)
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
+#' @description
#'
-#' @param mode A single character string for the prediction outcome mode.
-#' Possible values for this model are "unknown", "regression", or
-#' "classification".
-#' @param engine A single character string specifying what computational engine
-#' to use for fitting. Possible engines are listed below. The default for this
-#' model is `"LiblineaR"`.
+#' `svm_linear()` defines a support vector machine model. For classification,
+#' the model tries to maximize the width of the margin between classes.
+#' For regression, the model optimizes a robust loss function that is only
+#' affected by very large model residuals.
+#'
+#' This SVM model uses a linear function to create the decision boundary or
+#' regression line.
+#'
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_linear")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @inheritParams boost_tree
#' @param cost A positive number for the cost of predicting a sample within
#' or on the wrong side of the margin
#' @param margin A positive number for the epsilon in the SVM insensitive
#' loss function (regression only)
-#' @details
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"LiblineaR"` (the default) or `"kernlab"`
-#' }
#'
+#' @template spec-details
+#'
+#' @template spec-references
#'
-#' @includeRmd man/rmd/svm-linear.Rmd details
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_linear")}
#'
-#' @importFrom purrr map_lgl
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("svm_linear")
#'
#' svm_linear(mode = "classification")
-#' # Parameters can be represented by a placeholder:
-#' svm_linear(mode = "regression", cost = varying())
#' @export
svm_linear <-
diff --git a/R/svm_linear_LiblineaR.R b/R/svm_linear_LiblineaR.R
new file mode 100644
index 000000000..8a65e8403
--- /dev/null
+++ b/R/svm_linear_LiblineaR.R
@@ -0,0 +1,12 @@
+#' Linear support vector machines (SVMs) via LiblineaR
+#'
+#' [LiblineaR::LiblineaR()] fits a support vector machine model. For classification,
+#' the model tries to maximize the width of the margin between classes.
+#' For regression, the model optimizes a robust loss function that is only
+#' affected by very large model residuals.
+#'
+#' @includeRmd man/rmd/svm_linear_LiblineaR.Rmd details
+#'
+#' @name details_svm_linear_LiblineaR
+#' @keywords internal
+NULL
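[Illustration] A minimal sketch of using this engine (assumes the LiblineaR package is installed; the iris data are only illustrative):

    library(parsnip)

    svm_cls_spec <- svm_linear(cost = 1) %>%
      set_engine("LiblineaR") %>%
      set_mode("classification")

    svm_cls_fit <- fit(svm_cls_spec, Species ~ ., data = iris)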
diff --git a/R/svm_linear_kernlab.R b/R/svm_linear_kernlab.R
new file mode 100644
index 000000000..dd781d447
--- /dev/null
+++ b/R/svm_linear_kernlab.R
@@ -0,0 +1,12 @@
+#' Linear support vector machines (SVMs) via kernlab
+#'
+#' [kernlab::ksvm()] fits a support vector machine model. For classification,
+#' the model tries to maximize the width of the margin between classes.
+#' For regression, the model optimizes a robust loss function that is only
+#' affected by very large model residuals.
+#'
+#' @includeRmd man/rmd/svm_linear_kernlab.Rmd details
+#'
+#' @name details_svm_linear_kernlab
+#' @keywords internal
+NULL
diff --git a/R/svm_poly.R b/R/svm_poly.R
index 6da8342b4..8a9facf91 100644
--- a/R/svm_poly.R
+++ b/R/svm_poly.R
@@ -1,53 +1,41 @@
-#' General interface for polynomial support vector machines
+#' Polynomial support vector machines
#'
-#' `svm_poly()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R or via Spark. The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{cost}: The cost of predicting a sample within or on the
-#' wrong side of the margin.
-#' \item \code{degree}: The polynomial degree.
-#' \item \code{scale_factor}: A scaling factor for the kernel.
-#' \item \code{margin}: The epsilon in the SVM insensitive loss function
-#' (regression only)
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
+#' @description
#'
-#' @param mode A single character string for the prediction outcome mode.
-#' Possible values for this model are "unknown", "regression", or
-#' "classification".
-#' @param engine A single character string specifying what computational engine
-#' to use for fitting. Possible engines are listed below. The default for this
-#' model is `"kernlab"`.
+#' `svm_poly()` defines a support vector machine model. For classification,
+#' the model tries to maximize the width of the margin between classes.
+#' For regression, the model optimizes a robust loss function that is only
+#' affected by very large model residuals.
+#'
+#' This SVM model uses a nonlinear function, specifically a polynomial function,
+#' to create the decision boundary or regression line.
+#'
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_poly")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @inheritParams boost_tree
#' @param cost A positive number for the cost of predicting a sample within
#' or on the wrong side of the margin
#' @param degree A positive number for polynomial degree.
#' @param scale_factor A positive number for the polynomial scaling factor.
#' @param margin A positive number for the epsilon in the SVM insensitive
#' loss function (regression only)
-#' @details
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"kernlab"` (the default)
-#' }
#'
-#' @includeRmd man/rmd/svm-poly.Rmd details
+#' @template spec-details
+#'
+#' @template spec-references
+#'
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_poly")}
#'
-#' @importFrom purrr map_lgl
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("svm_poly")
#'
#' svm_poly(mode = "classification", degree = 1.2)
-#' # Parameters can be represented by a placeholder:
-#' svm_poly(mode = "regression", cost = varying())
#' @export
svm_poly <-
diff --git a/R/svm_poly_kernlab.R b/R/svm_poly_kernlab.R
new file mode 100644
index 000000000..b9c694232
--- /dev/null
+++ b/R/svm_poly_kernlab.R
@@ -0,0 +1,12 @@
+#' Polynomial support vector machines (SVMs) via kernlab
+#'
+#' [kernlab::ksvm()] fits a support vector machine model. For classification,
+#' the model tries to maximize the width of the margin between classes.
+#' For regression, the model optimizes a robust loss function that is only
+#' affected by very large model residuals.
+#'
+#' @includeRmd man/rmd/svm_poly_kernlab.Rmd details
+#'
+#' @name details_svm_poly_kernlab
+#' @keywords internal
+NULL
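[Illustration] As a sketch, `translate()` can be used to inspect how the standardized arguments map to `kernlab::ksvm()` without fitting anything; this needs only parsnip itself:

    library(parsnip)

    svm_poly(cost = 1, degree = 2) %>%
      set_engine("kernlab") %>%
      set_mode("regression") %>%
      translate()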
diff --git a/R/svm_rbf.R b/R/svm_rbf.R
index cd701690b..d19313cbf 100644
--- a/R/svm_rbf.R
+++ b/R/svm_rbf.R
@@ -1,27 +1,24 @@
-#' General interface for radial basis function support vector machines
+#' Radial basis function support vector machines
#'
-#' `svm_rbf()` is a way to generate a _specification_ of a model
-#' before fitting and allows the model to be created using
-#' different packages in R or via Spark. The main arguments for the
-#' model are:
-#' \itemize{
-#' \item \code{cost}: The cost of predicting a sample within or on the
-#' wrong side of the margin.
-#' \item \code{rbf_sigma}: The precision parameter for the radial basis
-#' function.
-#' \item \code{margin}: The epsilon in the SVM insensitive loss function
-#' (regression only)
-#' }
-#' These arguments are converted to their specific names at the
-#' time that the model is fit. Other options and arguments can be
-#' set using `set_engine()`. If left to their defaults
-#' here (`NULL`), the values are taken from the underlying model
-#' functions. If parameters need to be modified, `update()` can be used
-#' in lieu of recreating the object from scratch.
+#' @description
#'
-#' @param mode A single character string for the prediction outcome mode.
-#' Possible values for this model are "unknown", "regression", or
-#' "classification".
+#' `svm_rbf()` defines a support vector machine model. For classification,
+#' the model tries to maximize the width of the margin between classes.
+#' For regression, the model optimizes a robust loss function that is only
+#' affected by very large model residuals.
+#'
+#' This SVM model uses a nonlinear function, specifically the radial basis function,
+#' to create the decision boundary or regression line.
+#'
+#' There are different ways to fit this model. See the engine-specific pages
+#' for more details:
+#'
+#' \Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_rbf")}
+#'
+#' More information on how \pkg{parsnip} is used for modeling is at
+#' \url{https://www.tidymodels.org/}.
+#'
+#' @inheritParams boost_tree
#' @param engine A single character string specifying what computational engine
#' to use for fitting. Possible engines are listed below. The default for this
#' model is `"kernlab"`.
@@ -30,24 +27,17 @@
#' @param rbf_sigma A positive number for radial basis function.
#' @param margin A positive number for the epsilon in the SVM insensitive
#' loss function (regression only)
-#' @details
-#' The model can be created using the `fit()` function using the
-#' following _engines_:
-#' \itemize{
-#' \item \pkg{R}: `"kernlab"` (the default)
-#' }
#'
+#' @template spec-details
+#'
+#' @template spec-references
#'
-#' @includeRmd man/rmd/svm-rbf.Rmd details
+#' @seealso \Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_rbf")}
#'
-#' @importFrom purrr map_lgl
-#' @seealso [fit()], [set_engine()], [update()]
#' @examples
#' show_engines("svm_rbf")
#'
#' svm_rbf(mode = "classification", rbf_sigma = 0.2)
-#' # Parameters can be represented by a placeholder:
-#' svm_rbf(mode = "regression", cost = varying())
#' @export
svm_rbf <-
diff --git a/R/svm_rbf_kernlab.R b/R/svm_rbf_kernlab.R
new file mode 100644
index 000000000..92ec43554
--- /dev/null
+++ b/R/svm_rbf_kernlab.R
@@ -0,0 +1,12 @@
+#' Radial basis function support vector machines (SVMs) via kernlab
+#'
+#' [kernlab::ksvm()] fits a support vector machine model. For classification,
+#' the model tries to maximize the width of the margin between classes.
+#' For regression, the model optimizes a robust loss function that is only
+#' affected by very large model residuals.
+#'
+#' @includeRmd man/rmd/svm_rbf_kernlab.Rmd details
+#'
+#' @name details_svm_rbf_kernlab
+#' @keywords internal
+NULL
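[Illustration] A hedged sketch of fitting with this engine (assumes the kernlab package is installed; iris is only a stand-in):

    library(parsnip)

    svm_rbf_spec <- svm_rbf(cost = 1, rbf_sigma = 0.1) %>%
      set_engine("kernlab") %>%
      set_mode("classification")

    svm_rbf_fit <- fit(svm_rbf_spec, Species ~ ., data = iris)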
diff --git a/man-roxygen/spec-details.R b/man-roxygen/spec-details.R
new file mode 100644
index 000000000..683e68cc5
--- /dev/null
+++ b/man-roxygen/spec-details.R
@@ -0,0 +1,6 @@
+#' @details
+#' This function only defines what _type_ of model is being fit. Once an engine
+#' is specified, the _method_ to fit the model is also defined.
+#'
+#' The model is not trained or fit until the [fit.model_spec()] function is used
+#' with the data.
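[Illustration] The distinction this template draws, that a specification defines the type of model while `fit.model_spec()` trains it, can be sketched as:

    library(parsnip)

    spec <- linear_reg() %>% set_engine("lm")          # a specification only; no training yet
    fitted <- fit(spec, mpg ~ wt + hp, data = mtcars)  # the model is trained here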
diff --git a/man-roxygen/spec-references.R b/man-roxygen/spec-references.R
new file mode 100644
index 000000000..ea233102a
--- /dev/null
+++ b/man-roxygen/spec-references.R
@@ -0,0 +1 @@
+#' @references \url{https://www.tidymodels.org}, [_Tidy Modeling with R_](https://tmwr.org)
diff --git a/man/C5.0_train.Rd b/man/C5.0_train.Rd
index d7c225ebf..264b85a6a 100644
--- a/man/C5.0_train.Rd
+++ b/man/C5.0_train.Rd
@@ -28,7 +28,8 @@ that must be put in at least two of the splits.}
random proportion of the data should be used to train the model.
By default, all the samples are used for model training. Samples
not used for training are used to evaluate the accuracy of the
-model in the printed output.}
+model in the printed output. A value of zero means that all the training
+data are used.}
\item{...}{Other arguments to pass.}
}
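[Illustration] A hedged sketch of the `sample` argument described above (assumes the C50 package is installed):

    library(parsnip)

    # train on a random 75% of the rows; the remaining 25% is used to
    # report accuracy in the printed output
    m <- C5.0_train(x = iris[, 1:4], y = iris$Species, trials = 10, sample = 0.75)

    # sample = 0 (the default) uses all rows for training
    m_all <- C5.0_train(x = iris[, 1:4], y = iris$Species, trials = 10)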
diff --git a/man/augment.Rd b/man/augment.Rd
index fe3bd4982..0f0839407 100644
--- a/man/augment.Rd
+++ b/man/augment.Rd
@@ -7,7 +7,8 @@
\method{augment}{model_fit}(x, new_data, ...)
}
\arguments{
-\item{x}{A \code{model_fit} object produced by \code{\link[=fit]{fit()}} or \code{\link[=fit_xy]{fit_xy()}}.}
+\item{x}{A \code{model_fit} object produced by \code{\link[=fit.model_spec]{fit.model_spec()}} or
+\code{\link[=fit_xy.model_spec]{fit_xy.model_spec()}}.}
\item{new_data}{A data frame or matrix.}
@@ -18,7 +19,7 @@
}
\details{
For regression models, a \code{.pred} column is added. If \code{x} was created using
-\code{\link[=fit]{fit()}} and \code{new_data} contains the outcome column, a \code{.resid} column is
+\code{\link[=fit.model_spec]{fit.model_spec()}} and \code{new_data} contains the outcome column, a \code{.resid} column is
also added.
For classification models, the results can include a column called
diff --git a/man/boost_tree.Rd b/man/boost_tree.Rd
index c38ddaf7d..e7eac6567 100644
--- a/man/boost_tree.Rd
+++ b/man/boost_tree.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/boost_tree.R
\name{boost_tree}
\alias{boost_tree}
-\title{General Interface for Boosted Trees}
+\title{Boosted trees}
\usage{
boost_tree(
mode = "unknown",
@@ -23,12 +23,11 @@ Possible values for this model are "unknown", "regression", or
"classification".}
\item{engine}{A single character string specifying what computational engine
-to use for fitting. Possible engines are listed below. The default for this
-model is \code{"xgboost"}.}
+to use for fitting.}
\item{mtry}{A number for the number (or proportion) of predictors that will
-be randomly sampled at each split when creating the tree models (\code{xgboost}
-only).}
+be randomly sampled at each split when creating the tree models
+(specific engines only).}
\item{trees}{An integer for the number of trees contained in
the ensemble.}
@@ -37,222 +36,50 @@ the ensemble.}
in a node that is required for the node to be split further.}
\item{tree_depth}{An integer for the maximum depth of the tree (i.e. number
-of splits) (\code{xgboost} only).}
+of splits) (specific engines only).}
\item{learn_rate}{A number for the rate at which the boosting algorithm adapts
-from iteration-to-iteration (\code{xgboost} only).}
+from iteration-to-iteration (specific engines only).}
\item{loss_reduction}{A number for the reduction in the loss function required
-to split further (\code{xgboost} only).}
+to split further (specific engines only).}
\item{sample_size}{A number for the number (or proportion) of data that is
exposed to the fitting routine. For \code{xgboost}, the sampling is done at
each iteration while \code{C5.0} samples once during training.}
\item{stop_iter}{The number of iterations without improvement before
-stopping (\code{xgboost} only).}
+stopping (specific engines only).}
}
\description{
-\code{boost_tree()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R or via Spark. The main arguments for the
-model are:
-\itemize{
-\item \code{mtry}: The number of predictors that will be
-randomly sampled at each split when creating the tree models.
-\item \code{trees}: The number of trees contained in the ensemble.
-\item \code{min_n}: The minimum number of data points in a node
-that is required for the node to be split further.
-\item \code{tree_depth}: The maximum depth of the tree (i.e. number of
-splits).
-\item \code{learn_rate}: The rate at which the boosting algorithm adapts
-from iteration-to-iteration.
-\item \code{loss_reduction}: The reduction in the loss function required
-to split further.
-\item \code{sample_size}: The amount of data exposed to the fitting routine.
-\item \code{stop_iter}: The number of iterations without improvement before
-stopping.
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using the \code{set_engine()} function. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The data given to the function are not saved and are only used
-to determine the \emph{mode} of the model. For \code{boost_tree()}, the
-possible modes are "regression" and "classification".
-
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"xgboost"} (the default), \code{"C5.0"}
-\item \pkg{Spark}: \code{"spark"}
-}
+\code{boost_tree()} defines a model that creates a series of decision trees
+forming an ensemble. Each tree depends on the results of previous trees.
+All trees in the ensemble are combined to produce a final prediction.
-For this model, other packages may add additional engines. Use
-\code{\link[=show_engines]{show_engines()}} to see the current set of engines.
-}
-\note{
-For models created using the spark engine, there are
-several differences to consider. First, only the formula
-interface to via \code{fit()} is available; using \code{fit_xy()} will
-generate an error. Second, the predictions will always be in a
-spark table format. The names will be the same as documented but
-without the dots. Third, there is no equivalent to factor
-columns in spark tables so class predictions are returned as
-character columns. Fourth, to retain the model object for a new
-R session (via \code{save()}), the \code{model$fit} element of the \code{parsnip}
-object should be serialized via \code{ml_save(object$fit)} and
-separately saved to disk. In a new session, the object can be
-reloaded and reattached to the \code{parsnip} object.
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below:
-\subsection{xgboost}{\if{html}{\out{
}}\preformatted{boost_tree() \%>\%
- set_engine("xgboost") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (regression)
-##
-## Computational engine: xgboost
-##
-## Model fit template:
-## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), nthread = 1,
-## verbose = 0)
-}\if{html}{\out{}}\preformatted{boost_tree() \%>\%
- set_engine("xgboost") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification)
-##
-## Computational engine: xgboost
-##
-## Model fit template:
-## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), nthread = 1,
-## verbose = 0)
-}
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-Note that, for most engines to \code{boost_tree()}, the \code{sample_size}
-argument is in terms of the \emph{number} of training set points. The
-\code{xgboost} package parameterizes this as the \emph{proportion} of training set
-samples instead. When using the \code{tune}, this \strong{occurs automatically}.
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("boost_tree")}
-If you would like to use a custom range when tuning \code{sample_size}, the
-\code{dials::sample_prop()} function can be used in that case. For example,
-using a parameter set:\if{html}{\out{}}\preformatted{mod <-
- boost_tree(sample_size = tune()) \%>\%
- set_engine("xgboost") \%>\%
- set_mode("classification")
-
-# update the parameters using the `dials` function
-mod_param <-
- mod \%>\%
- parameters() \%>\%
- update(sample_size = sample_prop(c(0.4, 0.9)))
-}\if{html}{\out{
}}
-
-For this engine, tuning over \code{trees} is very efficient since the same
-model object can be used to make predictions over multiple values of
-\code{trees}.
-
-Note that \code{xgboost} models require that non-numeric predictors (e.g.,
-factors) must be converted to dummy variables or some other numeric
-representation. By default, when using \code{fit()} with \code{xgboost}, a one-hot
-encoding is used to convert factor predictors to indicator variables.
-
-Finally, in the classification mode, non-numeric outcomes (i.e.,
-factors) are converted to numeric. For binary classification, the
-\code{event_level} argument of \code{set_engine()} can be set to either \code{"first"}
-or \code{"second"} to specify which level should be used as the event. This
-can be helpful when a watchlist is used to monitor performance from with
-the xgboost training process.
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+
-\subsection{C5.0}{\if{html}{\out{}}\preformatted{boost_tree() \%>\%
- set_engine("C5.0") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification)
-##
-## Computational engine: C5.0
-##
-## Model fit template:
-## parsnip::C5.0_train(x = missing_arg(), y = missing_arg(), weights = missing_arg())
-}
-
-Note that \code{\link[C50:C5.0]{C50::C5.0()}} does not require factor
-predictors to be converted to indicator variables. \code{fit()} does not
-affect the encoding of the predictor values (i.e. factors stay factors)
-for this model.
-
-For this engine, tuning over \code{trees} is very efficient since the same
-model object can be used to make predictions over multiple values of
-\code{trees}.
-}
-
-\subsection{spark}{\if{html}{\out{}}\preformatted{boost_tree() \%>\%
- set_engine("spark") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (regression)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_gradient_boosted_trees(x = missing_arg(), formula = missing_arg(),
-## type = "regression", seed = sample.int(10^5, 1))
-}\if{html}{\out{}}\preformatted{boost_tree() \%>\%
- set_engine("spark") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_gradient_boosted_trees(x = missing_arg(), formula = missing_arg(),
-## type = "classification", seed = sample.int(10^5, 1))
-}
-
-\code{fit()} does not affect the encoding of the predictor values
-(i.e. factors stay factors) for this model.
-}
-
-\subsection{Parameter translations}{
-
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{llll}{
- \strong{parsnip} \tab \strong{xgboost} \tab \strong{C5.0} \tab \strong{spark} \cr
- tree_depth \tab max_depth (6) \tab NA \tab max_depth (5) \cr
- trees \tab nrounds (15) \tab trials (15) \tab max_iter (20) \cr
- learn_rate \tab eta (0.3) \tab NA \tab step_size (0.1) \cr
- mtry \tab colsample_bynode (character(0)) \tab NA \tab feature_subset_strategy (see below) \cr
- min_n \tab min_child_weight (1) \tab minCases (2) \tab min_instances_per_node (1) \cr
- loss_reduction \tab gamma (0) \tab NA \tab min_info_gain (0) \cr
- sample_size \tab subsample (1) \tab sample (0) \tab subsampling_rate (1) \cr
- stop_iter \tab early_stop (NULL) \tab NA \tab NA \cr
-}
-
-
-For spark, the default \code{mtry} is the square root of the number of
-predictors for classification, and one-third of the predictors for
-regression.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("boost_tree")
boost_tree(mode = "classification", trees = 20)
-# Parameters can be represented by a placeholder:
-boost_tree(mode = "regression", mtry = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("boost_tree")},
+\code{\link[=xgb_train]{xgb_train()}}, \code{\link[=C5.0_train]{C5.0_train()}}
}
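[Illustration] For orientation, a sketch of how a `boost_tree()` specification resolves to the wrapped fitting function once an engine and mode are set:

    library(parsnip)

    boost_tree(trees = 100, tree_depth = 4) %>%
      set_engine("xgboost") %>%
      set_mode("regression") %>%
      translate()  # prints the parsnip::xgb_train() fit template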
diff --git a/man/contr_one_hot.Rd b/man/contr_one_hot.Rd
index df945bebd..a8c21d593 100644
--- a/man/contr_one_hot.Rd
+++ b/man/contr_one_hot.Rd
@@ -39,14 +39,16 @@ levels(penguins$species)
}\if{html}{\out{}}\preformatted{## [1] "Biscoe" "Dream" "Torgersen"
}\if{html}{\out{}}\preformatted{model.matrix(~ species + island, data = penguins) \%>\%
colnames()
-}\if{html}{\out{
}}\preformatted{## [1] "(Intercept)" "speciesChinstrap" "speciesGentoo" "islandDream" "islandTorgersen"
+}\if{html}{\out{}}\preformatted{## [1] "(Intercept)" "speciesChinstrap" "speciesGentoo" "islandDream"
+## [5] "islandTorgersen"
}
For a formula with no intercept, the first factor is expanded to
indicators for \emph{all} factor levels but all other factors are expanded to
all but one (as above):\if{html}{\out{}}\preformatted{model.matrix(~ 0 + species + island, data = penguins) \%>\%
colnames()
-}\if{html}{\out{
}}\preformatted{## [1] "speciesAdelie" "speciesChinstrap" "speciesGentoo" "islandDream" "islandTorgersen"
+}\if{html}{\out{}}\preformatted{## [1] "speciesAdelie" "speciesChinstrap" "speciesGentoo" "islandDream"
+## [5] "islandTorgersen"
}
For inference, this hybrid encoding can be problematic.
@@ -59,8 +61,8 @@ options(contrasts = new_contr)
model.matrix(~ species + island, data = penguins) \%>\%
colnames()
-}\if{html}{\out{}}\preformatted{## [1] "(Intercept)" "speciesAdelie" "speciesChinstrap" "speciesGentoo" "islandBiscoe"
-## [6] "islandDream" "islandTorgersen"
+}\if{html}{\out{}}\preformatted{## [1] "(Intercept)" "speciesAdelie" "speciesChinstrap" "speciesGentoo"
+## [5] "islandBiscoe" "islandDream" "islandTorgersen"
}\if{html}{\out{}}\preformatted{options(contrasts = old_contr)
}\if{html}{\out{
}}
diff --git a/man/control_parsnip.Rd b/man/control_parsnip.Rd
index 05a5f597b..d6fde30fa 100644
--- a/man/control_parsnip.Rd
+++ b/man/control_parsnip.Rd
@@ -28,7 +28,7 @@ An S3 object with class "fit_control" that is a named list with the
results of the function call
}
\description{
-Options can be passed to the \code{\link[=fit]{fit()}} function that control the output and
+Options can be passed to the \code{\link[=fit.model_spec]{fit.model_spec()}} function that control the output and
computations
}
\details{
diff --git a/man/decision_tree.Rd b/man/decision_tree.Rd
index c542cb8e4..bfcb947e7 100644
--- a/man/decision_tree.Rd
+++ b/man/decision_tree.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/decision_tree.R
\name{decision_tree}
\alias{decision_tree}
-\title{General Interface for Decision Tree Models}
+\title{Decision trees}
\usage{
decision_tree(
mode = "unknown",
@@ -18,11 +18,10 @@ Possible values for this model are "unknown", "regression", or
"classification".}
\item{engine}{A single character string specifying what computational engine
-to use for fitting. Possible engines are listed below. The default for this
-model is \code{"rpart"}.}
+to use for fitting.}
\item{cost_complexity}{A positive number for the cost/complexity
-parameter (a.k.a. \code{Cp}) used by CART models (\code{rpart} only).}
+parameter (a.k.a. \code{Cp}) used by CART models (specific engines only).}
\item{tree_depth}{An integer for maximum depth of the tree.}
@@ -30,153 +29,32 @@ parameter (a.k.a. \code{Cp}) used by CART models (\code{rpart} only).}
in a node that are required for the node to be split further.}
}
\description{
-\code{decision_tree()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R or via Spark. The main arguments for the
-model are:
-\itemize{
-\item \code{cost_complexity}: The cost/complexity parameter (a.k.a. \code{Cp})
-used by CART models (\code{rpart} only).
-\item \code{tree_depth}: The \emph{maximum} depth of a tree (\code{rpart} and
-\code{spark} only).
-\item \code{min_n}: The minimum number of data points in a node
-that are required for the node to be split further.
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"rpart"} (the default) or \code{"C5.0"} (classification only)
-\item \pkg{Spark}: \code{"spark"}
-}
+\code{decision_tree()} defines a model as a set of \verb{if/then} statements that
+creates a tree-based structure.
-Note that, for \code{rpart} models, but \code{cost_complexity} and
-\code{tree_depth} can be both be specified but the package will give
-precedence to \code{cost_complexity}. Also, \code{tree_depth} values
-greater than 30 \code{rpart} will give nonsense results on 32-bit
-machines.
-}
-\note{
-For models created using the spark engine, there are
-several differences to consider. First, only the formula
-interface to via \code{fit()} is available; using \code{fit_xy()} will
-generate an error. Second, the predictions will always be in a
-spark table format. The names will be the same as documented but
-without the dots. Third, there is no equivalent to factor
-columns in spark tables so class predictions are returned as
-character columns. Fourth, to retain the model object for a new
-R session (via \code{save()}), the \code{model$fit} element of the \code{parsnip}
-object should be serialized via \code{ml_save(object$fit)} and
-separately saved to disk. In a new session, the object can be
-reloaded and reattached to the \code{parsnip} object.
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below:
-\subsection{rpart}{\if{html}{\out{}}\preformatted{decision_tree() \%>\%
- set_engine("rpart") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (regression)
-##
-## Computational engine: rpart
-##
-## Model fit template:
-## rpart::rpart(formula = missing_arg(), data = missing_arg(), weights = missing_arg())
-}\if{html}{\out{}}\preformatted{decision_tree() \%>\%
- set_engine("rpart") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification)
-##
-## Computational engine: rpart
-##
-## Model fit template:
-## rpart::rpart(formula = missing_arg(), data = missing_arg(), weights = missing_arg())
-}
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-Note that \code{\link[rpart:rpart]{rpart::rpart()}} does not require factor
-predictors to be converted to indicator variables. \code{fit()} does not
-affect the encoding of the predictor values (i.e. factors stay factors)
-for this model
-}
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("decision_tree")}
-\subsection{C5.0}{\if{html}{\out{}}\preformatted{decision_tree() \%>\%
- set_engine("C5.0") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification)
-##
-## Computational engine: C5.0
-##
-## Model fit template:
-## parsnip::C5.0_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## trials = 1)
-}
-
-Note that \code{\link[C50:C5.0]{C50::C5.0()}} does not require factor
-predictors to be converted to indicator variables. \code{fit()} does not
-affect the encoding of the predictor values (i.e. factors stay factors)
-for this model
-}
-
-\subsection{spark}{\if{html}{\out{}}\preformatted{decision_tree() \%>\%
- set_engine("spark") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (regression)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_decision_tree_regressor(x = missing_arg(), formula = missing_arg(),
-## seed = sample.int(10^5, 1))
-}\if{html}{\out{}}\preformatted{decision_tree() \%>\%
- set_engine("spark") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Decision Tree Model Specification (classification)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_decision_tree_classifier(x = missing_arg(), formula = missing_arg(),
-## seed = sample.int(10^5, 1))
-}
-
-\code{fit()} does not affect the encoding of the predictor values
-(i.e. factors stay factors) for this model
-}
-
-\subsection{Parameter translations}{
-
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{llll}{
- \strong{parsnip} \tab \strong{rpart} \tab \strong{C5.0} \tab \strong{spark} \cr
- tree_depth \tab maxdepth (30) \tab NA \tab max_depth (5) \cr
- min_n \tab minsplit (20) \tab minCases (2) \tab min_instances_per_node (1) \cr
- cost_complexity \tab cp (0.01) \tab NA \tab NA \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("decision_tree")
decision_tree(mode = "classification", tree_depth = 5)
-# Parameters can be represented by a placeholder:
-decision_tree(mode = "regression", cost_complexity = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("decision_tree")}
}
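[Illustration] A minimal, hedged sketch of fitting this model (rpart ships with most R installations):

    library(parsnip)

    tree_spec <- decision_tree(cost_complexity = 0.005, min_n = 10) %>%
      set_engine("rpart") %>%
      set_mode("classification")

    tree_fit <- fit(tree_spec, Species ~ ., data = iris)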
diff --git a/man/details_boost_tree_C5.0.Rd b/man/details_boost_tree_C5.0.Rd
new file mode 100644
index 000000000..fdc881c63
--- /dev/null
+++ b/man/details_boost_tree_C5.0.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/boost_tree_C5.0.R
+\name{details_boost_tree_C5.0}
+\alias{details_boost_tree_C5.0}
+\title{Boosted trees via C5.0}
+\description{
+\code{\link[C50:C5.0]{C50::C5.0()}} creates a series of classification trees forming an
+ensemble. Each tree depends on the results of previous trees. All trees in
+the ensemble are combined to produce a final prediction.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{trees}: # Trees (type: integer, default: 15L)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 2L)
+\item \code{sample_size}: Proportion Observations Sampled (type: double,
+default: 1.0)
+}
+
+The implementation of C5.0 limits the number of trees to be between 1
+and 100.
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) \%>\%
+ set_engine("C5.0") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification)
+##
+## Main Arguments:
+## trees = integer()
+## min_n = integer()
+## sample_size = numeric()
+##
+## Computational engine: C5.0
+##
+## Model fit template:
+## parsnip::C5.0_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## trials = integer(), minCases = integer(), sample = numeric())
+}
+
+\code{\link[=C5.0_train]{C5.0_train()}} is a wrapper around
+\code{\link[C50:C5.0]{C50::C5.0()}} that makes it easier to run this model.
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{Other details}{
+\subsection{Early stopping}{
+
+By default, early stopping is used. To use the complete set of boosting
+iterations, pass \code{earlyStopping = FALSE} to
+\code{\link[=set_engine]{set_engine()}}. Also, it is unlikely that early
+stopping will occur if \code{sample_size = 1}.
+}
+
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
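[Illustration] A sketch of the early-stopping behavior noted above: passing `earlyStopping = FALSE` through `set_engine()` forces all boosting iterations to be used (assumes the C50 package is installed):

    library(parsnip)

    boost_tree(trees = 50, sample_size = 0.8) %>%
      set_engine("C5.0", earlyStopping = FALSE) %>%  # keep all 50 iterations
      set_mode("classification") %>%
      fit(Species ~ ., data = iris)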
diff --git a/man/details_boost_tree_spark.Rd b/man/details_boost_tree_spark.Rd
new file mode 100644
index 000000000..c2ea5353b
--- /dev/null
+++ b/man/details_boost_tree_spark.Rd
@@ -0,0 +1,129 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/boost_tree_spark.R
+\name{details_boost_tree_spark}
+\alias{details_boost_tree_spark}
+\title{Boosted trees via Spark}
+\description{
+\code{\link[sparklyr:ml_gradient_boosted_trees]{sparklyr::ml_gradient_boosted_trees()}} creates a series of decision trees
+forming an ensemble. Each tree depends on the results of previous trees.
+All trees in the ensemble are combined to produce a final prediction.
+}
+\details{
+For this engine, there are multiple modes: classification and
+regression. However, multiclass classification is not supported yet.
+\subsection{Tuning Parameters}{
+
+This model has 7 tuning parameters:
+\itemize{
+\item \code{tree_depth}: Tree Depth (type: integer, default: 5L)
+\item \code{trees}: # Trees (type: integer, default: 20L)
+\item \code{learn_rate}: Learning Rate (type: double, default: 0.1)
+\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: see
+below)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 1L)
+\item \code{loss_reduction}: Minimum Loss Reduction (type: double, default:
+0.0)
+\item \code{sample_size}: # Observations Sampled (type: integer, default: 1.0)
+}
+
+The \code{mtry} parameter is related to the number of predictors. The default
+depends on the model mode. For classification, the square root of the
+number of predictors is used and for regression, one third of the
+predictors are sampled.
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{}}\preformatted{boost_tree(
+ mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(),
+ learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric()
+) \%>\%
+ set_engine("spark") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (regression)
+##
+## Main Arguments:
+## mtry = integer()
+## trees = integer()
+## min_n = integer()
+## tree_depth = integer()
+## learn_rate = numeric()
+## loss_reduction = numeric()
+## sample_size = numeric()
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_gradient_boosted_trees(x = missing_arg(), formula = missing_arg(),
+## type = "regression", feature_subset_strategy = integer(),
+## max_iter = integer(), min_instances_per_node = min_rows(integer(0),
+## x), max_depth = integer(), step_size = numeric(), min_info_gain = numeric(),
+## subsampling_rate = numeric(), seed = sample.int(10^5, 1))
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{boost_tree(
+ mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(),
+ learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric()
+) \%>\%
+ set_engine("spark") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification)
+##
+## Main Arguments:
+## mtry = integer()
+## trees = integer()
+## min_n = integer()
+## tree_depth = integer()
+## learn_rate = numeric()
+## loss_reduction = numeric()
+## sample_size = numeric()
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_gradient_boosted_trees(x = missing_arg(), formula = missing_arg(),
+## type = "classification", feature_subset_strategy = integer(),
+## max_iter = integer(), min_instances_per_node = min_rows(integer(0),
+## x), max_depth = integer(), step_size = numeric(), min_info_gain = numeric(),
+## subsampling_rate = numeric(), seed = sample.int(10^5, 1))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{Other details}{
+
+For models created using the \code{"spark"} engine, there are several things
+to consider.
+\itemize{
+\item Only the formula interface via \code{fit()} is available; using
+\code{fit_xy()} will generate an error.
+\item The predictions will always be in a Spark table format. The names
+will be the same as documented but without the dots.
+\item There is no equivalent to factor columns in Spark tables so class
+predictions are returned as character columns.
+\item To retain the model object for a new R session (via \code{save()}), the
+\code{model$fit} element of the parsnip object should be serialized via
+\code{ml_save(object$fit)} and separately saved to disk. In a new
+session, the object can be reloaded and reattached to the parsnip
+object.
+}
+}
+
+\subsection{References}{
+\itemize{
+\item Luraschi, J, K Kuo, and E Ruiz. 2019. \emph{Mastering Spark with R}.
+O’Reilly Media
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
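[Illustration] Because `translate()` only builds the fit call template, the mapping above can be inspected without a live Spark connection; a sketch:

    library(parsnip)

    boost_tree(trees = 30, learn_rate = 0.05) %>%
      set_engine("spark") %>%
      set_mode("regression") %>%
      translate()  # shows the sparklyr::ml_gradient_boosted_trees() template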
diff --git a/man/details_boost_tree_xgboost.Rd b/man/details_boost_tree_xgboost.Rd
new file mode 100644
index 000000000..a1cc3f00f
--- /dev/null
+++ b/man/details_boost_tree_xgboost.Rd
@@ -0,0 +1,169 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/boost_tree_xgboost.R
+\name{details_boost_tree_xgboost}
+\alias{details_boost_tree_xgboost}
+\title{Boosted trees via xgboost}
+\description{
+\code{\link[xgboost:xgb.train]{xgboost::xgb.train()}} creates a series of decision trees forming an
+ensemble. Each tree depends on the results of previous trees. All trees in
+the ensemble are combined to produce a final prediction.
+}
+\details{
+For this engine, there are multiple modes: classification and regression.
+\subsection{Tuning Parameters}{
+
+This model has 8 tuning parameters:
+\itemize{
+\item \code{tree_depth}: Tree Depth (type: integer, default: 6L)
+\item \code{trees}: # Trees (type: integer, default: 15L)
+\item \code{learn_rate}: Learning Rate (type: double, default: 0.3)
+\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: see
+below)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 1L)
+\item \code{loss_reduction}: Minimum Loss Reduction (type: double, default:
+0.0)
+\item \code{sample_size}: Proportion Observations Sampled (type: double,
+default: 1.0)
+\item \code{stop_iter}: # Iterations Before Stopping (type: integer, default:
+Inf)
+}
+
+The \code{mtry} parameter is related to the number of predictors. The default
+is to use all predictors. \code{\link[xgboost:xgb.train]{xgboost::xgb.train()}}
+encodes this as a real number between zero and one. parsnip translates
+the number of columns to this type of value. The user should give the
+argument to \code{boost_tree()} as an integer (not a real number).
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{}}\preformatted{boost_tree(
+ mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(),
+ learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric(),
+ stop_iter = integer()
+) \%>\%
+ set_engine("xgboost") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (regression)
+##
+## Main Arguments:
+## mtry = integer()
+## trees = integer()
+## min_n = integer()
+## tree_depth = integer()
+## learn_rate = numeric()
+## loss_reduction = numeric()
+## sample_size = numeric()
+## stop_iter = integer()
+##
+## Computational engine: xgboost
+##
+## Model fit template:
+## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(),
+## nrounds = integer(), min_child_weight = integer(), max_depth = integer(),
+## eta = numeric(), gamma = numeric(), subsample = numeric(),
+## early_stop = integer(), nthread = 1, verbose = 0)
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{boost_tree(
+ mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(),
+ learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric(),
+ stop_iter = integer()
+) \%>\%
+ set_engine("xgboost") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{
}}\preformatted{## Boosted Tree Model Specification (classification)
+##
+## Main Arguments:
+## mtry = integer()
+## trees = integer()
+## min_n = integer()
+## tree_depth = integer()
+## learn_rate = numeric()
+## loss_reduction = numeric()
+## sample_size = numeric()
+## stop_iter = integer()
+##
+## Computational engine: xgboost
+##
+## Model fit template:
+## parsnip::xgb_train(x = missing_arg(), y = missing_arg(), colsample_bynode = integer(),
+## nrounds = integer(), min_child_weight = integer(), max_depth = integer(),
+## eta = numeric(), gamma = numeric(), subsample = numeric(),
+## early_stop = integer(), nthread = 1, verbose = 0)
+}
+
+\code{\link[=xgb_train]{xgb_train()}} is a wrapper around
+\code{\link[xgboost:xgb.train]{xgboost::xgb.train()}} (and other functions)
+that makes it easier to run this model.
+}
+
+\subsection{Preprocessing requirements}{
+
+xgboost does not have a means to translate factor predictors to grouped
+splits. Factor/categorical predictors need to be converted to numeric
+values (e.g., dummy or indicator variables) for this engine. When using
+the formula method via \code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip
+will convert factor columns to indicators using a one-hot encoding.
+
+For classification, non-numeric outcomes (i.e., factors) are internally
+converted to numeric. For binary classification, the \code{event_level}
+argument of \code{set_engine()} can be set to either \code{"first"} or \code{"second"}
+to specify which level should be used as the event. This can be helpful
+when a watchlist is used to monitor performance from within the xgboost
+training process.
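+
+A sketch, assuming a binary outcome where the second factor level is the
+event of interest:\preformatted{# event_level is an engine argument, set via set_engine()
+boost_tree() \%>\%
+  set_engine("xgboost", event_level = "second") \%>\%
+  set_mode("classification")
+}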
+}
+
+\subsection{Other details}{
+\subsection{Sparse matrices}{
+
+xgboost requires the data to be in a sparse format. If your predictor
+data are already in this format, then use
+\code{\link[=fit_xy.model_spec]{fit_xy.model_spec()}} to pass it to the model
+function. Otherwise, parsnip converts the data to this format.
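+
+A sketch, assuming \code{sparse_x} is a sparse matrix (e.g., a
+\code{dgCMatrix} from the Matrix package) and \code{y} is the outcome:\preformatted{# pass the sparse matrix directly to the underlying model function
+boost_tree(trees = 100) \%>\%
+  set_engine("xgboost") \%>\%
+  set_mode("regression") \%>\%
+  fit_xy(x = sparse_x, y = y)
+}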
+}
+
+\subsection{Parallel processing}{
+
+By default, the model is trained without parallel processing. This can
+be changed by passing the \code{nthread} parameter to
+\code{\link[=set_engine]{set_engine()}}. However, it is unwise to combine this
+with external parallel processing when using the package.
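+
+For example (a sketch):\preformatted{# overrides the default of nthread = 1 shown in the fit template
+boost_tree() \%>\%
+  set_engine("xgboost", nthread = 2)
+}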
+}
+
+\subsection{Early stopping}{
+
+The \code{stop_iter()} argument allows the model to prematurely stop training
+if the objective function does not improve within \code{early_stop}
+iterations.
+
+The best way to use this feature is in conjunction with an \emph{internal
+validation set}. To do this, pass the \code{validation} parameter of
+\code{\link[=xgb_train]{xgb_train()}} via the parsnip
+\code{\link[=set_engine]{set_engine()}} function. This is the proportion of the
+training set that should be reserved for measuring performance (and stop
+early).
+
+If the model specification has \code{early_stop >= trees}, \code{early_stop} is
+converted to \code{trees - 1} and a warning is issued.
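+
+A sketch that stops after 10 iterations without improvement, measured on
+a 20\% internal validation set:\preformatted{# validation is an xgb_train() argument passed through set_engine()
+boost_tree(trees = 500, stop_iter = 10) \%>\%
+  set_engine("xgboost", validation = 0.2) \%>\%
+  set_mode("regression")
+}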
+}
+
+\subsection{Objective function}{
+
+parsnip chooses the objective function based on the characteristics of
+the outcome. To use a different loss, pass the \code{objective} argument to
+\code{\link[=set_engine]{set_engine()}}.
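+
+For example (a sketch; \code{"reg:pseudohubererror"} is an objective built
+into xgboost):\preformatted{boost_tree() \%>\%
+  set_engine("xgboost", objective = "reg:pseudohubererror") \%>\%
+  set_mode("regression")
+}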
+}
+
+}
+
+\subsection{References}{
+\itemize{
+\item \href{https://arxiv.org/abs/1603.02754}{XGBoost: A Scalable Tree Boosting System}
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_decision_tree_C5.0.Rd b/man/details_decision_tree_C5.0.Rd
new file mode 100644
index 000000000..6efac58d9
--- /dev/null
+++ b/man/details_decision_tree_C5.0.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/decision_tree_C5.0.R
+\name{details_decision_tree_C5.0}
+\alias{details_decision_tree_C5.0}
+\title{Decision trees via C5.0}
+\description{
+\code{\link[C50:C5.0]{C50::C5.0()}} fits a model as a set of \verb{if/then} statements that
+creates a tree-based structure.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has 1 tuning parameter:
+\itemize{
+\item \code{min_n}: Minimal Node Size (type: integer, default: 2L)
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{decision_tree(min_n = integer()) \%>\%
+ set_engine("C5.0") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Decision Tree Model Specification (classification)
+##
+## Main Arguments:
+## min_n = integer()
+##
+## Computational engine: C5.0
+##
+## Model fit template:
+## parsnip::C5.0_train(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## minCases = integer(), trials = 1)
+}
+
+\code{\link[=C5.0_train]{C5.0_train()}} is a wrapper around
+\code{\link[C50:C5.0]{C50::C5.0()}} that makes it easier to run this model.
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_decision_tree_rpart.Rd b/man/details_decision_tree_rpart.Rd
new file mode 100644
index 000000000..c929f8828
--- /dev/null
+++ b/man/details_decision_tree_rpart.Rd
@@ -0,0 +1,78 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/decision_tree_rpart.R
+\name{details_decision_tree_rpart}
+\alias{details_decision_tree_rpart}
+\title{Decision trees via CART}
+\description{
+\code{\link[rpart:rpart]{rpart::rpart()}} fits a model as a set of \verb{if/then} statements that
+creates a tree-based structure.
+}
+\details{
+For this engine, there are multiple modes: classification and regression.
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{tree_depth}: Tree Depth (type: integer, default: 30L)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 2L)
+\item \code{cost_complexity}: Cost-Complexity Parameter (type: double, default:
+0.01)
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\%
+ set_engine("rpart") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Decision Tree Model Specification (classification)
+##
+## Main Arguments:
+## cost_complexity = double(1)
+## tree_depth = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: rpart
+##
+## Model fit template:
+## rpart::rpart(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L,
+## data))
+}
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) \%>\%
+ set_engine("rpart") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Decision Tree Model Specification (regression)
+##
+## Main Arguments:
+## cost_complexity = double(1)
+## tree_depth = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: rpart
+##
+## Model fit template:
+## rpart::rpart(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## cp = double(1), maxdepth = integer(1), minsplit = min_rows(0L,
+## data))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_decision_tree_spark.Rd b/man/details_decision_tree_spark.Rd
new file mode 100644
index 000000000..7e3c34b73
--- /dev/null
+++ b/man/details_decision_tree_spark.Rd
@@ -0,0 +1,93 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/decision_tree_spark.R
+\name{details_decision_tree_spark}
+\alias{details_decision_tree_spark}
+\title{Decision trees via Spark}
+\description{
+\code{\link[sparklyr:ml_decision_tree]{sparklyr::ml_decision_tree()}} fits a model as a set of \verb{if/then}
+statements that creates a tree-based structure.
+}
+\details{
+For this engine, there are multiple modes: classification and regression.
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{tree_depth}: Tree Depth (type: integer, default: 5L)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 1L)
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\%
+ set_engine("spark") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Decision Tree Model Specification (classification)
+##
+## Main Arguments:
+## tree_depth = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_decision_tree_classifier(x = missing_arg(), formula = missing_arg(),
+## max_depth = integer(1), min_instances_per_node = min_rows(0L,
+## x), seed = sample.int(10^5, 1))
+}
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{}}\preformatted{decision_tree(tree_depth = integer(1), min_n = integer(1)) \%>\%
+ set_engine("spark") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Decision Tree Model Specification (regression)
+##
+## Main Arguments:
+## tree_depth = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_decision_tree_regressor(x = missing_arg(), formula = missing_arg(),
+## max_depth = integer(1), min_instances_per_node = min_rows(0L,
+## x), seed = sample.int(10^5, 1))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{Other details}{
+
+For models created using the \code{"spark"} engine, there are several things
+to consider.
+\itemize{
+\item Only the formula interface via \code{fit()} is available; using
+\code{fit_xy()} will generate an error.
+\item The predictions will always be in a Spark table format. The names
+will be the same as documented but without the dots.
+\item There is no equivalent to factor columns in Spark tables so class
+predictions are returned as character columns.
+\item To retain the model object for a new R session (via \code{save()}), the
+\code{model$fit} element of the parsnip object should be serialized via
+\code{ml_save(object$fit)} and separately saved to disk. In a new
+session, the object can be reloaded and reattached to the parsnip
+object.
+}
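+
+As noted in the last point above, the underlying Spark model can be saved
+separately. A sketch, assuming \code{fitted} is a fitted parsnip model and
+\code{sc} is an active Spark connection:\preformatted{sparklyr::ml_save(fitted$fit, path = "dt_model")
+
+# in a new R session:
+reloaded <- sparklyr::ml_load(sc, path = "dt_model")
+}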
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_linear_reg_glmnet.Rd b/man/details_linear_reg_glmnet.Rd
new file mode 100644
index 000000000..ac3d2e5d9
--- /dev/null
+++ b/man/details_linear_reg_glmnet.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/linear_reg_glmnet.R
+\name{details_linear_reg_glmnet}
+\alias{details_linear_reg_glmnet}
+\title{Linear regression via glmnet}
+\description{
+\code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses regularized least squares to fit models with numeric outcomes.
+}
+\details{
+For this engine, there is a single mode: regression.
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: see
+below)
+\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 1.0)
+}
+
+A value of \code{mixture = 1} corresponds to a pure lasso model, while
+\code{mixture = 0} indicates ridge regression.
+
+The \code{penalty} parameter has no default and requires a single numeric
+value. For more details about this, and the \code{glmnet} model in general,
+see \link{glmnet-details}.
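+
+For example (a sketch), a pure lasso fit with a fixed penalty value:\preformatted{# penalty must be given a single numeric value
+linear_reg(penalty = 0.1, mixture = 1) \%>\%
+  set_engine("glmnet")
+}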
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{linear_reg(penalty = double(1), mixture = double(1)) \%>\%
+ set_engine("glmnet") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Linear Regression Model Specification (regression)
+##
+## Main Arguments:
+## penalty = 0
+## mixture = double(1)
+##
+## Computational engine: glmnet
+##
+## Model fit template:
+## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## alpha = double(1), family = "gaussian")
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one. By default, \code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses
+the argument \code{standardize = TRUE} to center and scale the data.
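+
+When these steps are handled outside of the model fit, one option is the
+recipes package (a sketch; the formula and data are placeholders):\preformatted{library(recipes)
+
+# convert factors to indicators, then center and scale the predictors
+rec <- recipe(outcome ~ ., data = dat) \%>\%
+  step_dummy(all_nominal(), -all_outcomes()) \%>\%
+  step_normalize(all_numeric(), -all_outcomes())
+}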
+}
+
+\subsection{References}{
+\itemize{
+\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
+Learning with Sparsity}. CRC Press.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_linear_reg_keras.Rd b/man/details_linear_reg_keras.Rd
new file mode 100644
index 000000000..0863dec60
--- /dev/null
+++ b/man/details_linear_reg_keras.Rd
@@ -0,0 +1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/linear_reg_keras.R
+\name{details_linear_reg_keras}
+\alias{details_linear_reg_keras}
+\title{Linear regression via keras/tensorflow}
+\description{
+This model uses regularized least squares to fit models with numeric outcomes.
+}
+\details{
+For this engine, there is a single mode: regression.
+\subsection{Tuning Parameters}{
+
+This model has one tuning parameter:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+}
+
+For \code{penalty}, the amount of regularization is \emph{only} the L2
+penalty (i.e., ridge or weight decay).
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{linear_reg(penalty = double(1)) \%>\%
+ set_engine("keras") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Linear Regression Model Specification (regression)
+##
+## Main Arguments:
+## penalty = double(1)
+##
+## Computational engine: keras
+##
+## Model fit template:
+## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1),
+## hidden_units = 1, act = "linear")
+}
+
+\code{\link[=keras_mlp]{keras_mlp()}} is a parsnip wrapper around keras code for
+neural networks. This model fits a linear regression as a network with a
+single hidden unit.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Hoerl, A., & Kennard, R. (2000). \emph{Ridge Regression: Biased
+Estimation for Nonorthogonal Problems}. Technometrics, 42(1), 80-86.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_linear_reg_lm.Rd b/man/details_linear_reg_lm.Rd
new file mode 100644
index 000000000..e19f79615
--- /dev/null
+++ b/man/details_linear_reg_lm.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/linear_reg_lm.R
+\name{details_linear_reg_lm}
+\alias{details_linear_reg_lm}
+\title{Linear regression via lm}
+\description{
+\code{\link[stats:lm]{stats::lm()}} uses ordinary least squares to fit models with numeric outcomes.
+}
+\details{
+For this engine, there is a single mode: regression.
+\subsection{Tuning Parameters}{
+
+This engine has no tuning parameters.
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{linear_reg() \%>\%
+ set_engine("lm") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Linear Regression Model Specification (regression)
+##
+## Computational engine: lm
+##
+## Model fit template:
+## stats::lm(formula = missing_arg(), data = missing_arg(), weights = missing_arg())
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_linear_reg_spark.Rd b/man/details_linear_reg_spark.Rd
new file mode 100644
index 000000000..7353503e6
--- /dev/null
+++ b/man/details_linear_reg_spark.Rd
@@ -0,0 +1,88 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/linear_reg_spark.R
+\name{details_linear_reg_spark}
+\alias{details_linear_reg_spark}
+\title{Linear regression via spark}
+\description{
+\code{\link[sparklyr:ml_linear_regression]{sparklyr::ml_linear_regression()}} uses regularized least squares to fit
+models with numeric outcomes.
+}
+\details{
+For this engine, there is a single mode: regression.
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0)
+}
+
+For \code{penalty}, the amount of regularization includes both the L1 penalty
+(i.e., lasso) and the L2 penalty (i.e., ridge or weight decay).
+
+A value of \code{mixture = 1} corresponds to a pure lasso model, while
+\code{mixture = 0} indicates ridge regression.
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{linear_reg(penalty = double(1), mixture = double(1)) \%>\%
+ set_engine("spark") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Linear Regression Model Specification (regression)
+##
+## Main Arguments:
+## penalty = double(1)
+## mixture = double(1)
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_linear_regression(x = missing_arg(), formula = missing_arg(),
+## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one. By default, \code{ml_linear_regression()} uses the argument
+\code{standardization = TRUE} to center and scale the data.
+}
+
+\subsection{Other details}{
+
+For models created using the \code{"spark"} engine, there are several things
+to consider.
+\itemize{
+\item Only the formula interface via \code{fit()} is available; using
+\code{fit_xy()} will generate an error.
+\item The predictions will always be in a Spark table format. The names
+will be the same as documented but without the dots.
+\item There is no equivalent to factor columns in Spark tables so class
+predictions are returned as character columns.
+\item To retain the model object for a new R session (via \code{save()}), the
+\code{model$fit} element of the parsnip object should be serialized via
+\code{ml_save(object$fit)} and separately saved to disk. In a new
+session, the object can be reloaded and reattached to the parsnip
+object.
+}
+}
+
+\subsection{References}{
+\itemize{
+\item Luraschi, J, K Kuo, and E Ruiz. 2019. \emph{Mastering Spark with R}.
+O’Reilly Media.
+\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
+Learning with Sparsity}. CRC Press.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_linear_reg_stan.Rd b/man/details_linear_reg_stan.Rd
new file mode 100644
index 000000000..ea45be31f
--- /dev/null
+++ b/man/details_linear_reg_stan.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/linear_reg_stan.R
+\name{details_linear_reg_stan}
+\alias{details_linear_reg_stan}
+\title{Linear regression via Bayesian Methods}
+\description{
+The \code{"stan"} engine estimates regression parameters using Bayesian estimation.
+}
+\details{
+For this engine, there is a single mode: regression.
+\subsection{Tuning Parameters}{
+
+This engine has no tuning parameters.
+}
+
+\subsection{Important engine-specific options}{
+
+Some relevant arguments that can be passed to \code{set_engine()}:
+\itemize{
+\item \code{chains}: A positive integer specifying the number of Markov chains.
+The default is 4.
+\item \code{iter}: A positive integer specifying the number of iterations for
+each chain (including warmup). The default is 2000.
+\item \code{seed}: The seed for random number generation.
+\item \code{cores}: Number of cores to use when executing the chains in
+parallel.
+\item \code{prior}: The prior distribution for the (non-hierarchical)
+regression coefficients. The \code{"stan"} engine does not fit any
+hierarchical terms. See the \code{"stan_glmer"} engine from the
+multilevelmod package for that type of model.
+\item \code{prior_intercept}: The prior distribution for the intercept (after
+centering all predictors).
+}
+
+See \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}} and
+\code{\link[rstanarm:priors]{rstanarm::priors()}} for more information on these
+and other options.
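+
+A sketch (the prior functions shown are from rstanarm):\preformatted{# chains, iter, and seed are passed on to rstanarm::stan_glm()
+linear_reg() \%>\%
+  set_engine("stan",
+             chains = 4, iter = 2000, seed = 1234,
+             prior = rstanarm::normal(0, 2.5),
+             prior_intercept = rstanarm::normal(0, 10))
+}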
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{linear_reg() \%>\%
+ set_engine("stan") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Linear Regression Model Specification (regression)
+##
+## Computational engine: stan
+##
+## Model fit template:
+## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(),
+## weights = missing_arg(), family = stats::gaussian, refresh = 0)
+}
+
+Note that the \code{refresh} default prevents logging of the estimation
+process. Change this value in \code{set_engine()} to show the MCMC logs.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+}
+
+\subsection{Other details}{
+
+For prediction, the \code{"stan"} engine can compute posterior intervals
+analogous to confidence and prediction intervals. In these instances,
+the units are the original outcome and when \code{std_error = TRUE}, the
+standard deviation of the posterior distribution (or posterior
+predictive distribution as appropriate) is returned.
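+
+A sketch, assuming \code{fitted} is a fitted model and \code{new_data} is a
+data frame of predictors:\preformatted{# returns posterior intervals plus the posterior standard deviation
+predict(fitted, new_data, type = "conf_int", std_error = TRUE)
+}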
+}
+
+\subsection{References}{
+\itemize{
+\item McElreath, R. 2020. \emph{Statistical Rethinking}. CRC Press.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_logistic_reg_LiblineaR.Rd b/man/details_logistic_reg_LiblineaR.Rd
new file mode 100644
index 000000000..7723c6a02
--- /dev/null
+++ b/man/details_logistic_reg_LiblineaR.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/logistic_reg_LiblineaR.R
+\name{details_logistic_reg_LiblineaR}
+\alias{details_logistic_reg_LiblineaR}
+\title{Logistic regression via LiblineaR}
+\description{
+\code{\link[LiblineaR:LiblineaR]{LiblineaR::LiblineaR()}} fits a generalized linear model for binary outcomes. A
+linear combination of the predictors is used to model the log odds of an
+event.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: see
+below)
+\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0)
+}
+
+For \code{LiblineaR} models, the value for \code{mixture} can either be 0 (for
+ridge) or 1 (for lasso), but no intermediate values. In the
+\code{\link[LiblineaR:LiblineaR]{LiblineaR::LiblineaR()}} documentation, these
+correspond to types 0 (L2-regularized) and 6 (L1-regularized).
+
+Be aware that the \code{LiblineaR} engine regularizes the intercept. Other
+regularized regression models do not, which will result in different
+parameter estimates.
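+
+For example (a sketch), a pure L1-regularized model:\preformatted{# mixture must be exactly 0 (ridge) or 1 (lasso) for this engine
+logistic_reg(penalty = 0.1, mixture = 1) \%>\%
+  set_engine("LiblineaR")
+}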
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\%
+ set_engine("LiblineaR") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
+##
+## Main Arguments:
+## penalty = double(1)
+## mixture = double(1)
+##
+## Computational engine: LiblineaR
+##
+## Model fit template:
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
+## cost = Inf, type = double(1), verbose = FALSE)
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
+Learning with Sparsity}. CRC Press.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_logistic_reg_glm.Rd b/man/details_logistic_reg_glm.Rd
new file mode 100644
index 000000000..677bdc375
--- /dev/null
+++ b/man/details_logistic_reg_glm.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/logistic_reg_glm.R
+\name{details_logistic_reg_glm}
+\alias{details_logistic_reg_glm}
+\title{Logistic regression via glm}
+\description{
+\code{\link[stats:glm]{stats::glm()}} fits a generalized linear model for binary outcomes. A
+linear combination of the predictors is used to model the log odds of an
+event.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This engine has no tuning parameters.
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{logistic_reg() \%>\%
+ set_engine("glm") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
+##
+## Computational engine: glm
+##
+## Model fit template:
+## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## family = stats::binomial)
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_logistic_reg_glmnet.Rd b/man/details_logistic_reg_glmnet.Rd
new file mode 100644
index 000000000..6e437995b
--- /dev/null
+++ b/man/details_logistic_reg_glmnet.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/logistic_reg_glmnet.R
+\name{details_logistic_reg_glmnet}
+\alias{details_logistic_reg_glmnet}
+\title{Logistic regression via glmnet}
+\description{
+\code{\link[glmnet:glmnet]{glmnet::glmnet()}} fits a generalized linear model for binary outcomes. A
+linear combination of the predictors is used to model the log odds of an
+event.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: see
+below)
+\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 1.0)
+}
+
+A value of \code{mixture = 1} corresponds to a pure lasso model, while
+\code{mixture = 0} indicates ridge regression.
+
+The \code{penalty} parameter has no default and requires a single numeric
+value. For more details about this, and the \code{glmnet} model in general,
+see \link{glmnet-details}.
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\%
+ set_engine("glmnet") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
+##
+## Main Arguments:
+## penalty = 0
+## mixture = double(1)
+##
+## Computational engine: glmnet
+##
+## Model fit template:
+## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## alpha = double(1), family = "binomial")
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one. By default, \code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses
+the argument \code{standardize = TRUE} to center and scale the data.
+}
+
+\subsection{References}{
+\itemize{
+\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
+Learning with Sparsity}. CRC Press.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_logistic_reg_keras.Rd b/man/details_logistic_reg_keras.Rd
new file mode 100644
index 000000000..7ce3d4659
--- /dev/null
+++ b/man/details_logistic_reg_keras.Rd
@@ -0,0 +1,64 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/logistic_reg_keras.R
+\name{details_logistic_reg_keras}
+\alias{details_logistic_reg_keras}
+\title{Logistic regression via keras}
+\description{
+\code{\link[=keras_mlp]{keras_mlp()}} fits a generalized linear model for binary outcomes. A
+linear combination of the predictors is used to model the log odds of an
+event.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has one tuning parameter:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+}
+
+For \code{penalty}, the amount of regularization is \emph{only} the L2
+penalty (i.e., ridge or weight decay).
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{logistic_reg(penalty = double(1)) \%>\%
+ set_engine("keras") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
+##
+## Main Arguments:
+## penalty = double(1)
+##
+## Computational engine: keras
+##
+## Model fit template:
+## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1),
+## hidden_units = 1, act = "linear")
+}
+
+\code{\link[=keras_mlp]{keras_mlp()}} is a parsnip wrapper around keras code for
+neural networks. This model fits a logistic regression as a network with
+a single hidden unit.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Hoerl, A., & Kennard, R. (2000). \emph{Ridge Regression: Biased
+Estimation for Nonorthogonal Problems}. Technometrics, 42(1), 80-86.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_logistic_reg_spark.Rd b/man/details_logistic_reg_spark.Rd
new file mode 100644
index 000000000..2cbce1292
--- /dev/null
+++ b/man/details_logistic_reg_spark.Rd
@@ -0,0 +1,90 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/logistic_reg_spark.R
+\name{details_logistic_reg_spark}
+\alias{details_logistic_reg_spark}
+\title{Logistic regression via spark}
+\description{
+\code{\link[sparklyr:ml_logistic_regression]{sparklyr::ml_logistic_regression()}} fits a generalized linear model for
+binary outcomes. A linear combination of the predictors is used to model the
+log odds of an event.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0)
+}
+
+For \code{penalty}, the amount of regularization includes both the L1 penalty
+(i.e., lasso) and the L2 penalty (i.e., ridge or weight decay).
+
+A value of \code{mixture = 1} corresponds to a pure lasso model, while
+\code{mixture = 0} indicates ridge regression.
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{logistic_reg(penalty = double(1), mixture = double(1)) \%>\%
+ set_engine("spark") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
+##
+## Main Arguments:
+## penalty = double(1)
+## mixture = double(1)
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
+## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
+## family = "binomial")
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one. By default, \code{ml_logistic_regression()} uses the
+argument \code{standardization = TRUE} to center and scale the data.
+}
+
+\subsection{Other details}{
+
+For models created using the \code{"spark"} engine, there are several things
+to consider.
+\itemize{
+\item Only the formula interface via \code{fit()} is available; using
+\code{fit_xy()} will generate an error.
+\item The predictions will always be in a Spark table format. The names
+will be the same as documented but without the dots.
+\item There is no equivalent to factor columns in Spark tables so class
+predictions are returned as character columns.
+\item To retain the model object for a new R session (via \code{save()}), the
+\code{model$fit} element of the parsnip object should be serialized via
+\code{ml_save(object$fit)} and separately saved to disk. In a new
+session, the object can be reloaded and reattached to the parsnip
+object.
+}
+}
+
+\subsection{References}{
+\itemize{
+\item Luraschi, J, K Kuo, and E Ruiz. 2019. \emph{Mastering Spark with R}.
+O’Reilly Media.
+\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
+Learning with Sparsity}. CRC Press.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_logistic_reg_stan.Rd b/man/details_logistic_reg_stan.Rd
new file mode 100644
index 000000000..8f727ca1e
--- /dev/null
+++ b/man/details_logistic_reg_stan.Rd
@@ -0,0 +1,81 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/logistic_reg_stan.R
+\name{details_logistic_reg_stan}
+\alias{details_logistic_reg_stan}
+\title{Logistic regression via stan}
+\description{
+\code{\link[rstanarm:stan_glm]{rstanarm::stan_glm()}} fits a generalized linear model for binary outcomes.
+A linear combination of the predictors is used to model the log odds of an
+event.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This engine has no tuning parameters.
+}
+
+\subsection{Important engine-specific options}{
+
+Some relevant arguments that can be passed to \code{set_engine()}:
+\itemize{
+\item \code{chains}: A positive integer specifying the number of Markov chains.
+The default is 4.
+\item \code{iter}: A positive integer specifying the number of iterations for
+each chain (including warmup). The default is 2000.
+\item \code{seed}: The seed for random number generation.
+\item \code{cores}: Number of cores to use when executing the chains in
+parallel.
+\item \code{prior}: The prior distribution for the (non-hierarchical)
+regression coefficients. This \code{"stan"} engine does not fit any
+hierarchical terms.
+\item \code{prior_intercept}: The prior distribution for the intercept (after
+centering all predictors).
+}
+
+See \code{\link[rstan:stanmodel-method-sampling]{rstan::sampling()}} and
+\code{\link[rstanarm:priors]{rstanarm::priors()}} for more information on these
+and other options.
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{logistic_reg() \%>\%
+ set_engine("stan") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
+##
+## Computational engine: stan
+##
+## Model fit template:
+## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(),
+## weights = missing_arg(), family = stats::binomial, refresh = 0)
+}
+
+Note that the \code{refresh} default prevents logging of the estimation
+process. Change this value in \code{set_engine()} to show the MCMC logs.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+}
+
+\subsection{Other details}{
+
+For prediction, the \code{"stan"} engine can compute posterior intervals
+analogous to confidence and prediction intervals. In these instances,
+the units are the original outcome and when \code{std_error = TRUE}, the
+standard deviation of the posterior distribution (or posterior
+predictive distribution as appropriate) is returned.
+}
+
+\subsection{References}{
+\itemize{
+\item McElreath, R. 2020. \emph{Statistical Rethinking}. CRC Press.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_mars_earth.Rd b/man/details_mars_earth.Rd
new file mode 100644
index 000000000..7d17cc1a1
--- /dev/null
+++ b/man/details_mars_earth.Rd
@@ -0,0 +1,94 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mars_earth.R
+\name{details_mars_earth}
+\alias{details_mars_earth}
+\title{Multivariate adaptive regression splines (MARS) via earth}
+\description{
+\code{\link[earth:earth]{earth::earth()}} fits a generalized linear model that uses artificial features for
+some predictors. These features resemble hinge functions and the result is
+a model that is a segmented regression in small dimensions.
+}
+\details{
+For this engine, there are multiple modes: classification and regression.
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{num_terms}: # Model Terms (type: integer, default: see below)
+\item \code{prod_degree}: Degree of Interaction (type: integer, default: 1L)
+\item \code{prune_method}: Pruning Method (type: character, default:
+‘backward’)
+}
+
+The default value of \code{num_terms} depends on the number of predictor
+columns. For a data frame \code{x}, the default is
+\code{min(200, max(20, 2 * ncol(x))) + 1} (see
+\code{\link[earth:earth]{earth::earth()}} and the reference below).
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{}}\preformatted{mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\%
+ set_engine("earth") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## MARS Model Specification (regression)
+##
+## Main Arguments:
+## num_terms = integer(1)
+## prod_degree = integer(1)
+## prune_method = character(1)
+##
+## Computational engine: earth
+##
+## Model fit template:
+## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## nprune = integer(1), degree = integer(1), pmethod = character(1),
+## keepxy = TRUE)
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) \%>\%
+ set_engine("earth") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## MARS Model Specification (classification)
+##
+## Main Arguments:
+## num_terms = integer(1)
+## prod_degree = integer(1)
+## prune_method = character(1)
+##
+## Engine-Specific Arguments:
+## glm = list(family = stats::binomial)
+##
+## Computational engine: earth
+##
+## Model fit template:
+## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## nprune = integer(1), degree = integer(1), pmethod = character(1),
+## glm = list(family = stats::binomial), keepxy = TRUE)
+}
+
+An alternate method for using MARS for categorical outcomes can be found
+in \code{\link[discrim:discrim_flexible]{discrim::discrim_flexible()}}.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+}
+
+\subsection{References}{
+\itemize{
+\item Friedman, J. 1991. “Multivariate Adaptive Regression Splines.” \emph{The
+Annals of Statistics}, vol. 19, no. 1, pp. 1-67.
+\item Milborrow, S. \href{http://www.milbo.org/doc/earth-notes.pdf}{“Notes on the earth package.”}
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_mlp_keras.Rd b/man/details_mlp_keras.Rd
new file mode 100644
index 000000000..f40402ed6
--- /dev/null
+++ b/man/details_mlp_keras.Rd
@@ -0,0 +1,100 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mlp_keras.R
+\name{details_mlp_keras}
+\alias{details_mlp_keras}
+\title{Multilayer perceptron via keras}
+\description{
+\code{\link[=keras_mlp]{keras_mlp()}} fits a single layer, feed-forward neural network.
+}
+\details{
+For this engine, there are multiple modes: classification and regression.
+\subsection{Tuning Parameters}{
+
+This model has 5 tuning parameters:
+\itemize{
+\item \code{hidden_units}: # Hidden Units (type: integer, default: 5L)
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+\item \code{dropout}: Dropout Rate (type: double, default: 0.0)
+\item \code{epochs}: # Epochs (type: integer, default: 20L)
+\item \code{activation}: Activation Function (type: character, default:
+‘softmax’)
+}
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{}}\preformatted{mlp(
+ hidden_units = integer(1),
+ penalty = double(1),
+ dropout = double(1),
+ epochs = integer(1),
+ activation = character(1)
+) \%>\%
+ set_engine("keras") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Single Layer Neural Network Specification (regression)
+##
+## Main Arguments:
+## hidden_units = integer(1)
+## penalty = double(1)
+## dropout = double(1)
+## epochs = integer(1)
+## activation = character(1)
+##
+## Computational engine: keras
+##
+## Model fit template:
+## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1),
+## penalty = double(1), dropout = double(1), epochs = integer(1),
+## activation = character(1))
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{mlp(
+ hidden_units = integer(1),
+ penalty = double(1),
+ dropout = double(1),
+ epochs = integer(1),
+ activation = character(1)
+) \%>\%
+ set_engine("keras") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Single Layer Neural Network Specification (classification)
+##
+## Main Arguments:
+## hidden_units = integer(1)
+## penalty = double(1)
+## dropout = double(1)
+## epochs = integer(1)
+## activation = character(1)
+##
+## Computational engine: keras
+##
+## Model fit template:
+## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = integer(1),
+## penalty = double(1), dropout = double(1), epochs = integer(1),
+## activation = character(1))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_mlp_nnet.Rd b/man/details_mlp_nnet.Rd
new file mode 100644
index 000000000..f2575a122
--- /dev/null
+++ b/man/details_mlp_nnet.Rd
@@ -0,0 +1,97 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mlp_nnet.R
+\name{details_mlp_nnet}
+\alias{details_mlp_nnet}
+\title{Multilayer perceptron via nnet}
+\description{
+\code{\link[nnet:nnet]{nnet::nnet()}} fits a single layer, feed-forward neural network.
+}
+\details{
+For this engine, there are multiple modes: classification and regression.
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{hidden_units}: # Hidden Units (type: integer, default: none)
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+\item \code{epochs}: # Epochs (type: integer, default: 100L)
+}
+
+Note that, in \code{\link[nnet:nnet]{nnet::nnet()}}, the maximum number of
+parameters is an argument with a fairly low default of \code{MaxNWts = 1000}.
+For some models, you may need to pass a larger value in via
+\code{\link[=set_engine]{set_engine()}} so that the model does not fail.
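+
+A sketch that raises this limit for a larger network:\preformatted{# MaxNWts is passed through to nnet::nnet()
+mlp(hidden_units = 10) \%>\%
+  set_engine("nnet", MaxNWts = 5000) \%>\%
+  set_mode("classification")
+}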
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{}}\preformatted{mlp(
+ hidden_units = integer(1),
+ penalty = double(1),
+ epochs = integer(1)
+) \%>\%
+ set_engine("nnet") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Single Layer Neural Network Specification (regression)
+##
+## Main Arguments:
+## hidden_units = integer(1)
+## penalty = double(1)
+## epochs = integer(1)
+##
+## Computational engine: nnet
+##
+## Model fit template:
+## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## size = integer(1), decay = double(1), maxit = integer(1),
+## trace = FALSE, linout = TRUE)
+}
+
+Note that parsnip automatically sets linear activation in the last
+layer.
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{}}\preformatted{mlp(
+ hidden_units = integer(1),
+ penalty = double(1),
+ epochs = integer(1)
+) \%>\%
+ set_engine("nnet") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Single Layer Neural Network Specification (classification)
+##
+## Main Arguments:
+## hidden_units = integer(1)
+## penalty = double(1)
+## epochs = integer(1)
+##
+## Computational engine: nnet
+##
+## Model fit template:
+## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
+## size = integer(1), decay = double(1), maxit = integer(1),
+## trace = FALSE, linout = FALSE)
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_multinom_reg_glmnet.Rd b/man/details_multinom_reg_glmnet.Rd
new file mode 100644
index 000000000..b64927e87
--- /dev/null
+++ b/man/details_multinom_reg_glmnet.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/multinom_reg_glmnet.R
+\name{details_multinom_reg_glmnet}
+\alias{details_multinom_reg_glmnet}
+\title{Multinomial regression via glmnet}
+\description{
+\code{\link[glmnet:glmnet]{glmnet::glmnet()}} fits a model that uses linear predictors to predict
+multiclass data using the multinomial distribution.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: see
+below)
+\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 1.0)
+}
+
+A value of \code{mixture = 1} corresponds to a pure lasso model, while
+\code{mixture = 0} indicates ridge regression.
+
+The \code{penalty} parameter has no default and requires a single numeric
+value. For more details about this, and the \code{glmnet} model in general,
+see \link{glmnet-details}.
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{multinom_reg(penalty = double(1), mixture = double(1)) \%>\%
+ set_engine("glmnet") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Multinomial Regression Model Specification (classification)
+##
+## Main Arguments:
+## penalty = 0
+## mixture = double(1)
+##
+## Computational engine: glmnet
+##
+## Model fit template:
+## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## alpha = double(1), family = "multinomial")
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one. By default, \code{\link[glmnet:glmnet]{glmnet::glmnet()}} uses
+the argument \code{standardize = TRUE} to center and scale the data.
+}
+
+\subsection{References}{
+\itemize{
+\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
+Learning with Sparsity}. CRC Press.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_multinom_reg_keras.Rd b/man/details_multinom_reg_keras.Rd
new file mode 100644
index 000000000..aa7dba295
--- /dev/null
+++ b/man/details_multinom_reg_keras.Rd
@@ -0,0 +1,63 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/multinom_reg_keras.R
+\name{details_multinom_reg_keras}
+\alias{details_multinom_reg_keras}
+\title{Multinomial regression via keras}
+\description{
+\code{\link[=keras_mlp]{keras_mlp()}} fits a model that uses linear predictors to predict
+multiclass data using the multinomial distribution.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has one tuning parameter:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+}
+
+For \code{penalty}, the amount of regularization is \emph{only} the L2
+penalty (i.e., ridge or weight decay).
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{multinom_reg(penalty = double(1)) \%>\%
+ set_engine("keras") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Multinomial Regression Model Specification (classification)
+##
+## Main Arguments:
+## penalty = double(1)
+##
+## Computational engine: keras
+##
+## Model fit template:
+## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), penalty = double(1),
+## hidden_units = 1, act = "linear")
+}
+
+\code{\link[=keras_mlp]{keras_mlp()}} is a parsnip wrapper around keras code for
+neural networks. This model fits a multinomial regression as a network
+with a single hidden unit.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Hoerl, A., & Kennard, R. (2000). \emph{Ridge Regression: Biased
+Estimation for Nonorthogonal Problems}. Technometrics, 42(1), 80-86.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_multinom_reg_nnet.Rd b/man/details_multinom_reg_nnet.Rd
new file mode 100644
index 000000000..d4e268713
--- /dev/null
+++ b/man/details_multinom_reg_nnet.Rd
@@ -0,0 +1,63 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/multinom_reg_nnet.R
+\name{details_multinom_reg_nnet}
+\alias{details_multinom_reg_nnet}
+\title{Multinomial regression via nnet}
+\description{
+\code{\link[nnet:multinom]{nnet::multinom()}} fits a model that uses linear predictors to predict
+multiclass data using the multinomial distribution.
+}
+\details{
+For this engine, there is a single mode: classification.
+\subsection{Tuning Parameters}{
+
+This model has 1 tuning parameter:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+}
+
+For \code{penalty}, the amount of regularization includes only the L2 penalty
+(i.e., ridge or weight decay).
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{}}\preformatted{multinom_reg(penalty = double(1)) \%>\%
+ set_engine("nnet") \%>\%
+ translate()
+}\if{html}{\out{}}\preformatted{## Multinomial Regression Model Specification (classification)
+##
+## Main Arguments:
+## penalty = double(1)
+##
+## Computational engine: nnet
+##
+## Model fit template:
+## nnet::multinom(formula = missing_arg(), data = missing_arg(),
+## weights = missing_arg(), decay = double(1), trace = FALSE)
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
+Learning with Sparsity}. CRC Press.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_multinom_reg_spark.Rd b/man/details_multinom_reg_spark.Rd
new file mode 100644
index 000000000..c045e2ab0
--- /dev/null
+++ b/man/details_multinom_reg_spark.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/multinom_reg_spark.R
+\name{details_multinom_reg_spark}
+\alias{details_multinom_reg_spark}
+\title{Multinomial regression via spark}
+\description{
+\code{\link[sparklyr:ml_logistic_regression]{sparklyr::ml_logistic_regression()}} fits a model that uses linear
+predictors to predict multiclass data using the multinomial distribution.
+}
+\details{
+For this engine, there is a single mode: classification
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{penalty}: Amount of Regularization (type: double, default: 0.0)
+\item \code{mixture}: Proportion of Lasso Penalty (type: double, default: 0.0)
+}
+
+For \code{penalty}, the amount of regularization includes both the L1 penalty
+(i.e., lasso) and the L2 penalty (i.e., ridge or weight decay).
+
+A value of \code{mixture = 1} corresponds to a pure lasso model, while
+\code{mixture = 0} indicates ridge regression.
+}
+
+\subsection{Translation from parsnip to the original package}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{multinom_reg(penalty = double(1), mixture = double(1)) \%>\%
+ set_engine("spark") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Multinomial Regression Model Specification (classification)
+##
+## Main Arguments:
+## penalty = double(1)
+## mixture = double(1)
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
+## weight_col = missing_arg(), reg_param = double(1), elastic_net_param = double(1),
+## family = "multinomial")
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one. By default, \code{ml_logistic_regression()} uses the
+argument \code{standardization = TRUE} to center and scale the data.
+}
+
+\subsection{Other details}{
+
+For models created using the \code{"spark"} engine, there are several things
+to consider.
+\itemize{
+\item Only the formula interface via \code{fit()} is available; using
+\code{fit_xy()} will generate an error.
+\item The predictions will always be in a Spark table format. The names
+will be the same as documented but without the dots.
+\item There is no equivalent to factor columns in Spark tables so class
+predictions are returned as character columns.
+\item To retain the model object for a new R session (via \code{save()}), the
+\code{model$fit} element of the parsnip object should be serialized via
+\code{ml_save(object$fit)} and separately saved to disk. In a new
+session, the object can be reloaded and reattached to the parsnip
+object, as sketched below.
+}
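+
+A minimal sketch of that save/reload workflow (the Spark connection \code{sc}
+and the file path are illustrative assumptions, not part of the original
+documentation):\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(sparklyr)
+
+# serialize the underlying Spark model from a fitted parsnip object `fit`
+ml_save(fit$fit, "path/to/model")
+
+# in a new R session, reload the Spark model and reattach it
+fit$fit <- ml_load(sc, "path/to/model")
+}\if{html}{\out{</div>}}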
+}
+
+\subsection{References}{
+\itemize{
+\item Luraschi, J, K Kuo, and E Ruiz. 2019. \emph{Mastering Spark with R}.
+O’Reilly Media
+\item Hastie, T, R Tibshirani, and M Wainwright. 2015. \emph{Statistical
+Learning with Sparsity}. CRC Press.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_nearest_neighbor_kknn.Rd b/man/details_nearest_neighbor_kknn.Rd
new file mode 100644
index 000000000..fa34905e1
--- /dev/null
+++ b/man/details_nearest_neighbor_kknn.Rd
@@ -0,0 +1,94 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/nearest_neighbor_kknn.R
+\name{details_nearest_neighbor_kknn}
+\alias{details_nearest_neighbor_kknn}
+\title{K-nearest neighbors via kknn}
+\description{
+\code{\link[kknn:train.kknn]{kknn::train.kknn()}} fits a model that uses the \code{K} most similar data points
+from the training set to predict new samples.
+}
+\details{
+For this engine, there are multiple modes: classification and regression
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{neighbors}: # Nearest Neighbors (type: integer, default: 5L)
+\item \code{weight_func}: Distance Weighting Function (type: character,
+default: ‘optimal’)
+\item \code{dist_power}: Minkowski Distance Order (type: double, default: 2.0)
+}
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{nearest_neighbor(
+ neighbors = integer(1),
+ weight_func = character(1),
+ dist_power = double(1)
+) \%>\%
+ set_engine("kknn") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## K-Nearest Neighbor Model Specification (regression)
+##
+## Main Arguments:
+## neighbors = integer(1)
+## weight_func = character(1)
+## dist_power = double(1)
+##
+## Computational engine: kknn
+##
+## Model fit template:
+## kknn::train.kknn(formula = missing_arg(), data = missing_arg(),
+## ks = min_rows(0L, data, 5), kernel = character(1), distance = double(1))
+}
+
+\code{min_rows()} will adjust the number of neighbors if the chosen value
+is not consistent with the actual data dimensions.
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{nearest_neighbor(
+ neighbors = integer(1),
+ weight_func = character(1),
+ dist_power = double(1)
+) \%>\%
+ set_engine("kknn") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## K-Nearest Neighbor Model Specification (classification)
+##
+## Main Arguments:
+## neighbors = integer(1)
+## weight_func = character(1)
+## dist_power = double(1)
+##
+## Computational engine: kknn
+##
+## Model fit template:
+## kknn::train.kknn(formula = missing_arg(), data = missing_arg(),
+## ks = min_rows(0L, data, 5), kernel = character(1), distance = double(1))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Hechenbichler K. and Schliep K.P. (2004) \href{https://epub.ub.uni-muenchen.de/1769/}{Weighted k-Nearest-Neighbor Techniques and Ordinal Classification}, Discussion
+Paper 399, SFB 386, Ludwig-Maximilians University Munich
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_rand_forest_randomForest.Rd b/man/details_rand_forest_randomForest.Rd
new file mode 100644
index 000000000..09dea5f2f
--- /dev/null
+++ b/man/details_rand_forest_randomForest.Rd
@@ -0,0 +1,98 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rand_forest_randomForest.R
+\name{details_rand_forest_randomForest}
+\alias{details_rand_forest_randomForest}
+\title{Random forests via randomForest}
+\description{
+\code{\link[randomForest:randomForest]{randomForest::randomForest()}} fits a model that creates a large number of
+decision trees, each independent of the others. The final prediction uses all
+predictions from the individual trees and combines them.
+}
+\details{
+For this engine, there are multiple modes: classification and regression
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: see
+below)
+\item \code{trees}: # Trees (type: integer, default: 500L)
+\item \code{min_n}: Minimal Node Size (type: integer, default: see below)
+}
+
+\code{mtry} depends on the number of columns and the model mode. The default
+in \code{\link[randomForest:randomForest]{randomForest::randomForest()}} is
+\code{floor(sqrt(ncol(x)))} for classification and \code{floor(ncol(x)/3)} for
+regression.
+
+\code{min_n} depends on the mode. For regression, a value of 5 is the
+default. For classification, a value of 10 is used.
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) \%>\%
+ set_engine("randomForest") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Random Forest Model Specification (regression)
+##
+## Main Arguments:
+## mtry = integer(1)
+## trees = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: randomForest
+##
+## Model fit template:
+## randomForest::randomForest(x = missing_arg(), y = missing_arg(),
+## mtry = min_cols(~integer(1), x), ntree = integer(1), nodesize = min_rows(~integer(1),
+## x))
+}
+
+\code{min_rows()} and \code{min_cols()} will adjust the values of \code{min_n} and
+\code{mtry} if they are not consistent with the actual data dimensions.
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) \%>\%
+ set_engine("randomForest") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Random Forest Model Specification (classification)
+##
+## Main Arguments:
+## mtry = integer(1)
+## trees = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: randomForest
+##
+## Model fit template:
+## randomForest::randomForest(x = missing_arg(), y = missing_arg(),
+## mtry = min_cols(~integer(1), x), ntree = integer(1), nodesize = min_rows(~integer(1),
+## x))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_rand_forest_ranger.Rd b/man/details_rand_forest_ranger.Rd
new file mode 100644
index 000000000..4910b9965
--- /dev/null
+++ b/man/details_rand_forest_ranger.Rd
@@ -0,0 +1,115 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rand_forest_ranger.R
+\name{details_rand_forest_ranger}
+\alias{details_rand_forest_ranger}
+\title{Random forests via ranger}
+\description{
+\code{\link[ranger:ranger]{ranger::ranger()}} fits a model that creates a large number of decision
+trees, each independent of the others. The final prediction uses all
+predictions from the individual trees and combines them.
+}
+\details{
+For this engine, there are multiple modes: classification and regression
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: see
+below)
+\item \code{trees}: # Trees (type: integer, default: 500L)
+\item \code{min_n}: Minimal Node Size (type: integer, default: see below)
+}
+
+\code{mtry} depends on the number of columns. The default in
+\code{\link[ranger:ranger]{ranger::ranger()}} is \code{floor(sqrt(ncol(x)))}.
+
+\code{min_n} depends on the mode. For regression, a value of 5 is the
+default. For classification, a value of 10 is used.
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) \%>\%
+ set_engine("ranger") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Random Forest Model Specification (regression)
+##
+## Main Arguments:
+## mtry = integer(1)
+## trees = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: ranger
+##
+## Model fit template:
+## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(),
+## mtry = min_cols(~integer(1), x), num.trees = integer(1),
+## min.node.size = min_rows(~integer(1), x), num.threads = 1,
+## verbose = FALSE, seed = sample.int(10^5, 1))
+}
+
+\code{min_rows()} and \code{min_cols()} will adjust the values of \code{min_n} and
+\code{mtry} if they are not consistent with the actual data dimensions.
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) \%>\%
+ set_engine("ranger") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Random Forest Model Specification (classification)
+##
+## Main Arguments:
+## mtry = integer(1)
+## trees = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: ranger
+##
+## Model fit template:
+## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(),
+## mtry = min_cols(~integer(1), x), num.trees = integer(1),
+## min.node.size = min_rows(~integer(1), x), num.threads = 1,
+## verbose = FALSE, seed = sample.int(10^5, 1), probability = TRUE)
+}
+
+Note that a \code{ranger} probability forest is always fit (unless the
+\code{probability} argument is changed by the user via
+\code{\link[=set_engine]{set_engine()}}).
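+
+For example, a sketch that turns this off (predictions then use majority
+vote rather than class probabilities):\if{html}{\out{<div class="sourceCode r">}}\preformatted{rand_forest() \%>\%
+  set_engine("ranger", probability = FALSE) \%>\%
+  set_mode("classification")
+}\if{html}{\out{</div>}}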
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{Other notes}{
+
+By default, parallel processing is turned off. When tuning, it is more
+efficient to parallelize over the resamples and tuning parameters. To
+parallelize the construction of the trees within the \code{ranger} model,
+change the \code{num.threads} argument via \code{\link[=set_engine]{set_engine()}}.
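+
+For example, a sketch that uses four threads during fitting (the thread
+count here is purely illustrative):\if{html}{\out{<div class="sourceCode r">}}\preformatted{rand_forest() \%>\%
+  set_engine("ranger", num.threads = 4) \%>\%
+  set_mode("regression")
+}\if{html}{\out{</div>}}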
+
+For \code{ranger} confidence intervals, the intervals are constructed using
+the form \verb{estimate +/- z * std_error}. For classification probabilities,
+these values can fall outside of \verb{[0, 1]} and will be coerced to be in
+this range.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_rand_forest_spark.Rd b/man/details_rand_forest_spark.Rd
new file mode 100644
index 000000000..52f8a67de
--- /dev/null
+++ b/man/details_rand_forest_spark.Rd
@@ -0,0 +1,116 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/rand_forest_spark.R
+\name{details_rand_forest_spark}
+\alias{details_rand_forest_spark}
+\title{Random forests via spark}
+\description{
+\code{\link[sparklyr:ml_random_forest]{sparklyr::ml_random_forest()}} fits a model that creates a large number of
+decision trees, each independent of the others. The final prediction uses all
+predictions from the individual trees and combines them.
+}
+\details{
+For this engine, there are multiple modes: classification and regression
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{mtry}: # Randomly Selected Predictors (type: integer, default: see
+below)
+\item \code{trees}: # Trees (type: integer, default: 20L)
+\item \code{min_n}: Minimal Node Size (type: integer, default: 1L)
+}
+
+\code{mtry} depends on the number of columns and the model mode. The default
+in \code{\link[sparklyr:ml_random_forest]{sparklyr::ml_random_forest()}} is
+\code{floor(sqrt(ncol(x)))} for classification and \code{floor(ncol(x)/3)} for
+regression.
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) \%>\%
+ set_engine("spark") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Random Forest Model Specification (regression)
+##
+## Main Arguments:
+## mtry = integer(1)
+## trees = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_random_forest(x = missing_arg(), formula = missing_arg(),
+## type = "regression", feature_subset_strategy = integer(1),
+## num_trees = integer(1), min_instances_per_node = min_rows(~integer(1),
+## x), seed = sample.int(10^5, 1))
+}
+
+\code{min_rows()} and \code{min_cols()} will adjust the chosen values if they
+are not consistent with the actual data dimensions.
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) \%>\%
+ set_engine("spark") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Random Forest Model Specification (classification)
+##
+## Main Arguments:
+## mtry = integer(1)
+## trees = integer(1)
+## min_n = integer(1)
+##
+## Computational engine: spark
+##
+## Model fit template:
+## sparklyr::ml_random_forest(x = missing_arg(), formula = missing_arg(),
+## type = "classification", feature_subset_strategy = integer(1),
+## num_trees = integer(1), min_instances_per_node = min_rows(~integer(1),
+## x), seed = sample.int(10^5, 1))
+}
+}
+
+\subsection{Preprocessing requirements}{
+
+This engine does not require any special encoding of the predictors.
+Categorical predictors can be partitioned into groups of factor levels
+(e.g. \verb{\{a, c\}} vs \verb{\{b, d\}}) when splitting at a node. Dummy variables
+are not required for this model.
+}
+
+\subsection{Other details}{
+
+For models created using the \code{"spark"} engine, there are several things
+to consider.
+\itemize{
+\item Only the formula interface via \code{fit()} is available; using
+\code{fit_xy()} will generate an error.
+\item The predictions will always be in a Spark table format. The names
+will be the same as documented but without the dots.
+\item There is no equivalent to factor columns in Spark tables so class
+predictions are returned as character columns.
+\item To retain the model object for a new R session (via \code{save()}), the
+\code{model$fit} element of the parsnip object should be serialized via
+\code{ml_save(object$fit)} and separately saved to disk. In a new
+session, the object can be reloaded and reattached to the parsnip
+object.
+}
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_svm_linear_LiblineaR.Rd b/man/details_svm_linear_LiblineaR.Rd
new file mode 100644
index 000000000..6491729f8
--- /dev/null
+++ b/man/details_svm_linear_LiblineaR.Rd
@@ -0,0 +1,95 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/svm_linear_LiblineaR.R
+\name{details_svm_linear_LiblineaR}
+\alias{details_svm_linear_LiblineaR}
+\title{Linear support vector machines (SVMs) via LiblineaR}
+\description{
+\code{\link[LiblineaR:LiblineaR]{LiblineaR::LiblineaR()}} fits a support vector machine model. For classification,
+the model tries to maximize the width of the margin between classes.
+For regression, the model optimizes a robust loss function that is only
+affected by very large model residuals.
+}
+\details{
+For this engine, there are multiple modes: classification and regression
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{cost}: Cost (type: double, default: 1.0)
+\item \code{margin}: Insensitivity Margin (type: double, default: no default)
+}
+
+This engine fits models that are L2-regularized for L2-loss. In the
+\code{\link[LiblineaR:LiblineaR]{LiblineaR::LiblineaR()}} documentation, these
+are types 1 (classification) and 11 (regression).
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{svm_linear(
+ cost = double(1),
+ margin = double(1)
+) \%>\%
+ set_engine("LiblineaR") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Linear Support Vector Machine Specification (regression)
+##
+## Main Arguments:
+## cost = double(1)
+## margin = double(1)
+##
+## Computational engine: LiblineaR
+##
+## Model fit template:
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
+## C = double(1), svr_eps = double(1), type = 11)
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{svm_linear(
+ cost = double(1)
+) \%>\%
+ set_engine("LiblineaR") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Linear Support Vector Machine Specification (classification)
+##
+## Main Arguments:
+## cost = double(1)
+##
+## Computational engine: LiblineaR
+##
+## Model fit template:
+## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
+## C = double(1), type = 1)
+}
+
+The \code{margin} parameter does not apply to classification models.
+
+Note that the \code{LiblineaR} engine does not produce class probabilities.
+When optimizing the model using the tune package, the default metrics
+require class probabilities. To use the \verb{tune_*()} functions, a metric
+set must be passed as an argument that only contains metrics for hard
+class predictions (e.g., accuracy).
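+
+A minimal sketch of such a metric set (assuming the yardstick package;
+\code{svm_wflow} and \code{folds} below are placeholder objects):\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(yardstick)
+
+cls_metrics <- metric_set(accuracy, kap)
+
+# e.g., tune_grid(svm_wflow, resamples = folds, metrics = cls_metrics)
+}\if{html}{\out{</div>}}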
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_svm_linear_kernlab.Rd b/man/details_svm_linear_kernlab.Rd
new file mode 100644
index 000000000..fede1ccff
--- /dev/null
+++ b/man/details_svm_linear_kernlab.Rd
@@ -0,0 +1,96 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/svm_linear_kernlab.R
+\name{details_svm_linear_kernlab}
+\alias{details_svm_linear_kernlab}
+\title{Linear support vector machines (SVMs) via kernlab}
+\description{
+\code{\link[kernlab:ksvm]{kernlab::ksvm()}} fits a support vector machine model. For classification,
+the model tries to maximize the width of the margin between classes.
+For regression, the model optimizes a robust loss function that is only
+affected by very large model residuals.
+}
+\details{
+For this engine, there are multiple modes: classification and regression
+\subsection{Tuning Parameters}{
+
+This model has 2 tuning parameters:
+\itemize{
+\item \code{cost}: Cost (type: double, default: 1.0)
+\item \code{margin}: Insensitivity Margin (type: double, default: 0.1)
+}
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{svm_linear(
+ cost = double(1),
+ margin = double(1)
+) \%>\%
+ set_engine("kernlab") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Linear Support Vector Machine Specification (regression)
+##
+## Main Arguments:
+## cost = double(1)
+## margin = double(1)
+##
+## Computational engine: kernlab
+##
+## Model fit template:
+## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1),
+## epsilon = double(1), kernel = "vanilladot")
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{svm_linear(
+ cost = double(1)
+) \%>\%
+ set_engine("kernlab") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Linear Support Vector Machine Specification (classification)
+##
+## Main Arguments:
+## cost = double(1)
+##
+## Computational engine: kernlab
+##
+## Model fit template:
+## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1),
+## kernel = "vanilladot", prob.model = TRUE)
+}
+
+The \code{margin} parameter does not apply to classification models.
+
+Note that the \code{"kernlab"} engine does not naturally estimate class
+probabilities. To produce them, the decision values of the model are
+converted to probabilities using Platt scaling. This method fits an
+additional model on top of the SVM model. When fitting the Platt scaling
+model, random numbers are used that are not reproducible or controlled
+by R’s random number stream.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Lin, HT, and R Weng. \href{https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf}{“A Note on Platt’s Probabilistic Outputs for Support Vector Machines”}
+\item Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004.
+\href{https://www.jstatsoft.org/article/view/v011i09}{“kernlab - An S4 Package for Kernel Methods in R.”}, \emph{Journal of
+Statistical Software}.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_svm_poly_kernlab.Rd b/man/details_svm_poly_kernlab.Rd
new file mode 100644
index 000000000..996e74a08
--- /dev/null
+++ b/man/details_svm_poly_kernlab.Rd
@@ -0,0 +1,108 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/svm_poly_kernlab.R
+\name{details_svm_poly_kernlab}
+\alias{details_svm_poly_kernlab}
+\title{Polynomial support vector machines (SVMs) via kernlab}
+\description{
+\code{\link[kernlab:ksvm]{kernlab::ksvm()}} fits a support vector machine model. For classification,
+the model tries to maximize the width of the margin between classes.
+For regression, the model optimizes a robust loss function that is only
+affected by very large model residuals.
+}
+\details{
+For this engine, there are multiple modes: classification and regression
+\subsection{Tuning Parameters}{
+
+This model has 4 tuning parameters:
+\itemize{
+\item \code{cost}: Cost (type: double, default: 1.0)
+\item \code{degree}: Degree of Interaction (type: integer, default: 1L)
+\item \code{scale_factor}: Scale Factor (type: double, default: 1.0)
+\item \code{margin}: Insensitivity Margin (type: double, default: 0.1)
+}
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{svm_poly(
+ cost = double(1),
+ degree = integer(1),
+ scale_factor = double(1),
+ margin = double(1)
+) \%>\%
+ set_engine("kernlab") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Polynomial Support Vector Machine Specification (regression)
+##
+## Main Arguments:
+## cost = double(1)
+## degree = integer(1)
+## scale_factor = double(1)
+## margin = double(1)
+##
+## Computational engine: kernlab
+##
+## Model fit template:
+## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1),
+## epsilon = double(1), kernel = "polydot", kpar = list(degree = ~integer(1),
+## scale = ~double(1)))
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{svm_poly(
+ cost = double(1),
+ degree = integer(1),
+ scale_factor = double(1)
+) \%>\%
+ set_engine("kernlab") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Polynomial Support Vector Machine Specification (classification)
+##
+## Main Arguments:
+## cost = double(1)
+## degree = integer(1)
+## scale_factor = double(1)
+##
+## Computational engine: kernlab
+##
+## Model fit template:
+## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1),
+## kernel = "polydot", prob.model = TRUE, kpar = list(degree = ~integer(1),
+## scale = ~double(1)))
+}
+
+The \code{margin} parameter does not apply to classification models.
+
+Note that the \code{"kernlab"} engine does not naturally estimate class
+probabilities. To produce them, the decision values of the model are
+converted to probabilities using Platt scaling. This method fits an
+additional model on top of the SVM model. When fitting the Platt scaling
+model, random numbers are used that are not reproducible or controlled
+by R’s random number stream.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Lin, HT, and R Weng. \href{https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf}{“A Note on Platt’s Probabilistic Outputs for Support Vector Machines”}
+\item Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004.
+\href{https://www.jstatsoft.org/article/view/v011i09}{“kernlab - An S4 Package for Kernel Methods in R.”}, \emph{Journal of
+Statistical Software}.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/details_svm_rbf_kernlab.Rd b/man/details_svm_rbf_kernlab.Rd
new file mode 100644
index 000000000..8417a7d9b
--- /dev/null
+++ b/man/details_svm_rbf_kernlab.Rd
@@ -0,0 +1,108 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/svm_rbf_kernlab.R
+\name{details_svm_rbf_kernlab}
+\alias{details_svm_rbf_kernlab}
+\title{Radial basis function support vector machines (SVMs) via kernlab}
+\description{
+\code{\link[kernlab:ksvm]{kernlab::ksvm()}} fits a support vector machine model. For classification,
+the model tries to maximize the width of the margin between classes.
+For regression, the model optimizes a robust loss function that is only
+affected by very large model residuals.
+}
+\details{
+For this engine, there are multiple modes: classification and regression
+\subsection{Tuning Parameters}{
+
+This model has 3 tuning parameters:
+\itemize{
+\item \code{cost}: Cost (type: double, default: 1.0)
+\item \code{rbf_sigma}: Radial Basis Function sigma (type: double, default: see
+below)
+\item \code{margin}: Insensitivity Margin (type: double, default: 0.1)
+}
+
+There is no default for the radial basis function kernel parameter.
+kernlab estimates it from the data using a heuristic method. See
+\code{\link[kernlab:sigest]{kernlab::sigest()}}. This method uses random
+numbers so, without setting the seed before fitting, the model will not
+be reproducible.
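+
+For example, a sketch that makes the sigma estimate reproducible (the seed
+value and the data are arbitrary):\if{html}{\out{<div class="sourceCode r">}}\preformatted{set.seed(1234)
+svm_rbf(cost = 1) \%>\%
+  set_engine("kernlab") \%>\%
+  set_mode("classification") \%>\%
+  fit(Species ~ ., data = iris)
+}\if{html}{\out{</div>}}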
+}
+
+\subsection{Translation from parsnip to the original package (regression)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{svm_rbf(
+ cost = double(1),
+ rbf_sigma = double(1),
+ margin = double(1)
+) \%>\%
+ set_engine("kernlab") \%>\%
+ set_mode("regression") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Radial Basis Function Support Vector Machine Specification (regression)
+##
+## Main Arguments:
+## cost = double(1)
+## rbf_sigma = double(1)
+## margin = double(1)
+##
+## Computational engine: kernlab
+##
+## Model fit template:
+## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1),
+## epsilon = double(1), kernel = "rbfdot", kpar = list(sigma = ~double(1)))
+}
+}
+
+\subsection{Translation from parsnip to the original package (classification)}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{svm_rbf(
+ cost = double(1),
+ rbf_sigma = double(1)
+) \%>\%
+ set_engine("kernlab") \%>\%
+ set_mode("classification") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Radial Basis Function Support Vector Machine Specification (classification)
+##
+## Main Arguments:
+## cost = double(1)
+## rbf_sigma = double(1)
+##
+## Computational engine: kernlab
+##
+## Model fit template:
+## kernlab::ksvm(x = missing_arg(), data = missing_arg(), C = double(1),
+## kernel = "rbfdot", prob.model = TRUE, kpar = list(sigma = ~double(1)))
+}
+
+The \code{margin} parameter does not apply to classification models.
+
+Note that the \code{"kernlab"} engine does not naturally estimate class
+probabilities. To produce them, the decision values of the model are
+converted to probabilities using Platt scaling. This method fits an
+additional model on top of the SVM model. When fitting the Platt scaling
+model, random numbers are used that are not reproducible or controlled
+by R’s random number stream.
+}
+
+\subsection{Preprocessing requirements}{
+
+Factor/categorical predictors need to be converted to numeric values
+(e.g., dummy or indicator variables) for this engine. When using the
+formula method via
+\code{\link[=fit.model_spec]{fit.model_spec()}}, parsnip will
+convert factor columns to indicators.
+
+Predictors should have the same scale. One way to achieve this is to
+center and scale each so that each predictor has mean zero and a
+variance of one.
+}
+
+\subsection{References}{
+\itemize{
+\item Lin, HT, and R Weng. \href{https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf}{“A Note on Platt’s Probabilistic Outputs for Support Vector Machines”}
+\item Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004.
+\href{https://www.jstatsoft.org/article/view/v011i09}{“kernlab - An S4 Package for Kernel Methods in R.”}, \emph{Journal of
+Statistical Software}.
+\item Kuhn, M, and K Johnson. 2013. \emph{Applied Predictive Modeling}.
+Springer.
+}
+}
+}
+\keyword{internal}
diff --git a/man/doc-tools.Rd b/man/doc-tools.Rd
new file mode 100644
index 000000000..f1c882d4d
--- /dev/null
+++ b/man/doc-tools.Rd
@@ -0,0 +1,71 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/aaa_models.R
+\name{doc-tools}
+\alias{doc-tools}
+\alias{find_engine_files}
+\alias{make_engine_list}
+\alias{make_seealso_list}
+\title{Tools for dynamically documenting packages}
+\usage{
+find_engine_files(mod)
+
+make_engine_list(mod)
+
+make_seealso_list(mod)
+}
+\arguments{
+\item{mod}{A character string for the model file (e.g. "linear_reg")}
+}
+\value{
+\code{make_engine_list()} returns a character string that creates a
+bulleted list of links to more specific help files.
+
+\code{make_seealso_list()} returns a formatted character string of links.
+
+\code{find_engine_files()} returns a tibble.
+}
+\description{
+These are functions used to create dynamic documentation in Rd files
+based on which parsnip-related packages are loaded by the user.
+
+These functions can be used to make dynamic lists of documentation help
+files. \pkg{parsnip} uses these, along with files contained in \code{man/rmd}
+containing expanded documentation, for specific model/engine combinations.
+\code{\link[=find_engine_files]{find_engine_files()}} looks for files that have the pattern
+\verb{details_\{model\}_\{engine\}.Rd} to link to. These files are generated by files
+named \verb{man/rmd/details_\{model\}_\{engine\}.Rmd}. \code{make_engine_list()} creates a
+list seen at the top of the model Rd files while \code{make_seealso_list()}
+populates the list seen in "See Also" below. See the details section.
+}
+\details{
+The \pkg{parsnip} documentation is generated \emph{dynamically}. Part of the Rd
+file populates a list of engines that depends on what packages are loaded
+\emph{at the time that the man file is loaded}. For example, if
+another package has a new engine for \code{linear_reg()}, the
+\code{parsnip::linear_reg()} help can show a link to a detailed help page in the
+other package.
+
+To enable this, the process for a package developer is to:
+\enumerate{
+\item Create an engine-specific R file in the \code{R} directory with the name
+\verb{\{model\}_\{engine\}.R} (e.g. \code{boost_tree_C5.0.R}). This has a small amount of
+documentation, as well as the directive
+"\verb{@includeRmd man/rmd/\{model\}_\{engine\}.Rmd details}".
+\item Copy the file in \pkg{parsnip} that is in \code{man/rmd/setup.Rmd} and put
+it in the same place in your package.
+\item Write your own \verb{man/rmd/\{model\}_\{engine\}.Rmd} file. This can include
+packages that are not listed in the DESCRIPTION file. Those are only
+required when the documentation file is created locally (probably using
+\code{\link[devtools:document]{devtools::document()}}).
+\item Run \code{\link[devtools:document]{devtools::document()}} so that the Rmd content is included in the
+Rd file.
+}
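+
+For step 1, a minimal engine-specific R file might look like the following
+sketch (the \code{fancyreg} engine name is hypothetical):\if{html}{\out{<div class="sourceCode r">}}\preformatted{#' Linear regression via the hypothetical fancyreg package
+#'
+#' @includeRmd man/rmd/linear_reg_fancyreg.Rmd details
+#'
+#' @name details_linear_reg_fancyreg
+#' @keywords internal
+NULL
+}\if{html}{\out{</div>}}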
+
+The examples in \pkg{parsnip} can provide guidance for how to organize
+technical information about the models.
+}
+\examples{
+find_engine_files("linear_reg")
+cat(make_engine_list("linear_reg"))
+}
+\keyword{internal}
diff --git a/man/gen_additive_mod.Rd b/man/gen_additive_mod.Rd
index 3296bc04f..b1f0869f3 100644
--- a/man/gen_additive_mod.Rd
+++ b/man/gen_additive_mod.Rd
@@ -11,7 +11,7 @@ gen_additive_mod(
)
}
\arguments{
-\item{mode}{A single character string for the type of model.
+\item{mode}{A single character string for the prediction outcome mode.
Possible values for this model are "unknown", "regression", or
"classification".}
diff --git a/man/glmnet-details.Rd b/man/glmnet-details.Rd
new file mode 100644
index 000000000..ea42b28c9
--- /dev/null
+++ b/man/glmnet-details.Rd
@@ -0,0 +1,197 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/glmnet_details.R
+\name{glmnet-details}
+\alias{glmnet-details}
+\title{Technical aspects of the glmnet model}
+\description{
+glmnet is a popular statistical model for regularized generalized linear
+models. These notes reflect common questions about this particular model.
+}
+\section{tidymodels and glmnet}{
+The implementation of the glmnet package has some nice features. For
+example, one of the main tuning parameters, the regularization penalty,
+does not need to be specified when fitting the model. The package fits a
+compendium of values, called the regularization path. These values
+depend on the data set and the value of \code{alpha}, the mixture parameter
+between a pure ridge model (\code{alpha = 0}) and a pure lasso model
+(\code{alpha = 1}). When predicting, any penalty values can be predicted
+simultaneously, even those that are not exactly on the regularization path.
+For those, the model interpolates between the closest path values to
+produce a prediction. There is an argument called \code{lambda} to the
+\code{glmnet()} function that is used to specify the path.
+
+In the discussion below, \code{linear_reg()} is used. The information is true
+for all parsnip models that have a \code{"glmnet"} engine.
+\subsection{Fitting and predicting using parsnip}{
+
+Recall that tidymodels uses standardized parameter names across models
+chosen to be low on jargon. The argument \code{penalty} is the equivalent of
+what glmnet calls the \code{lambda} value and \code{mixture} is the same as their
+\code{alpha} value.
+
+In tidymodels, our \code{predict()} methods are defined to make one
+prediction at a time. For this model, that means predictions are for a
+single penalty value. For this reason, models that have glmnet engines
+require the user to always specify a single penalty value when the model
+is defined. For example, for linear regression:\if{html}{\out{<div class="sourceCode r">}}\preformatted{linear_reg(penalty = 1) \%>\% set_engine("glmnet")
+}\if{html}{\out{</div>}}
+
+When the \code{predict()} method is called, it automatically uses the penalty
+that was given when the model was defined. For example:\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(tidymodels)
+
+fit <-
+ linear_reg(penalty = 1) \%>\%
+ set_engine("glmnet") \%>\%
+ fit(mpg ~ ., data = mtcars)
+
+# predict at penalty = 1
+predict(fit, mtcars[1:3,])
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 3 x 1
+## .pred
+## <dbl>
+## 1 22.2
+## 2 21.5
+## 3 24.9
+}
+
+However, any penalty values can be predicted simultaneously using the
+\code{multi_predict()} method:\if{html}{\out{<div class="sourceCode r">}}\preformatted{# predict at c(0.00, 0.01)
+multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01))
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 3 x 1
+## .pred
+## <list>
+## 1 <tibble [2 × 2]>
+## 2 <tibble [2 × 2]>
+## 3 <tibble [2 × 2]>
+}\if{html}{\out{<div class="sourceCode r">}}\preformatted{# unnested:
+multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01)) \%>\%
+ add_rowindex() \%>\%
+ unnest(cols = ".pred")
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 6 x 3
+## penalty .pred .row
+## <dbl> <dbl> <int>
+## 1 0 22.6 1
+## 2 0.01 22.5 1
+## 3 0 22.1 2
+## 4 0.01 22.1 2
+## 5 0 26.3 3
+## 6 0.01 26.3 3
+}
+\subsection{Where did \code{lambda} go?}{
+
+It may appear odd that the \code{lambda} value does not get used in the fit:\if{html}{\out{<div class="sourceCode r">}}\preformatted{linear_reg(penalty = 1) \%>\%
+ set_engine("glmnet") \%>\%
+ translate()
+}\if{html}{\out{</div>}}\preformatted{## Linear Regression Model Specification (regression)
+##
+## Main Arguments:
+## penalty = 1
+##
+## Computational engine: glmnet
+##
+## Model fit template:
+## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
+## family = "gaussian")
+}
+
+Internally, the value of \code{penalty = 1} is saved in the parsnip object
+and no value is set for \code{lambda}. This enables the full path to be fit
+by \code{glmnet()}. See the section below about setting the path.
+}
+
+}
+
+\subsection{How do I set the regularization path?}{
+
+Regardless of what value you use for \code{penalty}, the full coefficient
+path is used when \code{\link[glmnet:glmnet]{glmnet::glmnet()}} is called.
+
+What if you want to manually set this path? Normally, you would pass a
+vector to \code{lambda} in \code{\link[glmnet:glmnet]{glmnet::glmnet()}}.
+
+parsnip models that use a \code{glmnet} engine can use a special optional
+argument called \code{path_values}. This is \emph{not} an argument to
+\code{\link[glmnet:glmnet]{glmnet::glmnet()}}; it is used by parsnip to
+independently set the path.
+
+For example, we have found that if you want a fully ridge regression
+model (i.e., \code{mixture = 0}), you can get the \emph{wrong coefficients} if the
+path does not contain zero (see \href{https://github.com/tidymodels/parsnip/issues/431#issuecomment-782883848}{issue #431}).
+
+If we want to use our own path, the argument is passed as an
+engine-specific option:\if{html}{\out{<div class="sourceCode r">}}\preformatted{coef_path_values <- c(0, 10^seq(-5, 1, length.out = 7))
+
+fit_ridge <-
+ linear_reg(penalty = 1, mixture = 0) \%>\%
+ set_engine("glmnet", path_values = coef_path_values) \%>\%
+ fit(mpg ~ ., data = mtcars)
+
+all.equal(sort(fit_ridge$fit$lambda), coef_path_values)
+}\if{html}{\out{</div>}}\preformatted{## [1] TRUE
+}\if{html}{\out{<div class="sourceCode r">}}\preformatted{# predict at penalty = 1
+predict(fit_ridge, mtcars[1:3,])
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 3 x 1
+## .pred
+## <dbl>
+## 1 22.1
+## 2 21.8
+## 3 26.6
+}
+}
+
+\subsection{Tidying the model object}{
+
+\code{\link[broom:reexports]{broom::tidy()}} is a function that gives a summary of
+the object as a tibble.
+
+\strong{tl;dr} \code{tidy()} on a \code{glmnet} model produced by parsnip gives the
+coefficients for the value given by \code{penalty}.
+
+When parsnip makes a model, it gives it an extra class. If you use the
+\code{tidy()} method on the object, it produces coefficients for the penalty
+that was originally requested:\if{html}{\out{<div class="sourceCode r">}}\preformatted{tidy(fit)
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 11 x 3
+## term estimate penalty
+## <chr> <dbl> <dbl>
+## 1 (Intercept) 35.3 1
+## 2 cyl -0.872 1
+## 3 disp 0 1
+## 4 hp -0.0101 1
+## 5 drat 0 1
+## 6 wt -2.59 1
+## 7 qsec 0 1
+## 8 vs 0 1
+## 9 am 0 1
+## 10 gear 0 1
+## 11 carb 0 1
+}
+
+Note that there is a \code{tidy()} method for \code{glmnet} objects in the \code{broom}
+package. If this is used directly on the underlying \code{glmnet} object, it
+returns \emph{all of the coefficients on the path}:\if{html}{\out{<div class="sourceCode r">}}\preformatted{# Use the basic tidy() method for glmnet
+all_tidy_coefs <- broom:::tidy.glmnet(fit$fit)
+all_tidy_coefs
+}\if{html}{\out{</div>}}\preformatted{## # A tibble: 640 x 5
+## term step estimate lambda dev.ratio
+## <chr> <dbl> <dbl> <dbl> <dbl>
+## 1 (Intercept) 1 20.1 5.15 0
+## 2 (Intercept) 2 21.6 4.69 0.129
+## 3 (Intercept) 3 23.2 4.27 0.248
+## 4 (Intercept) 4 24.7 3.89 0.347
+## 5 (Intercept) 5 26.0 3.55 0.429
+## 6 (Intercept) 6 27.2 3.23 0.497
+## 7 (Intercept) 7 28.4 2.95 0.554
+## 8 (Intercept) 8 29.4 2.68 0.601
+## 9 (Intercept) 9 30.3 2.45 0.640
+## 10 (Intercept) 10 31.1 2.23 0.673
+## # … with 630 more rows
+}\if{html}{\out{<div class="sourceCode r">}}\preformatted{length(unique(all_tidy_coefs$lambda))
+}\if{html}{\out{</div>}}\preformatted{## [1] 79
+}
+
+This can be nice for plots but it might not contain the penalty value
+that you are interested in.
+}
+}
+
+\keyword{internal}
diff --git a/man/linear_reg.Rd b/man/linear_reg.Rd
index f1955a6f1..8da30c9c8 100644
--- a/man/linear_reg.Rd
+++ b/man/linear_reg.Rd
@@ -2,12 +2,12 @@
% Please edit documentation in R/linear_reg.R
\name{linear_reg}
\alias{linear_reg}
-\title{General Interface for Linear Regression Models}
+\title{Linear regression}
\usage{
linear_reg(mode = "regression", engine = "lm", penalty = NULL, mixture = NULL)
}
\arguments{
-\item{mode}{A single character string for the prediction outcome mode.
+\item{mode}{A single character string for the type of model.
The only possible value for this model is "regression".}
\item{engine}{A single character string specifying what computational engine
@@ -15,198 +15,40 @@ to use for fitting. Possible engines are listed below. The default for this
model is \code{"lm"}.}
\item{penalty}{A non-negative number representing the total
-amount of regularization (\code{glmnet}, \code{keras}, and \code{spark} only).
-For \code{keras} models, this corresponds to purely L2 regularization
-(aka weight decay) while the other models can be a combination
-of L1 and L2 (depending on the value of \code{mixture}; see below).}
+amount of regularization (specific engines only).}
\item{mixture}{A number between zero and one (inclusive) that is the
proportion of L1 regularization (i.e. lasso) in the model. When
\code{mixture = 1}, it is a pure lasso model while \code{mixture = 0} indicates that
-ridge regression is being used. (\code{glmnet} and \code{spark} only).}
+ridge regression is being used (specific engines only).}
}
\description{
-\code{linear_reg()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R, Stan, keras, or via Spark. The main
-arguments for the model are:
-\itemize{
-\item \code{penalty}: The total amount of regularization
-in the model. Note that this must be zero for some engines.
-\item \code{mixture}: The mixture amounts of different types of
-regularization (see below). Note that this will be ignored for some engines.
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The data given to the function are not saved and are only used
-to determine the \emph{mode} of the model. For \code{linear_reg()}, the
-mode will always be "regression".
-
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"lm"} (the default) or \code{"glmnet"}
-\item \pkg{Stan}: \code{"stan"}
-\item \pkg{Spark}: \code{"spark"}
-\item \pkg{keras}: \code{"keras"}
-}
-
-For this model, other packages may add additional engines. Use
-\code{\link[=show_engines]{show_engines()}} to see the current set of engines.
-}
-\note{
-For models created using the spark engine, there are
-several differences to consider. First, only the formula
-interface to via \code{fit()} is available; using \code{fit_xy()} will
-generate an error. Second, the predictions will always be in a
-spark table format. The names will be the same as documented but
-without the dots. Third, there is no equivalent to factor
-columns in spark tables so class predictions are returned as
-character columns. Fourth, to retain the model object for a new
-R session (via \code{save()}), the \code{model$fit} element of the \code{parsnip}
-object should be serialized via \code{ml_save(object$fit)} and
-separately saved to disk. In a new session, the object can be
-reloaded and reattached to the \code{parsnip} object.
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below.
-\subsection{lm}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{linear_reg() \%>\%
- set_engine("lm") \%>\%
- translate()
-}\if{html}{\out{</div>}}\preformatted{## Linear Regression Model Specification (regression)
-##
-## Computational engine: lm
-##
-## Model fit template:
-## stats::lm(formula = missing_arg(), data = missing_arg(), weights = missing_arg())
-}
-}
-
-\subsection{glmnet}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{linear_reg(penalty = 0.1) \%>\%
- set_engine("glmnet") \%>\%
- translate()
-}\if{html}{\out{</div>}}\preformatted{## Linear Regression Model Specification (regression)
-##
-## Main Arguments:
-## penalty = 0.1
-##
-## Computational engine: glmnet
-##
-## Model fit template:
-## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## family = "gaussian")
-}
-
-The glmnet engine requires a single value for the \code{penalty} argument (a
-number or \code{tune()}), but the full regularization path is always fit
-regardless of the value given to \code{penalty}. To pass in a custom sequence
-of values for glmnet’s \code{lambda}, use the argument \code{path_values} in
-\code{set_engine()}. This will assign the value of the glmnet \code{lambda}
-parameter without disturbing the value given of \code{linear_reg(penalty)}.
-For example:\if{html}{\out{<div class="sourceCode r">}}\preformatted{linear_reg(penalty = .1) \%>\%
- set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) \%>\%
- translate()
-}\if{html}{\out{</div>}}\preformatted{## Linear Regression Model Specification (regression)
-##
-## Main Arguments:
-## penalty = 0.1
-##
-## Computational engine: glmnet
-##
-## Model fit template:
-## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "gaussian")
-}
-
-When fitting a pure ridge regression model (i.e., \code{penalty = 0}), we
-\emph{strongly suggest} that you pass in a vector for \code{path_values} that
-includes zero. See \href{https://github.com/tidymodels/parsnip/issues/431}{issue #431} for a
-discussion.
-
-When using \code{predict()}, the single \code{penalty} value used for prediction
-is the one specified in \code{linear_reg()}.
+\code{linear_reg()} defines a model that can predict numeric values from
+predictors using a linear function.
-To predict on multiple penalties, use the \code{multi_predict()} function.
-This function returns a tibble with a list column called \code{.pred}
-containing all of the penalty results.
-}
-
-\subsection{stan}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{linear_reg() \%>\%
- set_engine("stan") \%>\%
- translate()
-}\if{html}{\out{</div>}}\preformatted{## Linear Regression Model Specification (regression)
-##
-## Computational engine: stan
-##
-## Model fit template:
-## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(),
-## weights = missing_arg(), family = stats::gaussian, refresh = 0)
-}
-
-Note that the \code{refresh} default prevents logging of the estimation
-process. Change this value in \code{set_engine()} to show the logs.
-
-For prediction, the \code{stan} engine can compute posterior intervals
-analogous to confidence and prediction intervals. In these instances,
-the units are the original outcome and when \code{std_error = TRUE}, the
-standard deviation of the posterior distribution (or posterior
-predictive distribution as appropriate) is returned.
-}
-
-\subsection{spark}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{linear_reg() \%>\%
- set_engine("spark") \%>\%
- translate()
-}\if{html}{\out{</div>}}\preformatted{## Linear Regression Model Specification (regression)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_linear_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg())
-}
-}
-
-\subsection{keras}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{linear_reg() \%>\%
- set_engine("keras") \%>\%
- translate()
-}\if{html}{\out{</div>}}\preformatted{## Linear Regression Model Specification (regression)
-##
-## Computational engine: keras
-##
-## Model fit template:
-## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = 1,
-## act = "linear")
-}
-}
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-\subsection{Parameter translations}{
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("linear_reg")}
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{llll}{
- \strong{parsnip} \tab \strong{glmnet} \tab \strong{spark} \tab \strong{keras} \cr
- penalty \tab lambda \tab reg_param (0) \tab penalty (0) \cr
- mixture \tab alpha (1) \tab elastic_net_param (0) \tab NA \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("linear_reg")
linear_reg()
-# Parameters can be represented by a placeholder:
-linear_reg(penalty = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("linear_reg")}
}
diff --git a/man/logistic_reg.Rd b/man/logistic_reg.Rd
index 6545293ce..8cb616f7c 100644
--- a/man/logistic_reg.Rd
+++ b/man/logistic_reg.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/logistic_reg.R
\name{logistic_reg}
\alias{logistic_reg}
-\title{General Interface for Logistic Regression Models}
+\title{Logistic regression}
\usage{
logistic_reg(
mode = "classification",
@@ -12,7 +12,7 @@ logistic_reg(
)
}
\arguments{
-\item{mode}{A single character string for the prediction outcome mode.
+\item{mode}{A single character string for the type of model.
The only possible value for this model is "classification".}
\item{engine}{A single character string specifying what computational engine
@@ -20,7 +20,7 @@ to use for fitting. Possible engines are listed below. The default for this
model is \code{"glm"}.}
\item{penalty}{A non-negative number representing the total
-amount of regularization (\code{glmnet}, \code{LiblineaR}, \code{keras}, and \code{spark} only).
+amount of regularization (specific engines only).
For \code{keras} models, this corresponds to purely L2 regularization
(aka weight decay) while the other models can be either or a combination
of L1 and L2 (depending on the value of \code{mixture}).}
@@ -28,212 +28,37 @@ of L1 and L2 (depending on the value of \code{mixture}).}
\item{mixture}{A number between zero and one (inclusive) that is the
proportion of L1 regularization (i.e. lasso) in the model. When
\code{mixture = 1}, it is a pure lasso model while \code{mixture = 0} indicates that
-ridge regression is being used. (\code{glmnet}, \code{LiblineaR}, and \code{spark} only).
+ridge regression is being used (specific engines only).
For \code{LiblineaR} models, \code{mixture} must be exactly 0 or 1 only.}
}
\description{
-\code{logistic_reg()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R, Stan, keras, or via Spark. The main
-arguments for the model are:
-\itemize{
-\item \code{penalty}: The total amount of regularization
-in the model. Note that this must be zero for some engines.
-\item \code{mixture}: The mixture amounts of different types of
-regularization (see below). Note that this will be ignored for some engines.
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-For \code{logistic_reg()}, the mode will always be "classification".
-
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"glm"} (the default), \code{"glmnet"}, or \code{"LiblineaR"}
-\item \pkg{Stan}: \code{"stan"}
-\item \pkg{Spark}: \code{"spark"}
-\item \pkg{keras}: \code{"keras"}
-}
-
-For this model, other packages may add additional engines. Use
-\code{\link[=show_engines]{show_engines()}} to see the current set of engines.
-}
-\note{
-For models created using the spark engine, there are
-several differences to consider. First, only the formula
-interface to via \code{fit()} is available; using \code{fit_xy()} will
-generate an error. Second, the predictions will always be in a
-spark table format. The names will be the same as documented but
-without the dots. Third, there is no equivalent to factor
-columns in spark tables so class predictions are returned as
-character columns. Fourth, to retain the model object for a new
-R session (via \code{save()}), the \code{model$fit} element of the \code{parsnip}
-object should be serialized via \code{ml_save(object$fit)} and
-separately saved to disk. In a new session, the object can be
-reloaded and reattached to the \code{parsnip} object.
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below.
-\subsection{glm}{\if{html}{\out{}}\preformatted{logistic_reg() \%>\%
- set_engine("glm") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
-##
-## Computational engine: glm
-##
-## Model fit template:
-## stats::glm(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## family = stats::binomial)
-}
-}
+\code{\link[=logistic_reg]{logistic_reg()}} defines a generalized linear model for binary outcomes. A
+linear combination of the predictors is used to model the log odds of an
+event.
-\subsection{glmnet}{\if{html}{\out{}}\preformatted{logistic_reg(penalty = 0.1) \%>\%
- set_engine("glmnet") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
-##
-## Main Arguments:
-## penalty = 0.1
-##
-## Computational engine: glmnet
-##
-## Model fit template:
-## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## family = "binomial")
-}
-
-The glmnet engine requires a single value for the \code{penalty} argument (a
-number or \code{tune()}), but the full regularization path is always fit
-regardless of the value given to \code{penalty}. To pass in a custom sequence
-of values for glmnet’s \code{lambda}, use the argument \code{path_values} in
-\code{set_engine()}. This will assign the value of the glmnet \code{lambda}
-parameter without disturbing the value given of \code{logistic_reg(penalty)}.
-For example:\if{html}{\out{}}\preformatted{logistic_reg(penalty = .1) \%>\%
- set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
-##
-## Main Arguments:
-## penalty = 0.1
-##
-## Computational engine: glmnet
-##
-## Model fit template:
-## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "binomial")
-}
-
-When fitting a pure ridge regression model (i.e., \code{penalty = 0}), we
-\emph{strongly suggest} that you pass in a vector for \code{path_values} that
-includes zero. See \href{https://github.com/tidymodels/parsnip/issues/431}{issue #431} for a
-discussion.
-
-When using \code{predict()}, the single \code{penalty} value used for prediction
-is the one specified in \code{logistic_reg()}.
-
-To predict on multiple penalties, use the \code{multi_predict()} function.
-This function returns a tibble with a list column called \code{.pred}
-containing all of the penalty results.
-}
-
-\subsection{LiblineaR}{\if{html}{\out{}}\preformatted{logistic_reg() \%>\%
- set_engine("LiblineaR") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
-##
-## Computational engine: LiblineaR
-##
-## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## verbose = FALSE)
-}
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-For \code{LiblineaR} models, the value for \code{mixture} can either be 0 (for
-ridge) or 1 (for lasso) but not other intermediate values. In the
-\code{LiblineaR} documentation, these correspond to types 0 (L2-regularized)
-and 6 (L1-regularized).
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("logistic_reg")}
-Be aware that the \code{LiblineaR} engine regularizes the intercept. Other
-regularized regression models do not, which will result in different
-parameter estimates.
-}
-
-\subsection{stan}{\if{html}{\out{}}\preformatted{logistic_reg() \%>\%
- set_engine("stan") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
-##
-## Computational engine: stan
-##
-## Model fit template:
-## rstanarm::stan_glm(formula = missing_arg(), data = missing_arg(),
-## weights = missing_arg(), family = stats::binomial, refresh = 0)
-}
-
-Note that the \code{refresh} default prevents logging of the estimation
-process. Change this value in \code{set_engine()} to show the logs.
-
-For prediction, the \code{stan} engine can compute posterior intervals
-analogous to confidence and prediction intervals. In these instances,
-the units are the original outcome and when \code{std_error = TRUE}, the
-standard deviation of the posterior distribution (or posterior
-predictive distribution as appropriate) is returned.
-}
-
-\subsection{spark}{\if{html}{\out{}}\preformatted{logistic_reg() \%>\%
- set_engine("spark") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), family = "binomial")
-}
-}
-
-\subsection{keras}{\if{html}{\out{}}\preformatted{logistic_reg() \%>\%
- set_engine("keras") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Logistic Regression Model Specification (classification)
-##
-## Computational engine: keras
-##
-## Model fit template:
-## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = 1,
-## act = "linear")
-}
-}
-
-\subsection{Parameter translations}{
-
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{lllll}{
- \strong{parsnip} \tab \strong{glmnet} \tab \strong{LiblineaR} \tab \strong{spark} \tab \strong{keras} \cr
- penalty \tab lambda \tab cost \tab reg_param (0) \tab penalty (0) \cr
- mixture \tab alpha (1) \tab type (0) \tab elastic_net_param (0) \tab NA \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("logistic_reg")
logistic_reg()
-# Parameters can be represented by a placeholder:
-logistic_reg(penalty = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("logistic_reg")}
}
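
The removed glmnet notes above still describe real behavior: the full regularization path is fit once, `path_values` in `set_engine()` controls glmnet's `lambda` sequence, and `penalty` selects the value used by `predict()` (with `multi_predict()` available for several penalties at once). A sketch based on the example in the removed text:

```r
library(parsnip)

# Fit the full path with a custom lambda sequence; predict() will use
# penalty = 0.1 unless multi_predict() is called.
logistic_reg(penalty = 0.1) %>%
  set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>%
  translate()
```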
diff --git a/man/mars.Rd b/man/mars.Rd
index 774dbeaab..e7497834d 100644
--- a/man/mars.Rd
+++ b/man/mars.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/mars.R
\name{mars}
\alias{mars}
-\title{General Interface for MARS}
+\title{Multivariate adaptive regression splines (MARS)}
\usage{
mars(
mode = "unknown",
@@ -18,8 +18,7 @@ Possible values for this model are "unknown", "regression", or
"classification".}
\item{engine}{A single character string specifying what computational engine
-to use for fitting. Possible engines are listed below. The default for this
-model is \code{"earth"}.}
+to use for fitting.}
\item{num_terms}{The number of features that will be retained in the
final model, including the intercept.}
@@ -29,95 +28,33 @@ final model, including the intercept.}
\item{prune_method}{The pruning method.}
}
\description{
-\code{mars()} is a way to generate a \emph{specification} of a model before
-fitting and allows the model to be created using R. The main
-arguments for the
-model are:
-\itemize{
-\item \code{num_terms}: The number of features that will be retained in the
-final model.
-\item \code{prod_degree}: The highest possible degree of interaction between
-features. A value of 1 indicates an additive model while a value of 2
-allows, but does not guarantee, two-way interactions between features.
-\item \code{prune_method}: The type of pruning. Possible values are listed
-in \code{?earth}.
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"earth"} (the default)
-}
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below.
-\subsection{earth}{\if{html}{\out{}}\preformatted{mars() \%>\%
- set_engine("earth") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## MARS Model Specification (regression)
-##
-## Computational engine: earth
-##
-## Model fit template:
-## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## keepxy = TRUE)
-}\if{html}{\out{}}\preformatted{mars() \%>\%
- set_engine("earth") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## MARS Model Specification (classification)
-##
-## Engine-Specific Arguments:
-## glm = list(family = stats::binomial)
-##
-## Computational engine: earth
-##
-## Model fit template:
-## earth::earth(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## glm = list(family = stats::binomial), keepxy = TRUE)
-}
+\code{mars()} defines a generalized linear model that uses artificial features for
+some predictors. These features resemble hinge functions and the result is
+a model that is a segmented regression in small dimensions.
-Note that, when the model is fit, the \code{earth} package only has its
-namespace loaded. However, if \code{multi_predict} is used, the package is
-attached.
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-Also, \code{fit()} passes the data directly to \code{earth::earth()} so that its
-formula method can create dummy variables as-needed.
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mars")}
-For this engine, tuning over \code{num_terms} is very efficient since the
-same model object can be used to make predictions over multiple values
-of \code{num_terms}.
-}
-
-\subsection{Parameter translations}{
-
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{ll}{
- \strong{parsnip} \tab \strong{earth} \cr
- num_terms \tab nprune \cr
- prod_degree \tab degree (1) \cr
- prune_method \tab pmethod (backward) \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("mars")
mars(mode = "regression", num_terms = 5)
}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
+}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("mars")}
}
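
Per the removed engine details above, the earth engine adds `glm = list(family = stats::binomial)` and `keepxy = TRUE` to the fit call when the mode is classification. A quick way to confirm:

```r
library(parsnip)

# translate() shows the earth::earth() template, including the
# classification-specific glm family argument.
mars(num_terms = 5, prod_degree = 2) %>%
  set_engine("earth") %>%
  set_mode("classification") %>%
  translate()
```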
diff --git a/man/mlp.Rd b/man/mlp.Rd
index 8730a7825..15a3c7e8e 100644
--- a/man/mlp.Rd
+++ b/man/mlp.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/mlp.R
\name{mlp}
\alias{mlp}
-\title{General Interface for Single Layer Neural Network}
+\title{Single layer neural network}
\usage{
mlp(
mode = "unknown",
@@ -20,8 +20,7 @@ Possible values for this model are "unknown", "regression", or
"classification".}
\item{engine}{A single character string specifying what computational engine
-to use for fitting. Possible engines are listed below. The default for this
-model is \code{"nnet"}.}
+to use for fitting.}
\item{hidden_units}{An integer for the number of units in the hidden layer.}
@@ -40,125 +39,32 @@ function between the hidden and output layers is automatically set to either
"linear", "softmax", "relu", and "elu"}
}
\description{
-\code{mlp()}, for multilayer perceptron, is a way to generate a \emph{specification} of
-a model before fitting and allows the model to be created using
-different packages in R or via keras The main arguments for the
-model are:
-\itemize{
-\item \code{hidden_units}: The number of units in the hidden layer
-(default: 5).
-\item \code{penalty}: The amount of L2 regularization (aka weight
-decay, default is zero).
-\item \code{dropout}: The proportion of parameters randomly dropped out of
-the model (\code{keras} only, default is zero).
-\item \code{epochs}: The number of training iterations (default: 20).
-\item \code{activation}: The type of function that connects the hidden
-layer and the input variables (\code{keras} only, default is softmax).
-}
-}
-\details{
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (see above), the values are taken from the underlying model
-functions. One exception is \code{hidden_units} when \code{nnet::nnet} is used; that
-function's \code{size} argument has no default so a value of 5 units will be
-used. Also, unless otherwise specified, the \code{linout} argument to
-\code{nnet::nnet()} will be set to \code{TRUE} when a regression model is created.
-If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
+\code{mlp()} defines a multilayer perceptron model (a.k.a. a single layer,
+feed-forward neural network).
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"nnet"} (the default)
-\item \pkg{keras}: \code{"keras"}
-}
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below:
-\subsection{keras}{\if{html}{\out{}}\preformatted{mlp() \%>\%
- set_engine("keras") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Single Layer Neural Network Specification (regression)
-##
-## Computational engine: keras
-##
-## Model fit template:
-## parsnip::keras_mlp(x = missing_arg(), y = missing_arg())
-}\if{html}{\out{}}\preformatted{mlp() \%>\%
- set_engine("keras") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Single Layer Neural Network Specification (classification)
-##
-## Computational engine: keras
-##
-## Model fit template:
-## parsnip::keras_mlp(x = missing_arg(), y = missing_arg())
-}
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-An error is thrown if both \code{penalty} and \code{dropout} are specified for
-\code{keras} models.
-}
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("mlp")}
-\subsection{nnet}{\if{html}{\out{}}\preformatted{mlp() \%>\%
- set_engine("nnet") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Single Layer Neural Network Specification (regression)
-##
-## Main Arguments:
-## hidden_units = 5
-##
-## Computational engine: nnet
-##
-## Model fit template:
-## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## size = 5, trace = FALSE, linout = TRUE)
-}\if{html}{\out{}}\preformatted{mlp() \%>\%
- set_engine("nnet") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Single Layer Neural Network Specification (classification)
-##
-## Main Arguments:
-## hidden_units = 5
-##
-## Computational engine: nnet
-##
-## Model fit template:
-## nnet::nnet(formula = missing_arg(), data = missing_arg(), weights = missing_arg(),
-## size = 5, trace = FALSE, linout = FALSE)
-}
-}
-
-\subsection{Parameter translations}{
-
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{lll}{
- \strong{parsnip} \tab \strong{keras} \tab \strong{nnet} \cr
- hidden_units \tab hidden_units (5) \tab size \cr
- penalty \tab penalty (0) \tab decay (0) \cr
- dropout \tab dropout (0) \tab NA \cr
- epochs \tab epochs (20) \tab maxit (100) \cr
- activation \tab activation (softmax) \tab NA \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("mlp")
mlp(mode = "classification", penalty = 0.01)
-# Parameters can be represented by a placeholder:
-mlp(mode = "regression", hidden_units = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("mlp")}
}
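
One detail from the removed text worth keeping in mind: `nnet::nnet()` has no default for its `size` argument, so parsnip supplies 5 hidden units, and `linout = TRUE` is set for regression models. This shows up in the translated fit template:

```r
library(parsnip)

# parsnip fills in size = 5 (hidden_units) and linout = TRUE because
# the mode is regression.
mlp() %>%
  set_engine("nnet") %>%
  set_mode("regression") %>%
  translate()
```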
diff --git a/man/model_spec.Rd b/man/model_spec.Rd
index c1e3cca66..4b2e0f24e 100644
--- a/man/model_spec.Rd
+++ b/man/model_spec.Rd
@@ -73,8 +73,8 @@ This can be seen in the output:
The model functions save the argument \emph{expressions} and their
associated environments (a.k.a. a quosure) to be evaluated later
-when either \code{\link[=fit]{fit()}} or \code{\link[=fit_xy]{fit_xy()}} are called with the actual
-data.
+when either \code{\link[=fit.model_spec]{fit.model_spec()}} or \code{\link[=fit_xy.model_spec]{fit_xy.model_spec()}} are
+called with the actual data.
The consequence of this strategy is that any data required to
get the parameter values must be available when the model is
diff --git a/man/multinom_reg.Rd b/man/multinom_reg.Rd
index af0c1e28a..fc202b5c5 100644
--- a/man/multinom_reg.Rd
+++ b/man/multinom_reg.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/multinom_reg.R
\name{multinom_reg}
\alias{multinom_reg}
-\title{General Interface for Multinomial Regression Models}
+\title{Multinomial regression}
\usage{
multinom_reg(
mode = "classification",
@@ -12,7 +12,7 @@ multinom_reg(
)
}
\arguments{
-\item{mode}{A single character string for the prediction outcome mode.
+\item{mode}{A single character string for the type of model.
The only possible value for this model is "classification".}
\item{engine}{A single character string specifying what computational engine
@@ -20,7 +20,7 @@ to use for fitting. Possible engines are listed below. The default for this
model is \code{"nnet"}.}
\item{penalty}{A non-negative number representing the total
-amount of regularization (\code{glmnet}, \code{keras}, and \code{spark} only).
+amount of regularization (specific engines only).
For \code{keras} models, this corresponds to purely L2 regularization
(aka weight decay) while the other models can be a combination
of L1 and L2 (depending on the value of \code{mixture}).}
@@ -28,163 +28,35 @@ of L1 and L2 (depending on the value of \code{mixture}).}
\item{mixture}{A number between zero and one (inclusive) that is the
proportion of L1 regularization (i.e. lasso) in the model. When
\code{mixture = 1}, it is a pure lasso model while \code{mixture = 0} indicates that
-ridge regression is being used. (\code{glmnet} and \code{spark} only).}
+ridge regression is being used (specific engines only).}
}
\description{
-\code{multinom_reg()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R, keras, or Spark. The main arguments for the
-model are:
-\itemize{
-\item \code{penalty}: The total amount of regularization
-in the model. Note that this must be zero for some engines.
-\item \code{mixture}: The mixture amounts of different types of
-regularization (see below). Note that this will be ignored for some engines.
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-For \code{multinom_reg()}, the mode will always be "classification".
-
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"nnet"} (the default), \code{"glmnet"}
-\item \pkg{Spark}: \code{"spark"}
-\item \pkg{keras}: \code{"keras"}
-}
-}
-\note{
-For models created using the spark engine, there are
-several differences to consider. First, only the formula
-interface to via \code{fit()} is available; using \code{fit_xy()} will
-generate an error. Second, the predictions will always be in a
-spark table format. The names will be the same as documented but
-without the dots. Third, there is no equivalent to factor
-columns in spark tables so class predictions are returned as
-character columns. Fourth, to retain the model object for a new
-R session (via \code{save()}), the \code{model$fit} element of the \code{parsnip}
-object should be serialized via \code{ml_save(object$fit)} and
-separately saved to disk. In a new session, the object can be
-reloaded and reattached to the \code{parsnip} object.
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below.
-\subsection{glmnet}{\if{html}{\out{}}\preformatted{multinom_reg(penalty = 0.1) \%>\%
- set_engine("glmnet") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Multinomial Regression Model Specification (classification)
-##
-## Main Arguments:
-## penalty = 0.1
-##
-## Computational engine: glmnet
-##
-## Model fit template:
-## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## family = "multinomial")
-}
-
-The glmnet engine requires a single value for the \code{penalty} argument (a
-number or \code{tune()}), but the full regularization path is always fit
-regardless of the value given to \code{penalty}. To pass in a custom sequence
-of values for glmnet’s \code{lambda}, use the argument \code{path_values} in
-\code{set_engine()}. This will assign the value of the glmnet \code{lambda}
-parameter without disturbing the value given of \code{multinom_reg(penalty)}.
-For example:\if{html}{\out{}}\preformatted{multinom_reg(penalty = .1) \%>\%
- set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Multinomial Regression Model Specification (classification)
-##
-## Main Arguments:
-## penalty = 0.1
-##
-## Computational engine: glmnet
-##
-## Model fit template:
-## glmnet::glmnet(x = missing_arg(), y = missing_arg(), weights = missing_arg(),
-## lambda = c(0, 10^seq(-10, 1, length.out = 20)), family = "multinomial")
-}
-
-When fitting a pure ridge regression model (i.e., \code{penalty = 0}), we
-\emph{strongly suggest} that you pass in a vector for \code{path_values} that
-includes zero. See \href{https://github.com/tidymodels/parsnip/issues/431}{issue #431} for a
-discussion.
-
-When using \code{predict()}, the single \code{penalty} value used for prediction
-is the one specified in \code{multinom_reg()}.
-
-To predict on multiple penalties, use the \code{multi_predict()} function.
-This function returns a tibble with a list column called \code{.pred}
-containing all of the penalty results.
-}
+\code{multinom_reg()} defines a model that uses linear predictors to predict
+multiclass data using the multinomial distribution.
-\subsection{nnet}{\if{html}{\out{}}\preformatted{multinom_reg() \%>\%
- set_engine("nnet") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Multinomial Regression Model Specification (classification)
-##
-## Computational engine: nnet
-##
-## Model fit template:
-## nnet::multinom(formula = missing_arg(), data = missing_arg(),
-## weights = missing_arg(), trace = FALSE)
-}
-}
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-\subsection{spark}{\if{html}{\out{}}\preformatted{multinom_reg() \%>\%
- set_engine("spark") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Multinomial Regression Model Specification (classification)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_logistic_regression(x = missing_arg(), formula = missing_arg(),
-## weight_col = missing_arg(), family = "multinomial")
-}
-}
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("multinom_reg")}
-\subsection{keras}{\if{html}{\out{}}\preformatted{multinom_reg() \%>\%
- set_engine("keras") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Multinomial Regression Model Specification (classification)
-##
-## Computational engine: keras
-##
-## Model fit template:
-## parsnip::keras_mlp(x = missing_arg(), y = missing_arg(), hidden_units = 1,
-## act = "linear")
-}
-}
-
-\subsection{Parameter translations}{
-
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{lllll}{
- \strong{parsnip} \tab \strong{glmnet} \tab \strong{spark} \tab \strong{keras} \tab \strong{nnet} \cr
- penalty \tab lambda \tab reg_param (0) \tab penalty (0) \tab decay (0) \cr
- mixture \tab alpha (1) \tab elastic_net_param (0) \tab NA \tab NA \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("multinom_reg")
multinom_reg()
-# Parameters can be represented by a placeholder:
-multinom_reg(penalty = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("multinom_reg")}
}
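
For reference, the default engine described in the removed template translates to `nnet::multinom()` with `trace = FALSE`:

```r
library(parsnip)

# The nnet engine's fit template, per the removed details above.
multinom_reg() %>%
  set_engine("nnet") %>%
  translate()
```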
diff --git a/man/nearest_neighbor.Rd b/man/nearest_neighbor.Rd
index b04a12625..4c2200ab6 100644
--- a/man/nearest_neighbor.Rd
+++ b/man/nearest_neighbor.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/nearest_neighbor.R
\name{nearest_neighbor}
\alias{nearest_neighbor}
-\title{General Interface for K-Nearest Neighbor Models}
+\title{K-nearest neighbors}
\usage{
nearest_neighbor(
mode = "unknown",
@@ -14,12 +14,11 @@ nearest_neighbor(
}
\arguments{
\item{mode}{A single character string for the prediction outcome mode.
-Possible values for this model are \code{"unknown"}, \code{"regression"}, or
-\code{"classification"}.}
+Possible values for this model are "unknown", "regression", or
+"classification".}
\item{engine}{A single character string specifying what computational engine
-to use for fitting. Possible engines are listed below. The default for this
-model is \code{"kknn"}.}
+to use for fitting.}
\item{neighbors}{A single integer for the number of neighbors
to consider (often called \code{k}). For \pkg{kknn}, a value of 5
@@ -34,92 +33,33 @@ to weight distances between samples. Valid choices are: \code{"rectangular"},
calculating Minkowski distance.}
}
\description{
-\code{nearest_neighbor()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R. The main arguments for the
-model are:
-\itemize{
-\item \code{neighbors}: The number of neighbors considered at
-each prediction.
-\item \code{weight_func}: The type of kernel function that weights the
-distances between samples.
-\item \code{dist_power}: The parameter used when calculating the Minkowski
-distance. This corresponds to the Manhattan distance with \code{dist_power = 1}
-and the Euclidean distance with \code{dist_power = 2}.
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"kknn"} (the default)
-}
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below:
-\subsection{kknn}{\if{html}{\out{}}\preformatted{nearest_neighbor() \%>\%
- set_engine("kknn") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## K-Nearest Neighbor Model Specification (regression)
-##
-## Computational engine: kknn
-##
-## Model fit template:
-## kknn::train.kknn(formula = missing_arg(), data = missing_arg(),
-## ks = min_rows(5, data, 5))
-}\if{html}{\out{}}\preformatted{nearest_neighbor() \%>\%
- set_engine("kknn") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## K-Nearest Neighbor Model Specification (classification)
-##
-## Computational engine: kknn
-##
-## Model fit template:
-## kknn::train.kknn(formula = missing_arg(), data = missing_arg(),
-## ks = min_rows(5, data, 5))
-}
+\code{nearest_neighbor()} defines a model that uses the \code{K} most similar data
+points from the training set to predict new samples.
-For \code{kknn}, the underlying modeling function used is a restricted
-version of \code{train.kknn()} and not \code{kknn()}. It is set up in this way so
-that \code{parsnip} can utilize the underlying \code{predict.train.kknn} method to
-predict on new data. This also means that a single value of that
-function’s \code{kernel} argument (a.k.a \code{weight_func} here) can be supplied
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-For this engine, tuning over \code{neighbors} is very efficient since the
-same model object can be used to make predictions over multiple values
-of \code{neighbors}.
-}
-
-\subsection{Parameter translations}{
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("nearest_neighbor")}
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{ll}{
- \strong{parsnip} \tab \strong{kknn} \cr
- neighbors \tab ks \cr
- weight_func \tab kernel (optimal) \cr
- dist_power \tab distance (2) \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("nearest_neighbor")
nearest_neighbor(neighbors = 11)
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("nearest_neighbor")}
}
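
The removed kknn details remain useful context: parsnip wraps a restricted form of `kknn::train.kknn()` (not `kknn()`) so that `predict.train.kknn()` can be reused, and only a single kernel (`weight_func` in parsnip terms) is passed through. For example:

```r
library(parsnip)

# weight_func maps to train.kknn()'s kernel argument; neighbors maps to ks.
nearest_neighbor(neighbors = 11, weight_func = "triangular") %>%
  set_engine("kknn") %>%
  set_mode("regression") %>%
  translate()
```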
diff --git a/man/null_model.Rd b/man/null_model.Rd
index 6ed2123e1..ff78f08ff 100644
--- a/man/null_model.Rd
+++ b/man/null_model.Rd
@@ -2,18 +2,17 @@
% Please edit documentation in R/nullmodel.R
\name{null_model}
\alias{null_model}
-\title{General Interface for null models}
+\title{Null model}
\usage{
null_model(mode = "classification")
}
\arguments{
-\item{mode}{A single character string for the type of model.
+\item{mode}{A single character string for the prediction outcome mode.
Possible values for this model are "unknown", "regression", or
"classification".}
}
\description{
-\code{null_model()} is a way to generate a \emph{specification} of a model before
-fitting and allows the model to be created using R. It doesn't have any
+\code{null_model()} defines a simple, non-informative model. It doesn't have any
main arguments.
}
\details{
@@ -54,5 +53,5 @@ call. For this type of model, the template of the fit calls are below:
null_model(mode = "regression")
}
\seealso{
-\code{\link[=fit]{fit()}}
+\code{\link[=fit.model_spec]{fit.model_spec()}}
}
diff --git a/man/parsnip_update.Rd b/man/parsnip_update.Rd
index 8bec62e48..2d1259d5b 100644
--- a/man/parsnip_update.Rd
+++ b/man/parsnip_update.Rd
@@ -180,8 +180,8 @@ these will supersede the values in \code{parameters}. Also, using
engine arguments in this object will result in an error.}
\item{mtry}{A number for the number (or proportion) of predictors that will
-be randomly sampled at each split when creating the tree models (\code{xgboost}
-only).}
+be randomly sampled at each split when creating the tree models
+(specific engines only).}
\item{trees}{An integer for the number of trees contained in
the ensemble.}
@@ -190,20 +190,20 @@ the ensemble.}
in a node that is required for the node to be split further.}
\item{tree_depth}{An integer for the maximum depth of the tree (i.e. number
-of splits) (\code{xgboost} only).}
+of splits) (specific engines only).}
\item{learn_rate}{A number for the rate at which the boosting algorithm adapts
-from iteration-to-iteration (\code{xgboost} only).}
+from iteration-to-iteration (specific engines only).}
\item{loss_reduction}{A number for the reduction in the loss function required
-to split further (\code{xgboost} only).}
+to split further (specific engines only).}
\item{sample_size}{A number for the number (or proportion) of data that is
exposed to the fitting routine. For \code{xgboost}, the sampling is done at
each iteration while \code{C5.0} samples once during training.}
\item{stop_iter}{The number of iterations without improvement before
-stopping (\code{xgboost} only).}
+stopping (specific engines only).}
\item{fresh}{A logical for whether the arguments should be
modified in-place or replaced wholesale.}
@@ -211,7 +211,7 @@ modified in-place or replaced wholesale.}
\item{...}{Not used for \code{update()}.}
\item{cost_complexity}{A positive number for the cost/complexity
-parameter (a.k.a. \code{Cp}) used by CART models (\code{rpart} only).}
+parameter (a.k.a. \code{Cp}) used by CART models (specific engines only).}
\item{select_features}{TRUE or FALSE. If this is TRUE, then the model can add an
extra penalty to each term so that it can be penalized to zero.
@@ -224,15 +224,12 @@ Use \code{adjust_deg_free} to increase level of penalization.}
Increase this beyond 1 to produce smoother models.}
\item{penalty}{A non-negative number representing the total
-amount of regularization (\code{glmnet}, \code{keras}, and \code{spark} only).
-For \code{keras} models, this corresponds to purely L2 regularization
-(aka weight decay) while the other models can be a combination
-of L1 and L2 (depending on the value of \code{mixture}; see below).}
+amount of regularization (specific engines only).}
\item{mixture}{A number between zero and one (inclusive) that is the
proportion of L1 regularization (i.e. lasso) in the model. When
\code{mixture = 1}, it is a pure lasso model while \code{mixture = 0} indicates that
-ridge regression is being used. (\code{glmnet} and \code{spark} only).}
+ridge regression is being used (specific engines only).}
\item{num_terms}{The number of features that will be retained in the
final model, including the intercept.}
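
Since these arguments are shared across the `update()` methods documented here, a short usage sketch may help: `update()` changes the main arguments of an existing specification in place, while `fresh = TRUE` replaces them wholesale rather than merging.

```r
library(parsnip)

spec <- rand_forest(mtry = 10, min_n = 3)

update(spec, mtry = 20)                # min_n = 3 is kept
update(spec, mtry = 20, fresh = TRUE)  # all main arguments are replaced
```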
diff --git a/man/proportional_hazards.Rd b/man/proportional_hazards.Rd
index f5d5f0b5a..2b6acc514 100644
--- a/man/proportional_hazards.Rd
+++ b/man/proportional_hazards.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/proportional_hazards.R
\name{proportional_hazards}
\alias{proportional_hazards}
-\title{General Interface for Proportional Hazards Models}
+\title{Proportional hazards regression}
\usage{
proportional_hazards(
mode = "censored regression",
@@ -20,15 +20,12 @@ to use for fitting. Possible engines are listed below. The default for this
model is \code{"survival"}.}
\item{penalty}{A non-negative number representing the total
-amount of regularization (\code{glmnet}, \code{keras}, and \code{spark} only).
-For \code{keras} models, this corresponds to purely L2 regularization
-(aka weight decay) while the other models can be a combination
-of L1 and L2 (depending on the value of \code{mixture}; see below).}
+amount of regularization (specific engines only).}
\item{mixture}{A number between zero and one (inclusive) that is the
proportion of L1 regularization (i.e. lasso) in the model. When
\code{mixture = 1}, it is a pure lasso model while \code{mixture = 0} indicates that
-ridge regression is being used. (\code{glmnet} and \code{spark} only).}
+ridge regression is being used (specific engines only).}
}
\description{
\code{proportional_hazards()} is a way to generate a \emph{specification} of a model
@@ -55,6 +52,6 @@ For \code{proportional_hazards()}, the mode will always be "censored regression"
show_engines("proportional_hazards")
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\code{\link[=fit.model_spec]{fit.model_spec()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
}
\keyword{internal}
diff --git a/man/rand_forest.Rd b/man/rand_forest.Rd
index 68aec1f45..6c3646bf2 100644
--- a/man/rand_forest.Rd
+++ b/man/rand_forest.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/rand_forest.R
\name{rand_forest}
\alias{rand_forest}
-\title{General Interface for Random Forest Models}
+\title{Random forest}
\usage{
rand_forest(
mode = "unknown",
@@ -18,8 +18,7 @@ Possible values for this model are "unknown", "regression", or
"classification".}
\item{engine}{A single character string specifying what computational engine
-to use for fitting. Possible engines are listed below. The default for this
-model is \code{"ranger"}.}
+to use for fitting.}
\item{mtry}{An integer for the number of predictors that will
be randomly sampled at each split when creating the tree models.}
@@ -31,173 +30,33 @@ the ensemble.}
in a node that are required for the node to be split further.}
}
\description{
-\code{rand_forest()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R or via Spark. The main arguments for the
-model are:
-\itemize{
-\item \code{mtry}: The number of predictors that will be
-randomly sampled at each split when creating the tree models.
-\item \code{trees}: The number of trees contained in the ensemble.
-\item \code{min_n}: The minimum number of data points in a node
-that are required for the node to be split further.
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and argument can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"ranger"} (the default) or \code{"randomForest"}
-\item \pkg{Spark}: \code{"spark"}
-}
-}
-\note{
-For models created using the spark engine, there are
-several differences to consider. First, only the formula
-interface to via \code{fit()} is available; using \code{fit_xy()} will
-generate an error. Second, the predictions will always be in a
-spark table format. The names will be the same as documented but
-without the dots. Third, there is no equivalent to factor
-columns in spark tables so class predictions are returned as
-character columns. Fourth, to retain the model object for a new
-R session (via \code{save}), the \code{model$fit} element of the \code{parsnip}
-object should be serialized via \code{ml_save(object$fit)} and
-separately saved to disk. In a new session, the object can be
-reloaded and reattached to the \code{parsnip} object.
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below:
-\subsection{ranger}{\if{html}{\out{}}\preformatted{rand_forest() \%>\%
- set_engine("ranger") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Random Forest Model Specification (regression)
-##
-## Computational engine: ranger
-##
-## Model fit template:
-## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(),
-## num.threads = 1, verbose = FALSE, seed = sample.int(10^5,
-## 1))
-}\if{html}{\out{}}\preformatted{rand_forest() \%>\%
- set_engine("ranger") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Random Forest Model Specification (classification)
-##
-## Computational engine: ranger
-##
-## Model fit template:
-## ranger::ranger(x = missing_arg(), y = missing_arg(), case.weights = missing_arg(),
-## num.threads = 1, verbose = FALSE, seed = sample.int(10^5,
-## 1), probability = TRUE)
-}
-
-Note that \code{\link[ranger:ranger]{ranger::ranger()}} does not require factor
-predictors to be converted to indicator variables. \code{fit()} does not
-affect the encoding of the predictor values (i.e. factors stay factors)
-for this model.
-
-For \code{ranger} confidence intervals, the intervals are constructed using
-the form \verb{estimate +/- z * std_error}. For classification probabilities,
-these values can fall outside of \verb{[0, 1]} and will be coerced to be in
-this range.
-}
-
-\subsection{randomForest}{\if{html}{\out{}}\preformatted{rand_forest() \%>\%
- set_engine("randomForest") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Random Forest Model Specification (regression)
-##
-## Computational engine: randomForest
-##
-## Model fit template:
-## randomForest::randomForest(x = missing_arg(), y = missing_arg())
-}\if{html}{\out{}}\preformatted{rand_forest() \%>\%
- set_engine("randomForest") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Random Forest Model Specification (classification)
-##
-## Computational engine: randomForest
-##
-## Model fit template:
-## randomForest::randomForest(x = missing_arg(), y = missing_arg())
-}
-
-Note that \code{\link[randomForest:randomForest]{randomForest::randomForest()}}
-does not require factor predictors to be converted to indicator
-variables. \code{fit()} does not affect the encoding of the predictor values
-(i.e. factors stay factors) for this model.
-}
+\code{rand_forest()} defines a model that creates a large number of decision
+trees, each independent of the others. The final prediction uses all
+predictions from the individual trees and combines them.
-\subsection{spark}{\if{html}{\out{}}\preformatted{rand_forest() \%>\%
- set_engine("spark") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Random Forest Model Specification (regression)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_random_forest(x = missing_arg(), formula = missing_arg(),
-## type = "regression", seed = sample.int(10^5, 1))
-}\if{html}{\out{}}\preformatted{rand_forest() \%>\%
- set_engine("spark") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{}}\preformatted{## Random Forest Model Specification (classification)
-##
-## Computational engine: spark
-##
-## Model fit template:
-## sparklyr::ml_random_forest(x = missing_arg(), formula = missing_arg(),
-## type = "classification", seed = sample.int(10^5, 1))
-}
-
-\code{fit()} does not affect the encoding of the predictor values
-(i.e. factors stay factors) for this model.
-}
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-\subsection{Parameter translations}{
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("rand_forest")}
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{llll}{
- \strong{parsnip} \tab \strong{ranger} \tab \strong{randomForest} \tab \strong{spark} \cr
- mtry \tab mtry (see below) \tab mtry (see below) \tab feature_subset_strategy (see below) \cr
- trees \tab num.trees (500) \tab ntree (500) \tab num_trees (20) \cr
- min_n \tab min.node.size (see below) \tab nodesize (see below) \tab min_instances_per_node (1) \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
-\itemize{
-\item For randomForest and spark, the default \code{mtry} is the square root of
-the number of predictors for classification, and one-third of the
-predictors for regression.
-\item For ranger, the default \code{mtry} is the square root of the number of
-predictors.
-\item The default \code{min_n} for both ranger and randomForest is 1 for
-classification and 5 for regression.
-}
-}
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-
\examples{
show_engines("rand_forest")
rand_forest(mode = "classification", trees = 2000)
-# Parameters can be represented by a placeholder:
-rand_forest(mode = "regression", mtry = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("rand_forest")}
}
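
The removed ranger template above shows the defaults parsnip adds (`num.threads = 1`, a reproducible `seed`, and `probability = TRUE` for classification). Engine-specific arguments, such as ranger's `importance`, can still be passed through `set_engine()`:

```r
library(parsnip)

# Engine arguments are forwarded to ranger::ranger(); parsnip supplies
# num.threads, seed, and probability itself.
rand_forest(trees = 2000) %>%
  set_engine("ranger", importance = "impurity") %>%
  set_mode("classification") %>%
  translate()
```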
diff --git a/man/rmd/boost-tree.Rmd b/man/rmd/boost-tree.Rmd
deleted file mode 100644
index 43c338a1b..000000000
--- a/man/rmd/boost-tree.Rmd
+++ /dev/null
@@ -1,111 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below:
-
-## xgboost
-
-```{r xgboost-reg}
-boost_tree() %>%
- set_engine("xgboost") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r xgboost-csl}
-boost_tree() %>%
- set_engine("xgboost") %>%
- set_mode("classification") %>%
- translate()
-```
-
-Note that, for most engines to `boost_tree()`, the `sample_size` argument is in terms of the _number_ of training set points. The `xgboost` package parameterizes this as the _proportion_ of training set samples instead. When using the `tune`, this **occurs automatically**.
-
-If you would like to use a custom range when tuning `sample_size`, the `dials::sample_prop()` function can be used in that case. For example, using a parameter set:
-
-```{r xgb-update, eval = FALSE}
-mod <-
- boost_tree(sample_size = tune()) %>%
- set_engine("xgboost") %>%
- set_mode("classification")
-
-# update the parameters using the `dials` function
-mod_param <-
- mod %>%
- parameters() %>%
- update(sample_size = sample_prop(c(0.4, 0.9)))
-```
-
-For this engine, tuning over `trees` is very efficient since the same model object can be used to make predictions over multiple values of `trees`.
-
-Note that `xgboost` models require that non-numeric predictors (e.g., factors) must be converted to dummy variables or some other numeric representation. By default, when using `fit()` with `xgboost`, a one-hot encoding is used to convert factor predictors to indicator variables.
-
-Finally, in the classification mode, non-numeric outcomes (i.e., factors) are converted to numeric. For binary classification, the `event_level` argument of `set_engine()` can be set to either `"first"` or `"second"` to specify which level should be used as the event. This can be helpful when a watchlist is used to monitor performance from with the xgboost training process.
-
-
-## C5.0
-
-```{r C5.0-csl}
-boost_tree() %>%
- set_engine("C5.0") %>%
- set_mode("classification") %>%
- translate()
-```
-
-Note that [C50::C5.0()] does not require factor predictors to be converted to indicator variables. `fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model.
-
-For this engine, tuning over `trees` is very efficient since the same model
-object can be used to make predictions over multiple values of `trees`.
-
-## spark
-
-```{r spark-reg}
-boost_tree() %>%
- set_engine("spark") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r spark-csl}
-boost_tree() %>%
- set_engine("spark") %>%
- set_mode("classification") %>%
- translate()
-```
-
-`fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model.
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters. Each engine typically has a different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_boost_tree <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "boost_tree", "xgboost", "tree_depth", "max_depth", get_arg("parsnip", "xgb_train", "max_depth"),
- "boost_tree", "xgboost", "trees", "nrounds", get_arg("parsnip", "xgb_train", "nrounds"),
- "boost_tree", "xgboost", "learn_rate", "eta", get_arg("parsnip", "xgb_train", "eta"),
- "boost_tree", "xgboost", "mtry", "colsample_bynode", get_arg("parsnip", "xgb_train", "colsample_bynode"),
- "boost_tree", "xgboost", "min_n", "min_child_weight", get_arg("parsnip", "xgb_train", "min_child_weight"),
- "boost_tree", "xgboost", "loss_reduction", "gamma", get_arg("parsnip", "xgb_train", "gamma"),
- "boost_tree", "xgboost", "sample_size", "subsample", get_arg("parsnip", "xgb_train", "subsample"),
- "boost_tree", "C5.0", "trees", "trials", get_arg("parsnip", "C5.0_train", "trials"),
- "boost_tree", "C5.0", "min_n", "minCases", get_arg("C50", "C5.0Control", "minCases"),
- "boost_tree", "C5.0", "sample_size", "sample", get_arg("C50", "C5.0Control", "sample"),
- "boost_tree", "spark", "tree_depth", "max_depth", get_arg("sparklyr", "ml_gradient_boosted_trees", "max_depth"),
- "boost_tree", "spark", "trees", "max_iter", get_arg("sparklyr", "ml_gradient_boosted_trees", "max_iter"),
- "boost_tree", "spark", "learn_rate", "step_size", get_arg("sparklyr", "ml_gradient_boosted_trees", "step_size"),
- "boost_tree", "spark", "mtry", "feature_subset_strategy", "see below",
- "boost_tree", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_gradient_boosted_trees", "min_instances_per_node"),
- "boost_tree", "spark", "loss_reduction", "min_info_gain", get_arg("sparklyr", "ml_gradient_boosted_trees", "min_info_gain"),
- "boost_tree", "spark", "sample_size", "subsampling_rate", get_arg("sparklyr", "ml_gradient_boosted_trees", "subsampling_rate"),
-
- )
-}
-convert_args("boost_tree")
-```
-
-For spark, the default `mtry` is the square root of the number of predictors for classification, and one-third of the predictors for regression.
diff --git a/man/rmd/boost_tree_C5.0.Rmd b/man/rmd/boost_tree_C5.0.Rmd
new file mode 100644
index 000000000..cf43aa369
--- /dev/null
+++ b/man/rmd/boost_tree_C5.0.Rmd
@@ -0,0 +1,63 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("boost_tree", "C5.0")`
+
+## Tuning Parameters
+
+```{r C5.0-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("trees", "min_n", "sample_size"),
+ default = c("15L", "2L", "1.0"))
+
+param <-
+ boost_tree() %>%
+ set_engine("C5.0") %>%
+ set_mode("regression") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r C5.0-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+The implementation of C5.0 limits the number of trees to be between 1 and 100.
+
+## Translation from parsnip to the original package (classification)
+
+```{r C5.0-cls}
+boost_tree(trees = integer(), min_n = integer(), sample_size = numeric()) %>%
+ set_engine("C5.0") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+[C5.0_train()] is a wrapper around [C50::C5.0()] that makes it easier to run this model.
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## Other details
+
+### Early stopping
+
+By default, early stopping is used. To use the complete set of boosting iterations, pass `earlyStopping = FALSE` to [set_engine()]. Also, it is unlikely that early stopping will occur if `sample_size = 1`.
+
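+As a minimal sketch, early stopping can be turned off via this engine argument (the other values are illustrative):
+
+```r
+boost_tree(trees = 100) %>%
+  set_engine("C5.0", earlyStopping = FALSE) %>%
+  set_mode("classification")
+```
+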
+## References
+
+- Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer.
diff --git a/man/rmd/boost_tree_spark.Rmd b/man/rmd/boost_tree_spark.Rmd
new file mode 100644
index 000000000..8d257b332
--- /dev/null
+++ b/man/rmd/boost_tree_spark.Rmd
@@ -0,0 +1,80 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("boost_tree", "spark")` Note, however, that multiclass classification is not supported yet.
+
+## Tuning Parameters
+
+```{r spark-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("tree_depth", "trees", "learn_rate", "mtry", "min_n", "loss_reduction", "sample_size"),
+ default = c("5L", "20L", "0.1", "see below", "1L", "0.0", "1.0"))
+
+# For this model, this is the same for all modes
+param <-
+ boost_tree() %>%
+ set_engine("spark") %>%
+ set_mode("regression") %>%
+ tunable() %>%
+ dplyr::filter(name != "stop_iter") %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r spark-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+The `mtry` parameter is related to the number of predictors. The default depends on the model mode: for classification, the square root of the number of predictors is used; for regression, one third of the predictors are sampled.
+
+## Translation from parsnip to the original package (regression)
+
+```{r spark-reg}
+boost_tree(
+ mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(),
+ learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric()
+) %>%
+ set_engine("spark") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r spark-cls}
+boost_tree(
+ mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(),
+ learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric()
+) %>%
+ set_engine("spark") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## Other details
+
+```{r child = "template-spark-notes.Rmd"}
+```
+
+## References
+
+ - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/boost_tree_xgboost.Rmd b/man/rmd/boost_tree_xgboost.Rmd
new file mode 100644
index 000000000..1f56cfcc3
--- /dev/null
+++ b/man/rmd/boost_tree_xgboost.Rmd
@@ -0,0 +1,100 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("boost_tree", "xgboost")`
+
+## Tuning Parameters
+
+```{r xgboost-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("tree_depth", "trees", "learn_rate", "mtry", "min_n", "loss_reduction", "sample_size", "stop_iter"),
+ default = c("6L", "15L", "0.3", "see below", "1L", "0.0", "1.0", "Inf"))
+
+# For this model, this is the same for all modes
+param <-
+ boost_tree() %>%
+ set_engine("xgboost") %>%
+ set_mode("regression") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r xgboost-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+The `mtry` parameter is related to the number of predictors. The default is to use all predictors. [xgboost::xgb.train()] encodes this as a real number between zero and one; parsnip translates the integer number of columns into that proportion. The user should still give the argument to `boost_tree()` as an integer (not a real number).
+
+## Translation from parsnip to the original package (regression)
+
+```{r xgboost-reg}
+boost_tree(
+ mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(),
+ learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric(),
+ stop_iter = integer()
+) %>%
+ set_engine("xgboost") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r xgboost-cls}
+boost_tree(
+ mtry = integer(), trees = integer(), min_n = integer(), tree_depth = integer(),
+ learn_rate = numeric(), loss_reduction = numeric(), sample_size = numeric(),
+ stop_iter = integer()
+) %>%
+ set_engine("xgboost") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+[xgb_train()] is a wrapper around [xgboost::xgb.train()] (and other functions) that makes it easier to run this model.
+
+## Preprocessing requirements
+
+xgboost does not have a means to translate factor predictors to grouped splits. Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via [fit.model_spec()], parsnip will convert factor columns to indicators using a one-hot encoding.
+
+For classification, non-numeric outcomes (i.e., factors) are internally converted to numeric. For binary classification, the `event_level` argument of `set_engine()` can be set to either `"first"` or `"second"` to specify which level should be used as the event. This can be helpful when a watchlist is used to monitor performance from within the xgboost training process.
+
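+For example, a sketch of declaring the second factor level as the event:
+
+```r
+boost_tree() %>%
+  set_engine("xgboost", event_level = "second") %>%
+  set_mode("classification")
+```
+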
+## Other details
+
+### Sparse matrices
+
+xgboost requires the data to be in a sparse format. If your predictor data are already in this format, then use [fit_xy.model_spec()] to pass it to the model function. Otherwise, parsnip converts the data to this format.
+
+### Parallel processing
+
+By default, the model is trained without parallel processing. This can be changed by passing the `nthread` parameter to [set_engine()]. However, it is unwise to combine this with external parallel processing when using the \pkg{tune} package.
+
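+A sketch of enabling xgboost's internal threading (the thread count is illustrative):
+
+```r
+boost_tree() %>%
+  set_engine("xgboost", nthread = 4) %>%
+  set_mode("regression")
+```
+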
+### Early stopping
+
+The `stop_iter` argument allows the model to prematurely stop training if the objective function does not improve within `early_stop` iterations.
+
+The best way to use this feature is in conjunction with an _internal validation set_. To do this, pass the `validation` parameter of [xgb_train()] via the parsnip [set_engine()] function. This is the proportion of the training set that should be reserved for measuring performance (and stopping early).
+
+If the model specification has `early_stop >= trees`, `early_stop` is converted to `trees - 1` and a warning is issued.
+
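+Putting these together, a sketch of early stopping against an internal validation set (the specific values are illustrative):
+
+```r
+boost_tree(trees = 500, stop_iter = 10) %>%
+  set_engine("xgboost", validation = 0.2) %>%
+  set_mode("regression")
+```
+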
+### Objective function
+
+parsnip chooses the objective function based on the characteristics of the outcome. To use a different loss, pass the `objective` argument to [set_engine()].
+
+## References
+
+ - [XGBoost: A Scalable Tree Boosting System](https://arxiv.org/abs/1603.02754)
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/decision-tree.Rmd b/man/rmd/decision-tree.Rmd
deleted file mode 100644
index 0a52568d1..000000000
--- a/man/rmd/decision-tree.Rmd
+++ /dev/null
@@ -1,73 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below:
-
-## rpart
-
-```{r rpart-reg}
-decision_tree() %>%
- set_engine("rpart") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r rpart-csl}
-decision_tree() %>%
- set_engine("rpart") %>%
- set_mode("classification") %>%
- translate()
-```
-
-Note that [rpart::rpart()] does not require factor predictors to be converted to indicator variables. `fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model
-
-## C5.0
-
-```{r C5.0-csl}
-decision_tree() %>%
- set_engine("C5.0") %>%
- set_mode("classification") %>%
- translate()
-```
-
-Note that [C50::C5.0()] does not require factor predictors to be converted to indicator variables. `fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model
-
-## spark
-
-```{r spark-reg}
-decision_tree() %>%
- set_engine("spark") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r spark-csl}
-decision_tree() %>%
- set_engine("spark") %>%
- set_mode("classification") %>%
- translate()
-```
-
-`fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters. Each engine typically has a different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_decision_tree <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "decision_tree", "rpart", "tree_depth", "maxdepth", get_arg("rpart", "rpart.control", "maxdepth"),
- "decision_tree", "rpart", "min_n", "minsplit", get_arg("rpart", "rpart.control", "minsplit"),
- "decision_tree", "rpart", "cost_complexity", "cp", get_arg("rpart", "rpart.control", "cp"),
- "decision_tree", "C5.0", "min_n", "minCases", get_arg("C50", "C5.0Control", "minCases"),
- "decision_tree", "spark", "tree_depth", "max_depth", get_arg("sparklyr", "ml_decision_tree", "max_depth"),
- "decision_tree", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_decision_tree", "min_instances_per_node"),
- )
-}
-convert_args("decision_tree")
-```
-
diff --git a/man/rmd/decision_tree_C5.0.Rmd b/man/rmd/decision_tree_C5.0.Rmd
new file mode 100644
index 000000000..7147fde64
--- /dev/null
+++ b/man/rmd/decision_tree_C5.0.Rmd
@@ -0,0 +1,56 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("decision_tree", "C5.0")`
+
+## Tuning Parameters
+
+```{r C5.0-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("min_n"),
+ default = c("2L"))
+
+param <-
+ decision_tree() %>%
+ set_engine("C5.0") %>%
+ set_mode("regression") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r C5.0-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r C5.0-cls}
+decision_tree(min_n = integer()) %>%
+ set_engine("C5.0") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+[C5.0_train()] is a wrapper around [C50::C5.0()] that makes it easier to run this model.
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## References
+
+- Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer.
+
diff --git a/man/rmd/decision_tree_rpart.Rmd b/man/rmd/decision_tree_rpart.Rmd
new file mode 100644
index 000000000..94f302a3a
--- /dev/null
+++ b/man/rmd/decision_tree_rpart.Rmd
@@ -0,0 +1,64 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("decision_tree", "rpart")`
+
+## Tuning Parameters
+
+```{r rpart-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("tree_depth", "min_n", "cost_complexity"),
+ default = c("30L", "2L", "0.01"))
+
+param <-
+ decision_tree() %>%
+ set_engine("rpart") %>%
+ set_mode("regression") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r rpart-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r rpart-cls}
+decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>%
+ set_engine("rpart") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+
+## Translation from parsnip to the original package (regression)
+
+```{r rpart-reg}
+decision_tree(tree_depth = integer(1), min_n = integer(1), cost_complexity = double(1)) %>%
+ set_engine("rpart") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## References
+
+- Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer.
+
diff --git a/man/rmd/decision_tree_spark.Rmd b/man/rmd/decision_tree_spark.Rmd
new file mode 100644
index 000000000..22627438a
--- /dev/null
+++ b/man/rmd/decision_tree_spark.Rmd
@@ -0,0 +1,69 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("decision_tree", "spark")`
+
+## Tuning Parameters
+
+```{r spark-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("tree_depth", "min_n"),
+ default = c("5L", "1L"))
+
+param <-
+ decision_tree() %>%
+ set_engine("spark") %>%
+ set_mode("regression") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r spark-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r spark-cls}
+decision_tree(tree_depth = integer(1), min_n = integer(1)) %>%
+ set_engine("spark") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+
+## Translation from parsnip to the original package (regression)
+
+```{r spark-reg}
+decision_tree(tree_depth = integer(1), min_n = integer(1)) %>%
+ set_engine("spark") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## Other details
+
+```{r child = "template-spark-notes.Rmd"}
+```
+
+## References
+
+- Kuhn, M, and K Johnson. 2013. *Applied Predictive Modeling*. Springer.
+
diff --git a/man/rmd/glmnet-details.Rmd b/man/rmd/glmnet-details.Rmd
new file mode 100644
index 000000000..d79b82d5f
--- /dev/null
+++ b/man/rmd/glmnet-details.Rmd
@@ -0,0 +1,110 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+# tidymodels and glmnet
+
+The implementation of the glmnet package has some nice features. For example, one of the main tuning parameters, the regularization penalty, does not need to be specified when fitting the model. The package fits a compendium of values, called the regularization path. These values depend on the data set and the value of `alpha`, the mixture parameter between a pure ridge model (`alpha = 0`) and a pure lasso model (`alpha = 1`). When predicting, any penalty value can be used, even ones not exactly on the regularization path. For those, the model interpolates between the closest path values to produce a prediction. There is an argument called `lambda` to the `glmnet()` function that is used to specify the path.
+
+In the discussion below, `linear_reg()` is used. The information is true for all parsnip models that have a `"glmnet"` engine.
+
+## Fitting and predicting using parsnip
+
+Recall that tidymodels uses standardized parameter names across models, chosen to be low on jargon. The argument `penalty` is the equivalent of what glmnet calls the `lambda` value and `mixture` is the same as its `alpha` value.
+
+In tidymodels, our `predict()` methods are defined to make one prediction at a time. For this model, that means predictions are for a single penalty value. For this reason, models that have glmnet engines require the user to always specify a single penalty value when the model is defined. For example, for linear regression:
+
+```r
+linear_reg(penalty = 1) %>% set_engine("glmnet")
+```
+
+When the `predict()` method is called, it automatically uses the penalty that was given when the model was defined. For example:
+
+```{r, include = FALSE}
+library(tidymodels)
+library(glmnet)
+```
+```{r}
+library(tidymodels)
+
+fit <-
+ linear_reg(penalty = 1) %>%
+ set_engine("glmnet") %>%
+ fit(mpg ~ ., data = mtcars)
+
+# predict at penalty = 1
+predict(fit, mtcars[1:3,])
+```
+
+However, any penalty values can be predicted simultaneously using the `multi_predict()` method:
+
+```{r}
+# predict at c(0.00, 0.01)
+multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01))
+
+# unnested:
+multi_predict(fit, mtcars[1:3,], penalty = c(0.00, 0.01)) %>%
+ add_rowindex() %>%
+ unnest(cols = ".pred")
+```
+
+### Where did `lambda` go?
+
+It may appear odd that the `lambda` value does not get used in the fit:
+
+```{r}
+linear_reg(penalty = 1) %>%
+ set_engine("glmnet") %>%
+ translate()
+```
+
+Internally, the value of `penalty = 1` is saved in the parsnip object and no value is set for `lambda`. This enables the full path to be fit by `glmnet()`. See the section below about setting the path.
+
+## How do I set the regularization path?
+
+Regardless of what value you use for `penalty`, the full coefficient path is used when [glmnet::glmnet()] is called.
+
+What if you want to manually set this path? Normally, you would pass a vector to `lambda` in [glmnet::glmnet()].
+
+parsnip models that use a `glmnet` engine can use a special optional argument called `path_values`. This is _not_ an argument to [glmnet::glmnet()]; it is used by parsnip to independently set the path.
+
+For example, we have found that if you want a pure ridge regression model (i.e., `mixture = 0`), you can get the _wrong coefficients_ if the path does not contain zero (see [issue #431](https://github.com/tidymodels/parsnip/issues/431#issuecomment-782883848)).
+
+If we want to use our own path, the argument is passed as an engine-specific option:
+
+```{r}
+coef_path_values <- c(0, 10^seq(-5, 1, length.out = 7))
+
+fit_ridge <-
+ linear_reg(penalty = 1, mixture = 0) %>%
+ set_engine("glmnet", path_values = coef_path_values) %>%
+ fit(mpg ~ ., data = mtcars)
+
+all.equal(sort(fit_ridge$fit$lambda), coef_path_values)
+
+# predict at penalty = 1
+predict(fit_ridge, mtcars[1:3,])
+```
+
+## Tidying the model object
+
+[broom::tidy()] is a function that gives a summary of the object as a tibble.
+
+**tl;dr** `tidy()` on a `glmnet` model produced by parsnip gives the coefficients for the value given by `penalty`.
+
+When parsnip makes a model, it gives it an extra class. Using the `tidy()` method on the object produces coefficients for the penalty that was originally requested:
+
+```{r tidy-parsnip, message = FALSE}
+tidy(fit)
+```
+
+Note that there is a `tidy()` method for `glmnet` objects in the `broom` package. If this is used directly on the underlying `glmnet` object, it returns _all of the coefficients on the path_:
+
+```{r tidy-broom}
+# Use the basic tidy() method for glmnet
+all_tidy_coefs <- broom:::tidy.glmnet(fit$fit)
+all_tidy_coefs
+
+length(unique(all_tidy_coefs$lambda))
+```
+
+This can be nice for plots but it might not contain the penalty value that you are interested in.
diff --git a/man/rmd/linear-reg.Rmd b/man/rmd/linear-reg.Rmd
deleted file mode 100644
index dbef43a45..000000000
--- a/man/rmd/linear-reg.Rmd
+++ /dev/null
@@ -1,100 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below.
-
-## lm
-
-```{r lm-reg}
-linear_reg() %>%
- set_engine("lm") %>%
- translate()
-```
-
-## glmnet
-
-```{r glmnet-csl}
-linear_reg(penalty = 0.1) %>%
- set_engine("glmnet") %>%
- translate()
-```
-
-The glmnet engine requires a single value for the `penalty` argument (a number
-or `tune()`), but the full regularization path is always fit
-regardless of the value given to `penalty`. To pass in a custom sequence of
-values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`.
-This will assign the value of the glmnet `lambda` parameter without disturbing
-the value given of `linear_reg(penalty)`. For example:
-
-```{r glmnet-path}
-linear_reg(penalty = .1) %>%
- set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>%
- translate()
-```
-
-When fitting a pure ridge regression model (i.e., `penalty = 0`), we _strongly
-suggest_ that you pass in a vector for `path_values` that includes zero. See
-[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion.
-
-When using `predict()`, the single `penalty` value used for prediction is the
-one specified in `linear_reg()`.
-
-To predict on multiple penalties, use the `multi_predict()` function.
-This function returns a tibble with a list column called `.pred` containing
-all of the penalty results.
-
-## stan
-
-```{r stan-reg}
-linear_reg() %>%
- set_engine("stan") %>%
- translate()
-```
-
-Note that the `refresh` default prevents logging of the estimation process.
-Change this value in `set_engine()` to show the logs.
-
-For prediction, the `stan` engine can compute posterior intervals analogous to
-confidence and prediction intervals. In these instances, the units are the
-original outcome and when `std_error = TRUE`, the standard deviation of the
-posterior distribution (or posterior predictive distribution as appropriate) is
-returned.
-
-## spark
-
-```{r spark-reg}
-linear_reg() %>%
- set_engine("spark") %>%
- translate()
-```
-
-## keras
-
-```{r keras-reg}
-linear_reg() %>%
- set_engine("keras") %>%
- translate()
-```
-
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_linear_reg <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "linear_reg", "glmnet", "mixture", "alpha", get_arg("glmnet", "glmnet", "alpha"),
- "linear_reg", "spark", "penalty", "reg_param", get_arg("sparklyr", "ml_linear_regression", "reg_param"),
- "linear_reg", "spark", "mixture", "elastic_net_param", get_arg("sparklyr", "ml_linear_regression", "elastic_net_param"),
- "linear_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"),
- )
-}
-convert_args("linear_reg")
-```
-
diff --git a/man/rmd/linear_reg_glmnet.Rmd b/man/rmd/linear_reg_glmnet.Rmd
new file mode 100644
index 000000000..4949319c3
--- /dev/null
+++ b/man/rmd/linear_reg_glmnet.Rmd
@@ -0,0 +1,62 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("linear_reg", "glmnet")`
+
+## Tuning Parameters
+
+```{r glmnet-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty", "mixture"),
+ default = c("see below", "1.0"))
+
+param <-
+  linear_reg() %>%
+ set_engine("glmnet") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r glmnet-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression.
+
+The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [glmnet-details].
+
+## Translation from parsnip to the original package
+
+```{r glmnet-csl}
+linear_reg(penalty = double(1), mixture = double(1)) %>%
+ set_engine("glmnet") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data.
+
+## References
+
+ - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/linear_reg_keras.Rmd b/man/rmd/linear_reg_keras.Rmd
new file mode 100644
index 000000000..b2ac2d3a6
--- /dev/null
+++ b/man/rmd/linear_reg_keras.Rmd
@@ -0,0 +1,60 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("linear_reg", "keras")`
+
+## Tuning Parameters
+
+```{r keras-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty"),
+ default = c("0.0"))
+
+param <-
+ linear_reg() %>%
+ set_engine("keras") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has one tuning parameter:
+
+```{r keras-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+For `penalty`, the amount of regularization includes only the L2 penalty (i.e., ridge or weight decay).
+
+## Translation from parsnip to the original package
+
+```{r keras-csl}
+linear_reg(penalty = double(1)) %>%
+ set_engine("keras") %>%
+ translate()
+```
+
+[keras_mlp()] is a parsnip wrapper around keras code for neural networks. This model fits a linear regression as a network with a single hidden unit.
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+
+## References
+
+ - Hoerl, A., & Kennard, R. (2000). _Ridge Regression: Biased Estimation for Nonorthogonal Problems_. Technometrics, 42(1), 80-86.
+
diff --git a/man/rmd/linear_reg_lm.Rmd b/man/rmd/linear_reg_lm.Rmd
new file mode 100644
index 000000000..dd61b16d2
--- /dev/null
+++ b/man/rmd/linear_reg_lm.Rmd
@@ -0,0 +1,25 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("linear_reg", "lm")`
+
+## Tuning Parameters
+
+This engine has no tuning parameters.
+
+## Translation from parsnip to the original package
+
+```{r lm-reg}
+linear_reg() %>%
+ set_engine("lm") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+## References
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/linear_reg_spark.Rmd b/man/rmd/linear_reg_spark.Rmd
new file mode 100644
index 000000000..08c503683
--- /dev/null
+++ b/man/rmd/linear_reg_spark.Rmd
@@ -0,0 +1,69 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("linear_reg", "spark")`
+
+## Tuning Parameters
+
+```{r spark-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty", "mixture"),
+ default = c("0.0", "0.0"))
+
+param <-
+ linear_reg() %>%
+ set_engine("spark") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r spark-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+For `penalty`, the amount of regularization includes both the L1 penalty (i.e., lasso) and the L2 penalty (i.e., ridge or weight decay).
+
+A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression.
+
+## Translation from parsnip to the original package
+
+```{r spark-csl}
+linear_reg(penalty = double(1), mixture = double(1)) %>%
+ set_engine("spark") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+By default, `ml_linear_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+
+## Other details
+
+```{r child = "template-spark-notes.Rmd"}
+```
+
+## References
+
+ - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media
+
+ - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/linear_reg_stan.Rmd b/man/rmd/linear_reg_stan.Rmd
new file mode 100644
index 000000000..3b064e303
--- /dev/null
+++ b/man/rmd/linear_reg_stan.Rmd
@@ -0,0 +1,44 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("linear_reg", "stan")`
+
+## Tuning Parameters
+
+This engine has no tuning parameters.
+
+## Important engine-specific options
+
+Some relevant arguments that can be passed to `set_engine()`:
+
+ * `chains`: A positive integer specifying the number of Markov chains. The default is 4.
+ * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000.
+ * `seed`: The seed for random number generation.
+ * `cores`: Number of cores to use when executing the chains in parallel.
+ * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. The `"stan"` engine does not fit any hierarchical terms. See the `"stan_glmer"` engine from the multilevelmod package for that type of model.
+ * `prior_intercept`: The prior distribution for the intercept (after centering all predictors).
+
+See [rstan::sampling()] and [rstanarm::priors()] for more information on these and other options.
+
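+As a sketch, these are passed through [set_engine()]; the values below are illustrative and `normal()` is one of the [rstanarm::priors()]:
+
+```r
+linear_reg() %>%
+  set_engine("stan",
+             chains = 2,
+             iter = 5000,
+             seed = 1,
+             prior_intercept = rstanarm::normal(0, 10))
+```
+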
+## Translation from parsnip to the original package
+
+```{r stan-csl}
+linear_reg() %>%
+ set_engine("stan") %>%
+ translate()
+```
+
+Note that the `refresh` default prevents logging of the estimation process. Change this value in `set_engine()` to show the MCMC logs.
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+## Other details
+
+For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+
+## References
+
+ - McElreath, R. 2020 _Statistical Rethinking_. CRC Press.
diff --git a/man/rmd/logistic_reg_LiblineaR.Rmd b/man/rmd/logistic_reg_LiblineaR.Rmd
new file mode 100644
index 000000000..fd888d517
--- /dev/null
+++ b/man/rmd/logistic_reg_LiblineaR.Rmd
@@ -0,0 +1,61 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("logistic_reg", "LiblineaR")`
+
+## Tuning Parameters
+
+```{r LiblineaR-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty", "mixture"),
+ default = c("see below", "0"))
+
+param <-
+  logistic_reg() %>%
+ set_engine("LiblineaR") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r LiblineaR-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+For `LiblineaR` models, the value for `mixture` can be either 0 (for ridge) or 1 (for lasso), but not intermediate values. In the [LiblineaR::LiblineaR()] documentation, these correspond to types 0 (L2-regularized) and 6 (L1-regularized).
+
+Be aware that the `LiblineaR` engine regularizes the intercept. Other regularized regression models do not, which will result in different parameter estimates.
+
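+For example, a sketch of a ridge-type model with this engine (given the constraint above, `mixture` must be exactly 0 or 1):
+
+```r
+logistic_reg(penalty = 0.1, mixture = 0) %>%
+  set_engine("LiblineaR")
+```
+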
+## Translation from parsnip to the original package
+
+```{r LiblineaR-cls}
+logistic_reg(penalty = double(1), mixture = double(1)) %>%
+ set_engine("LiblineaR") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/logistic_reg_glm.Rmd b/man/rmd/logistic_reg_glm.Rmd
new file mode 100644
index 000000000..7cd4ccf5f
--- /dev/null
+++ b/man/rmd/logistic_reg_glm.Rmd
@@ -0,0 +1,25 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("logistic_reg", "glm")`
+
+## Tuning Parameters
+
+This engine has no tuning parameters.
+
+## Translation from parsnip to the original package
+
+```{r glm-reg}
+logistic_reg() %>%
+ set_engine("glm") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+## References
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/logistic_reg_glmnet.Rmd b/man/rmd/logistic_reg_glmnet.Rmd
new file mode 100644
index 000000000..0835bed75
--- /dev/null
+++ b/man/rmd/logistic_reg_glmnet.Rmd
@@ -0,0 +1,62 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("logistic_reg", "glmnet")`
+
+## Tuning Parameters
+
+```{r glmnet-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty", "mixture"),
+ default = c("see below", "1.0"))
+
+param <-
+  logistic_reg() %>%
+ set_engine("glmnet") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r glmnet-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression.
+
+The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [glmnet-details].
+
+## Translation from parsnip to the original package
+
+```{r glmnet-cls}
+logistic_reg(penalty = double(1), mixture = double(1)) %>%
+ set_engine("glmnet") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data.
+
+## References
+
+ - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/logistic_reg_keras.Rmd b/man/rmd/logistic_reg_keras.Rmd
new file mode 100644
index 000000000..4818f800e
--- /dev/null
+++ b/man/rmd/logistic_reg_keras.Rmd
@@ -0,0 +1,60 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("logistic_reg", "keras")`
+
+## Tuning Parameters
+
+```{r keras-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty"),
+ default = c("0.0"))
+
+param <-
+ logistic_reg() %>%
+ set_engine("keras") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has one tuning parameter:
+
+```{r keras-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+For `penalty`, the amount of regularization includes only the L2 penalty (i.e., ridge or weight decay).
+
+## Translation from parsnip to the original package
+
+```{r keras-csl}
+logistic_reg(penalty = double(1)) %>%
+ set_engine("keras") %>%
+ translate()
+```
+
+[keras_mlp()] is a parsnip wrapper around keras code for neural networks. This model fits a logistic regression as a network with a single hidden unit.
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+
+## References
+
+ - Hoerl, A., & Kennard, R. (2000). _Ridge Regression: Biased Estimation for Nonorthogonal Problems_. Technometrics, 42(1), 80-86.
+
diff --git a/man/rmd/logistic_reg_spark.Rmd b/man/rmd/logistic_reg_spark.Rmd
new file mode 100644
index 000000000..428b1645d
--- /dev/null
+++ b/man/rmd/logistic_reg_spark.Rmd
@@ -0,0 +1,69 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("logistic_reg", "spark")`
+
+## Tuning Parameters
+
+```{r spark-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty", "mixture"),
+ default = c("0.0", "0.0"))
+
+param <-
+ logistic_reg() %>%
+ set_engine("spark") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r spark-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+For `penalty`, the amount of regularization includes both the L1 penalty (i.e., lasso) and the L2 penalty (i.e., ridge or weight decay).
+
+A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression.
+
+## Translation from parsnip to the original package
+
+```{r spark-csl}
+logistic_reg(penalty = double(1), mixture = double(1)) %>%
+ set_engine("spark") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+By default, `ml_logistic_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+
+## Other details
+
+```{r child = "template-spark-notes.Rmd"}
+```
+
+## References
+
+ - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media
+
+ - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/logistic_reg_stan.Rmd b/man/rmd/logistic_reg_stan.Rmd
new file mode 100644
index 000000000..cab623bfd
--- /dev/null
+++ b/man/rmd/logistic_reg_stan.Rmd
@@ -0,0 +1,44 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("logistic_reg", "stan")`
+
+## Tuning Parameters
+
+This engine has no tuning parameters.
+
+## Important engine-specific options
+
+Some relevant arguments that can be passed to `set_engine()`:
+
+ * `chains`: A positive integer specifying the number of Markov chains. The default is 4.
+ * `iter`: A positive integer specifying the number of iterations for each chain (including warmup). The default is 2000.
+ * `seed`: The seed for random number generation.
+ * `cores`: Number of cores to use when executing the chains in parallel.
+ * `prior`: The prior distribution for the (non-hierarchical) regression coefficients. The `"stan"` engine does not fit any hierarchical terms. See the `"stan_glmer"` engine from the multilevelmod package for that type of model.
+ * `prior_intercept`: The prior distribution for the intercept (after centering all predictors).
+
+See [rstan::sampling()] and [rstanarm::priors()] for more information on these and other options.
+
+## Translation from parsnip to the original package
+
+```{r stan-csl}
+logistic_reg() %>%
+ set_engine("stan") %>%
+ translate()
+```
+
+Note that the `refresh` default prevents logging of the estimation process. Change this value in `set_engine()` to show the MCMC logs.
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+## Other details
+
+For prediction, the `"stan"` engine can compute posterior intervals analogous to confidence and prediction intervals. In these instances, the units are the original outcome and when `std_error = TRUE`, the standard deviation of the posterior distribution (or posterior predictive distribution as appropriate) is returned.
+
+## References
+
+ - McElreath, R. 2020 _Statistical Rethinking_. CRC Press.
diff --git a/man/rmd/mars.Rmd b/man/rmd/mars.Rmd
deleted file mode 100644
index 88ab12440..000000000
--- a/man/rmd/mars.Rmd
+++ /dev/null
@@ -1,48 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below.
-
-## earth
-
-```{r earth-reg}
-mars() %>%
- set_engine("earth") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r earth-cls}
-mars() %>%
- set_engine("earth") %>%
- set_mode("classification") %>%
- translate()
-```
-
-Note that, when the model is fit, the `earth` package only has its namespace loaded. However, if `multi_predict` is used, the package is attached.
-
-Also, `fit()` passes the data directly to `earth::earth()` so that its formula method can create dummy variables as-needed.
-
-For this engine, tuning over `num_terms` is very efficient since the same model
-object can be used to make predictions over multiple values of `num_terms`.
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_mars <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "mars", "earth", "prod_degree", "degree", get_arg("earth", "earth.fit", "degree"),
- "mars", "earth", "prune_method", "pmethod", get_arg("earth", "earth.fit", "pmethod")[2]
- )
-}
-convert_args("mars")
-```
-
diff --git a/man/rmd/mars_earth.Rmd b/man/rmd/mars_earth.Rmd
new file mode 100644
index 000000000..9e730d326
--- /dev/null
+++ b/man/rmd/mars_earth.Rmd
@@ -0,0 +1,71 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("mars", "earth")`
+
+## Tuning Parameters
+
+```{r earth-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("num_terms", "prod_degree", "prune_method"),
+ default = c("see below", "1L", "'backward'"))
+
+param <-
+ mars() %>%
+ set_engine("earth") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r earth-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+The default value of `num_terms` depends on the number of predictor columns. For a data frame `x`, the default is `min(200, max(20, 2 * ncol(x))) + 1` (see [earth::earth()] and the reference below).
+
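+As a quick worked example of that default, a data frame with 10 predictor columns yields `min(200, max(20, 2 * 10)) + 1 = 21` terms:
+
+```r
+x <- mtcars[, -1]                    # 10 predictor columns
+min(200, max(20, 2 * ncol(x))) + 1
+#> [1] 21
+```
+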
+## Translation from parsnip to the original package (regression)
+
+```{r earth-reg}
+mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>%
+ set_engine("earth") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r earth-cls}
+mars(num_terms = integer(1), prod_degree = integer(1), prune_method = character(1)) %>%
+ set_engine("earth") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+An alternate method for using MARS for categorical outcomes can be found in [discrim::discrim_flexible()].
+
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+## References
+
+ - Friedman, J. 1991. "Multivariate Adaptive Regression Splines." _The Annals of Statistics_, vol. 19, no. 1, pp. 1-67.
+
+ - Milborrow, S. ["Notes on the earth package."](http://www.milbo.org/doc/earth-notes.pdf)
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/mlp.Rmd b/man/rmd/mlp.Rmd
deleted file mode 100644
index cd4f5b3af..000000000
--- a/man/rmd/mlp.Rmd
+++ /dev/null
@@ -1,65 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below:
-
-## keras
-
-```{r keras-reg}
-mlp() %>%
- set_engine("keras") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r keras-cls}
-mlp() %>%
- set_engine("keras") %>%
- set_mode("classification") %>%
- translate()
-```
-
-An error is thrown if both `penalty` and `dropout` are specified for `keras` models.
-
-## nnet
-
-```{r nnet-reg}
-mlp() %>%
- set_engine("nnet") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r nnet-cls}
-mlp() %>%
- set_engine("nnet") %>%
- set_mode("classification") %>%
- translate()
-```
-
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_mlp <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "mlp", "keras", "hidden_units", "hidden_units", get_arg("parsnip", "keras_mlp", "hidden_units"),
- "mlp", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"),
- "mlp", "keras", "dropout", "dropout", get_arg("parsnip", "keras_mlp", "dropout"),
- "mlp", "keras", "epochs", "epochs", get_arg("parsnip", "keras_mlp", "epochs"),
- "mlp", "keras", "activation", "activation", get_arg("parsnip", "keras_mlp", "activation"),
- "mlp", "nnet", "penalty", "decay", get_arg("nnet", "nnet.default", "decay"),
- "mlp", "nnet", "epochs", "maxit", get_arg("nnet", "nnet.default", "maxit"),
- )
-}
-convert_args("mlp")
-```
-
diff --git a/man/rmd/mlp_keras.Rmd b/man/rmd/mlp_keras.Rmd
new file mode 100644
index 000000000..bc980e7b9
--- /dev/null
+++ b/man/rmd/mlp_keras.Rmd
@@ -0,0 +1,79 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("mlp", "keras")`
+
+## Tuning Parameters
+
+```{r keras-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("hidden_units", "penalty", "dropout", "epochs", "activation"),
+ default = c("5L", "0.0", "0.0", "20L", "'softmax'"))
+
+param <-
+ mlp() %>%
+ set_engine("keras") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r keras-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+## Translation from parsnip to the original package (regression)
+
+```{r keras-reg}
+mlp(
+ hidden_units = integer(1),
+ penalty = double(1),
+ dropout = double(1),
+ epochs = integer(1),
+ activation = character(1)
+) %>%
+ set_engine("keras") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r keras-cls}
+mlp(
+ hidden_units = integer(1),
+ penalty = double(1),
+ dropout = double(1),
+ epochs = integer(1),
+ activation = character(1)
+) %>%
+ set_engine("keras") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
+
diff --git a/man/rmd/mlp_nnet.Rmd b/man/rmd/mlp_nnet.Rmd
new file mode 100644
index 000000000..b4568dfed
--- /dev/null
+++ b/man/rmd/mlp_nnet.Rmd
@@ -0,0 +1,81 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("mlp", "nnet")`
+
+## Tuning Parameters
+
+```{r nnet-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("hidden_units", "penalty", "epochs"),
+ default = c("none", "0.0", "100L"))
+
+param <-
+ mlp() %>%
+ set_engine("nnet") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r nnet-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+Note that, in [nnet::nnet()], the maximum number of parameters (weights) is an argument with a fairly low default of `MaxNWts = 1000`. For some models, you may need to pass this value in via [set_engine()] so that the model does not fail.
+
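+A sketch of raising that limit via an engine argument (the value 5000 is illustrative):
+
+```r
+mlp(hidden_units = 10) %>%
+  set_engine("nnet", MaxNWts = 5000) %>%
+  set_mode("classification")
+```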
+
+## Translation from parsnip to the original package (regression)
+
+```{r nnet-reg}
+mlp(
+ hidden_units = integer(1),
+ penalty = double(1),
+ epochs = integer(1)
+) %>%
+ set_engine("nnet") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+Note that parsnip automatically sets linear activation in the last layer.
+
+## Translation from parsnip to the original package (classification)
+
+```{r nnet-cls}
+mlp(
+ hidden_units = integer(1),
+ penalty = double(1),
+ epochs = integer(1)
+) %>%
+ set_engine("nnet") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
+
+
diff --git a/man/rmd/multinom-reg.Rmd b/man/rmd/multinom-reg.Rmd
deleted file mode 100644
index 878c1eb4c..000000000
--- a/man/rmd/multinom-reg.Rmd
+++ /dev/null
@@ -1,87 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below.
-
-## glmnet
-
-```{r glmnet-cls}
-multinom_reg(penalty = 0.1) %>%
- set_engine("glmnet") %>%
- translate()
-```
-
-The glmnet engine requires a single value for the `penalty` argument (a number
-or `tune()`), but the full regularization path is always fit
-regardless of the value given to `penalty`. To pass in a custom sequence of
-values for glmnet's `lambda`, use the argument `path_values` in `set_engine()`.
-This will assign the value of the glmnet `lambda` parameter without disturbing
-the value given of `multinom_reg(penalty)`. For example:
-
-
-```{r glmnet-path}
-multinom_reg(penalty = .1) %>%
- set_engine("glmnet", path_values = c(0, 10^seq(-10, 1, length.out = 20))) %>%
- translate()
-```
-
-When fitting a pure ridge regression model (i.e., `penalty = 0`), we _strongly
-suggest_ that you pass in a vector for `path_values` that includes zero. See
-[issue #431](https://github.com/tidymodels/parsnip/issues/431) for a discussion.
-
-When using `predict()`, the single `penalty` value used for prediction is the
-one specified in `multinom_reg()`.
-
-To predict on multiple penalties, use the `multi_predict()` function.
-This function returns a tibble with a list column called `.pred` containing
-all of the penalty results.
-
-
-## nnet
-
-```{r nnet-cls}
-multinom_reg() %>%
- set_engine("nnet") %>%
- translate()
-```
-
-## spark
-
-```{r spark-cls}
-multinom_reg() %>%
- set_engine("spark") %>%
- translate()
-```
-
-## keras
-
-```{r keras-cls}
-multinom_reg() %>%
- set_engine("keras") %>%
- translate()
-```
-
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_multinom_reg <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "multinom_reg", "glmnet", "mixture", "alpha", get_arg("glmnet", "glmnet", "alpha"),
- "multinom_reg", "spark", "penalty", "reg_param", get_arg("sparklyr", "ml_logistic_regression", "reg_param"),
- "multinom_reg", "spark", "mixture", "elastic_net_param", get_arg("sparklyr", "ml_logistic_regression", "elastic_net_param"),
- "multinom_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"),
- "multinom_reg", "nnet", "penalty", "decay", get_arg("nnet", "nnet.default", "decay"),
- )
-}
-convert_args("multinom_reg")
-```
-
diff --git a/man/rmd/multinom_reg_glmnet.Rmd b/man/rmd/multinom_reg_glmnet.Rmd
new file mode 100644
index 000000000..339983ad9
--- /dev/null
+++ b/man/rmd/multinom_reg_glmnet.Rmd
@@ -0,0 +1,62 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("multinom_reg", "glmnet")`
+
+## Tuning Parameters
+
+```{r glmnet-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty", "mixture"),
+ default = c("see below", "1.0"))
+
+param <-
+  multinom_reg() %>%
+ set_engine("glmnet") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r glmnet-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression.
+
+The `penalty` parameter has no default and requires a single numeric value. For more details about this, and the `glmnet` model in general, see [glmnet-details].
+
+## Translation from parsnip to the original package
+
+```{r glmnet-cls}
+multinom_reg(penalty = double(1), mixture = double(1)) %>%
+ set_engine("glmnet") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+By default, [glmnet::glmnet()] uses the argument `standardize = TRUE` to center and scale the data.
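+
+For example, a sketch of turning that standardization off through an engine argument (the penalty value is illustrative):
+
+```{r glmnet-no-standardize, eval = FALSE}
+multinom_reg(penalty = 0.1) %>%
+  set_engine("glmnet", standardize = FALSE)
+```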
+
+## References
+
+ - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/multinom_reg_keras.Rmd b/man/rmd/multinom_reg_keras.Rmd
new file mode 100644
index 000000000..354b9fc36
--- /dev/null
+++ b/man/rmd/multinom_reg_keras.Rmd
@@ -0,0 +1,60 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("multinom_reg", "keras")`
+
+## Tuning Parameters
+
+```{r keras-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty"),
+ default = c("0.0"))
+
+param <-
+ multinom_reg() %>%
+ set_engine("keras") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has one tuning parameter:
+
+```{r keras-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+For `penalty`, the amount of regularization includes only the L2 penalty (i.e., ridge or weight decay).
+
+## Translation from parsnip to the original package
+
+```{r keras-cls}
+multinom_reg(penalty = double(1)) %>%
+ set_engine("keras") %>%
+ translate()
+```
+
+[keras_mlp()] is a parsnip wrapper around keras code for neural networks. This model fits a linear classification model as a network with a single hidden unit.
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+
+## References
+
+ - Hoerl, A., & Kennard, R. (2000). _Ridge Regression: Biased Estimation for Nonorthogonal Problems_. Technometrics, 42(1), 80-86.
+
diff --git a/man/rmd/multinom_reg_nnet.Rmd b/man/rmd/multinom_reg_nnet.Rmd
new file mode 100644
index 000000000..107262d88
--- /dev/null
+++ b/man/rmd/multinom_reg_nnet.Rmd
@@ -0,0 +1,61 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("multinom_reg", "nnet")`
+
+## Tuning Parameters
+
+```{r nnet-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty"),
+ default = c("0.0"))
+
+param <-
+ multinom_reg() %>%
+ set_engine("nnet") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r nnet-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+For `penalty`, the amount of regularization includes only the L2 penalty (i.e., ridge or weight decay).
+
+## Translation from parsnip to the original package
+
+```{r nnet-cls}
+multinom_reg(penalty = double(1)) %>%
+ set_engine("nnet") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media
+
+ - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/multinom_reg_spark.Rmd b/man/rmd/multinom_reg_spark.Rmd
new file mode 100644
index 000000000..d9fb28f03
--- /dev/null
+++ b/man/rmd/multinom_reg_spark.Rmd
@@ -0,0 +1,69 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("multinom_reg", "spark")`
+
+## Tuning Parameters
+
+```{r spark-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("penalty", "mixture"),
+ default = c("0.0", "0.0"))
+
+param <-
+ multinom_reg() %>%
+ set_engine("spark") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+ dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r spark-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+For `penalty`, the amount of regularization includes both the L1 penalty (i.e., lasso) and the L2 penalty (i.e., ridge or weight decay).
+
+A value of `mixture = 1` corresponds to a pure lasso model, while `mixture = 0` indicates ridge regression.
+
+## Translation from parsnip to the original package
+
+```{r spark-cls}
+multinom_reg(penalty = double(1), mixture = double(1)) %>%
+ set_engine("spark") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+By default, `ml_multinom_regression()` uses the argument `standardization = TRUE` to center and scale the data.
+
+## Other details
+
+```{r child = "template-spark-notes.Rmd"}
+```
+
+## References
+
+ - Luraschi, J, K Kuo, and E Ruiz. 2019. _Mastering Spark with R_. O'Reilly Media
+
+ - Hastie, T, R Tibshirani, and M Wainwright. 2015. _Statistical Learning with Sparsity_. CRC Press.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/nearest_neighbor_kknn.Rmd b/man/rmd/nearest_neighbor_kknn.Rmd
new file mode 100644
index 000000000..4c9927966
--- /dev/null
+++ b/man/rmd/nearest_neighbor_kknn.Rmd
@@ -0,0 +1,76 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("nearest_neighbor", "kknn")`
+
+## Tuning Parameters
+
+```{r kknn-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("neighbors", "weight_func", "dist_power"),
+ default = c("5L", "'optimal'", "2.0"))
+
+param <-
+ nearest_neighbor() %>%
+ set_engine("kknn") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+  dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r kknn-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+## Translation from parsnip to the original package (regression)
+
+```{r kknn-reg}
+nearest_neighbor(
+ neighbors = integer(1),
+ weight_func = character(1),
+ dist_power = double(1)
+) %>%
+ set_engine("kknn") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+`min_rows()` will adjust the number of neighbors if the chosen value is not consistent with the actual data dimensions.
+
+## Translation from parsnip to the original package (classification)
+
+```{r kknn-cls}
+nearest_neighbor(
+ neighbors = integer(1),
+ weight_func = character(1),
+ dist_power = double(1)
+) %>%
+ set_engine("kknn") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Hechenbichler K. and Schliep K.P. (2004) [Weighted k-Nearest-Neighbor Techniques and Ordinal Classification](https://epub.ub.uni-muenchen.de/1769/), Discussion Paper 399, SFB 386, Ludwig-Maximilians University Munich
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/rand-forest.Rmd b/man/rmd/rand-forest.Rmd
deleted file mode 100644
index ecfd0ef20..000000000
--- a/man/rmd/rand-forest.Rmd
+++ /dev/null
@@ -1,93 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below:
-
-## ranger
-
-```{r ranger-reg}
-rand_forest() %>%
- set_engine("ranger") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r ranger-cls}
-rand_forest() %>%
- set_engine("ranger") %>%
- set_mode("classification") %>%
- translate()
-```
-
-Note that [ranger::ranger()] does not require factor predictors to be converted to indicator variables. `fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model.
-
-For `ranger` confidence intervals, the intervals are constructed using the form
-`estimate +/- z * std_error`. For classification probabilities, these values can
-fall outside of `[0, 1]` and will be coerced to be in this range.
-
-## randomForest
-
-```{r randomForest-reg}
-rand_forest() %>%
- set_engine("randomForest") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r randomForest-cls}
-rand_forest() %>%
- set_engine("randomForest") %>%
- set_mode("classification") %>%
- translate()
-```
-
-Note that [randomForest::randomForest()] does not require factor predictors to be converted to indicator variables. `fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model.
-
-## spark
-
-```{r spark-reg}
-rand_forest() %>%
- set_engine("spark") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r spark-cls}
-rand_forest() %>%
- set_engine("spark") %>%
- set_mode("classification") %>%
- translate()
-```
-
-`fit()` does not affect the encoding of the predictor values (i.e. factors stay factors) for this model.
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-get_defaults_rand_forest <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "rand_forest", "ranger", "trees", "num.trees", get_arg("ranger", "ranger", "num.trees"),
- "rand_forest", "ranger", "mtry", "mtry", "see below",
- "rand_forest", "ranger", "min_n", "min.node.size", "see below",
- "rand_forest", "randomForest", "trees", "ntree", get_arg("randomForest", "randomForest.default", "ntree"),
- "rand_forest", "randomForest", "mtry", "mtry", "see below",
- "rand_forest", "randomForest", "min_n", "nodesize", "see below",
- "rand_forest", "spark", "trees", "num_trees", get_arg("sparklyr", "ml_random_forest", "num_trees"),
- "rand_forest", "spark", "mtry", "feature_subset_strategy", "see below",
- "rand_forest", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_random_forest", "min_instances_per_node"),
- )
-}
-convert_args("rand_forest")
-```
-
-- For randomForest and spark, the default `mtry` is the square root of the number of predictors for classification, and one-third of the predictors for regression.
-- For ranger, the default `mtry` is the square root of the number of predictors.
-- The default `min_n` for both ranger and randomForest is 1 for classification and 5 for regression.
diff --git a/man/rmd/rand_forest_randomForest.Rmd b/man/rmd/rand_forest_randomForest.Rmd
new file mode 100644
index 000000000..fe0f9ef8c
--- /dev/null
+++ b/man/rmd/rand_forest_randomForest.Rmd
@@ -0,0 +1,76 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("rand_forest", "randomForest")`
+
+## Tuning Parameters
+
+```{r randomForest-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("mtry", "trees", "min_n"),
+ default = c("see below", "500L", "see below"))
+
+param <-
+ rand_forest() %>%
+ set_engine("randomForest") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+  dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r randomForest-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+`mtry` depends on the number of columns and the model mode. The default in [randomForest::randomForest()] is `floor(sqrt(ncol(x)))` for classification and `floor(ncol(x)/3)` for regression.
+
+`min_n` depends on the mode. For regression, a value of 5 is the default. For classification, a value of 1 is used.
+
+## Translation from parsnip to the original package (regression)
+
+```{r randomForest-reg}
+rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) %>%
+ set_engine("randomForest") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+`min_rows()` and `min_cols()` will adjust `min_n` and `mtry`, respectively, if the chosen values are not consistent with the actual data dimensions.
+
+## Translation from parsnip to the original package (classification)
+
+```{r randomForest-cls}
+rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) %>%
+ set_engine("randomForest") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## References
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/rand_forest_ranger.Rmd b/man/rmd/rand_forest_ranger.Rmd
new file mode 100644
index 000000000..f8ae3f03c
--- /dev/null
+++ b/man/rmd/rand_forest_ranger.Rmd
@@ -0,0 +1,83 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("rand_forest", "ranger")`
+
+## Tuning Parameters
+
+```{r ranger-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("mtry", "trees", "min_n"),
+ default = c("see below", "500L", "see below"))
+
+param <-
+ rand_forest() %>%
+ set_engine("ranger") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+  dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r ranger-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+`mtry` depends on the number of columns. The default in [ranger::ranger()] is `floor(sqrt(ncol(x)))`.
+
+`min_n` depends on the mode. For regression, a value of 5 is the default. For classification, a value of 10 is used.
+
+## Translation from parsnip to the original package (regression)
+
+```{r ranger-reg}
+rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) %>%
+ set_engine("ranger") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+`min_rows()` and `min_cols()` will adjust `min_n` and `mtry`, respectively, if the chosen values are not consistent with the actual data dimensions.
+
+## Translation from parsnip to the original package (classification)
+
+```{r ranger-cls}
+rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) %>%
+ set_engine("ranger") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+Note that a `ranger` probability forest is always fit (unless the `probability` argument is changed by the user via [set_engine()]).
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## Other notes
+
+By default, parallel processing is turned off. When tuning, it is more efficient to parallelize over the resamples and tuning parameters. To parallelize the construction of the trees within the `ranger` model, change the `num.threads` argument via [set_engine()].
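+
+For example, a sketch of setting the thread count (the value 4 is illustrative):
+
+```{r ranger-threads, eval = FALSE}
+rand_forest(trees = 1000) %>%
+  set_engine("ranger", num.threads = 4) %>%
+  set_mode("regression")
+```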
+
+Confidence intervals for `ranger` models are constructed using the form `estimate +/- z * std_error`. For classification probabilities, these values can fall outside of `[0, 1]` and are coerced into this range.
+
+## References
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
diff --git a/man/rmd/rand_forest_spark.Rmd b/man/rmd/rand_forest_spark.Rmd
new file mode 100644
index 000000000..e10f26ae2
--- /dev/null
+++ b/man/rmd/rand_forest_spark.Rmd
@@ -0,0 +1,79 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("rand_forest", "spark")`
+
+## Tuning Parameters
+
+```{r spark-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("mtry", "trees", "min_n"),
+ default = c("see below", "20L", "1L"))
+
+param <-
+ rand_forest() %>%
+ set_engine("spark") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+  dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r spark-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+`mtry` depends on the number of columns and the model mode. The default in [sparklyr::ml_random_forest()] is `floor(sqrt(ncol(x)))` for classification and `floor(ncol(x)/3)` for regression.
+
+## Translation from parsnip to the original package (regression)
+
+```{r spark-reg}
+rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) %>%
+ set_engine("spark") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+`min_rows()` and `min_cols()` will adjust `min_n` and `mtry`, respectively, if the chosen values are not consistent with the actual data dimensions.
+
+## Translation from parsnip to the original package (classification)
+
+```{r spark-cls}
+rand_forest(
+ mtry = integer(1),
+ trees = integer(1),
+ min_n = integer(1)
+) %>%
+ set_engine("spark") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+## Preprocessing requirements
+
+```{r child = "template-tree-split-factors.Rmd"}
+```
+
+## Other details
+
+```{r child = "template-spark-notes.Rmd"}
+```
+
+## References
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/setup.Rmd b/man/rmd/setup.Rmd
index 02dd2b22b..405e18a59 100644
--- a/man/rmd/setup.Rmd
+++ b/man/rmd/setup.Rmd
@@ -1,4 +1,8 @@
```{r, include = FALSE}
+library(dials)
+library(parsnip)
+library(tune)
+
convert_args <- function(model_name) {
envir <- get_model_env()
@@ -39,4 +43,28 @@ get_arg <- function(ns, f, arg) {
as.character(args[[arg]])
}
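+
+# Evaluate the dials parameter object described by a tunable() call_info entry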
+get_dials <- function(x) {
+ if (any(names(x) == "range")) {
+    cl <- rlang::call2(x$fun, .ns = x$pkg, range = x$range)
+ } else {
+    cl <- rlang::call2(x$fun, .ns = x$pkg)
+ }
+ rlang::eval_tidy(cl)
+}
+
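+# Build the "For this engine, there is/are ... mode(s): ..." sentence used in
+# the engine-specific Rmd files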
+descr_models <- function(mod, eng) {
+ res <- get_from_env(mod) %>%
+ dplyr::filter(engine == eng) %>%
+ dplyr::distinct() %>%
+ purrr::pluck("mode")
+
+ if (length(res) == 1) {
+ txt <- "is a single mode:"
+ } else {
+ txt <- "are multiple modes:"
+ }
+ paste("For this engine, there", txt, knitr::combine_words(res))
+}
+
+options(width = 80)
```
diff --git a/man/rmd/svm-linear.Rmd b/man/rmd/svm-linear.Rmd
deleted file mode 100644
index 50cdf542d..000000000
--- a/man/rmd/svm-linear.Rmd
+++ /dev/null
@@ -1,71 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below:
-
-## LiblineaR
-
-```{r LiblineaR-reg}
-svm_linear() %>%
- set_engine("LiblineaR") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r LiblineaR-cls}
-svm_linear() %>%
- set_engine("LiblineaR") %>%
- set_mode("classification") %>%
- translate()
-```
-
-Note that the `LiblineaR` engine cannot produce class probabilities. When
-optimizing the model using the `tune` package, the default metrics require class
-probabilities. To be able to use the `tune_*()` functions, a metric set must be
-passed as an argument and it can only contain metrics associated with hard class
-predictions (e.g., accuracy and so on).
-
-This engine fits models that are L2-regularized for L2-loss. In the `LiblineaR`
-documentation, these are types 1 (classification) and 11 (regression).
-
-## kernlab
-
-```{r kernlab-reg}
-svm_linear() %>%
- set_engine("kernlab") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r kernlab-cls}
-svm_linear() %>%
- set_engine("kernlab") %>%
- set_mode("classification") %>%
- translate()
-```
-
-`fit()` passes the data directly to `kernlab::ksvm()` so that its formula method can create dummy variables as-needed.
-
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-## these values were manually checked :/ on 2021-02-01
-get_defaults_svm_linear <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "svm_linear", "LiblineaR", "cost", "C", "1",
- "svm_linear", "LiblineaR", "margin", "svr_eps", "0.1",
- "svm_linear", "kernlab", "cost", "C", "1",
- "svm_linear", "kernlab", "margin", "epsilon", "0.1",
- )
-}
-convert_args("svm_linear")
-```
diff --git a/man/rmd/svm-poly.Rmd b/man/rmd/svm-poly.Rmd
deleted file mode 100644
index 7f4f7959a..000000000
--- a/man/rmd/svm-poly.Rmd
+++ /dev/null
@@ -1,46 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below:
-
-## kernlab
-
-```{r kernlab-reg}
-svm_poly() %>%
- set_engine("kernlab") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r kernlab-cls}
-svm_poly() %>%
- set_engine("kernlab") %>%
- set_mode("classification") %>%
- translate()
-```
-
-`fit()` passes the data directly to `kernlab::ksvm()` so that its formula method can create dummy variables as-needed.
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-## these values were manually checked :/ on 2020-05-28
-get_defaults_svm_poly <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "svm_poly", "kernlab", "cost", "C", "1",
- "svm_poly", "kernlab", "degree", "degree", "1",
- "svm_poly", "kernlab", "scale_factor", "scale", "1",
- "svm_poly", "kernlab", "margin", "epsilon", "0.1",
- )
-}
-convert_args("svm_poly")
-```
-
diff --git a/man/rmd/svm-rbf.Rmd b/man/rmd/svm-rbf.Rmd
deleted file mode 100644
index 500010229..000000000
--- a/man/rmd/svm-rbf.Rmd
+++ /dev/null
@@ -1,45 +0,0 @@
-# Engine Details
-
-```{r, child = "setup.Rmd", include = FALSE}
-```
-
-Engines may have pre-set default arguments when executing the model fit call.
-For this type of model, the template of the fit calls are below:
-
-## kernlab
-
-```{r kernlab-reg}
-svm_rbf() %>%
- set_engine("kernlab") %>%
- set_mode("regression") %>%
- translate()
-```
-
-```{r kernlab-cls}
-svm_rbf() %>%
- set_engine("kernlab") %>%
- set_mode("classification") %>%
- translate()
-```
-
-`fit()` passes the data directly to `kernlab::ksvm()` so that its formula method can create dummy variables as-needed.
-
-## Parameter translations
-
-The standardized parameter names in parsnip can be mapped to their original
-names in each engine that has main parameters. Each engine typically has a
-different default value (shown in parentheses) for each parameter.
-
-```{r echo = FALSE, results = "asis"}
-## these values were manually checked :/ on 2020-05-28
-get_defaults_svm_rbf <- function() {
- tibble::tribble(
- ~model, ~engine, ~parsnip, ~original, ~default,
- "svm_rbf", "kernlab", "cost", "C", "1",
- "svm_rbf", "kernlab", "rbf_sigma", "sigma", "varies",
- "svm_rbf", "kernlab", "margin", "epsilon", "0.1",
- )
-}
-convert_args("svm_rbf")
-```
-
diff --git a/man/rmd/svm_linear_LiblineaR.Rmd b/man/rmd/svm_linear_LiblineaR.Rmd
new file mode 100644
index 000000000..f60c4b159
--- /dev/null
+++ b/man/rmd/svm_linear_LiblineaR.Rmd
@@ -0,0 +1,76 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("svm_linear", "LiblineaR")`
+
+## Tuning Parameters
+
+```{r LiblineaR-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("cost", "margin"),
+ default = c("1.0", "no default"))
+
+param <-
+ svm_linear() %>%
+ set_engine("LiblineaR") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+  dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r LiblineaR-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+This engine fits models that are L2-regularized for L2-loss. In the [LiblineaR::LiblineaR()] documentation, these are types 1 (classification) and 11 (regression).
+
+## Translation from parsnip to the original package (regression)
+
+```{r LiblineaR-reg}
+svm_linear(
+ cost = double(1),
+ margin = double(1)
+) %>%
+ set_engine("LiblineaR") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r LiblineaR-cls}
+svm_linear(
+ cost = double(1)
+) %>%
+ set_engine("LiblineaR") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+The `margin` parameter does not apply to classification models.
+
+Note that the `LiblineaR` engine does not produce class probabilities. When optimizing the model using the tune package, the default metrics require class probabilities. To use the `tune_*()` functions, a metric set must be passed as an argument that only contains metrics for hard class predictions (e.g., accuracy).
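+
+For example, a sketch of such a hard-class metric set (assuming the yardstick and tune packages; the metrics chosen are illustrative):
+
+```{r LiblineaR-metrics, eval = FALSE}
+library(yardstick)
+hard_class_metrics <- metric_set(accuracy, kap)
+# then pass to, e.g., tune::tune_grid(..., metrics = hard_class_metrics)
+```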
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/svm_linear_kernlab.Rmd b/man/rmd/svm_linear_kernlab.Rmd
new file mode 100644
index 000000000..80fd9a8fb
--- /dev/null
+++ b/man/rmd/svm_linear_kernlab.Rmd
@@ -0,0 +1,78 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("svm_linear", "kernlab")`
+
+## Tuning Parameters
+
+```{r kernlab-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("cost", "margin"),
+ default = c("1.0", "0.1"))
+
+param <-
+ svm_linear() %>%
+ set_engine("kernlab") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+  dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r kernlab-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+## Translation from parsnip to the original package (regression)
+
+```{r kernlab-reg}
+svm_linear(
+ cost = double(1),
+ margin = double(1)
+) %>%
+ set_engine("kernlab") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r kernlab-cls}
+svm_linear(
+ cost = double(1)
+) %>%
+ set_engine("kernlab") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+The `margin` parameter does not apply to classification models.
+
+Note that the `"kernlab"` engine does not naturally estimate class probabilities. To produce them, the decision values of the model are converted to probabilities using Platt scaling. This method fits an additional model on top of the SVM model. When fitting the Platt scaling model, random numbers are used that are not reproducible or controlled by R's random number stream.
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Lin, HT, and R Weng. ["A Note on Platt’s Probabilistic Outputs for Support Vector Machines"](https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf)
+
+ - Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004. ["kernlab - An S4 Package for Kernel Methods in R."](https://www.jstatsoft.org/article/view/v011i09), _Journal of Statistical Software_.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/svm_poly_kernlab.Rmd b/man/rmd/svm_poly_kernlab.Rmd
new file mode 100644
index 000000000..b543c43f5
--- /dev/null
+++ b/man/rmd/svm_poly_kernlab.Rmd
@@ -0,0 +1,82 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("svm_poly", "kernlab")`
+
+## Tuning Parameters
+
+```{r kernlab-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("cost", "degree", "scale_factor", "margin"),
+                 default = c("1.0", "1L", "1.0", "0.1"))
+
+param <-
+ svm_poly() %>%
+ set_engine("kernlab") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+  dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r kernlab-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+## Translation from parsnip to the original package (regression)
+
+```{r kernlab-reg}
+svm_poly(
+ cost = double(1),
+ degree = integer(1),
+ scale_factor = double(1),
+ margin = double(1)
+) %>%
+ set_engine("kernlab") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r kernlab-cls}
+svm_poly(
+ cost = double(1),
+ degree = integer(1),
+ scale_factor = double(1)
+) %>%
+ set_engine("kernlab") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+The `margin` parameter does not apply to classification models.
+
+Note that the `"kernlab"` engine does not naturally estimate class probabilities. To produce them, the decision values of the model are converted to probabilities using Platt scaling. This method fits an additional model on top of the SVM model. When fitting the Platt scaling model, random numbers are used that are not reproducible or controlled by R's random number stream.
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Lin, HT, and R Weng. ["A Note on Platt’s Probabilistic Outputs for Support Vector Machines"](https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf)
+
+ - Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004. ["kernlab - An S4 Package for Kernel Methods in R."](https://www.jstatsoft.org/article/view/v011i09), _Journal of Statistical Software_.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/svm_rbf_kernlab.Rmd b/man/rmd/svm_rbf_kernlab.Rmd
new file mode 100644
index 000000000..a01a4f710
--- /dev/null
+++ b/man/rmd/svm_rbf_kernlab.Rmd
@@ -0,0 +1,82 @@
+```{r, child = "setup.Rmd", include = FALSE}
+```
+
+`r descr_models("svm_rbf", "kernlab")`
+
+## Tuning Parameters
+
+```{r kernlab-param-info, echo = FALSE}
+defaults <-
+ tibble::tibble(parsnip = c("cost","rbf_sigma", "margin"),
+ default = c("1.0", "see below", "0.1"))
+
+param <-
+ svm_rbf() %>%
+ set_engine("kernlab") %>%
+ tunable() %>%
+ dplyr::select(-source, -component, -component_id, parsnip = name) %>%
+ dplyr::mutate(
+ dials = purrr::map(call_info, get_dials),
+ label = purrr::map_chr(dials, ~ .x$label),
+ type = purrr::map_chr(dials, ~ .x$type)
+ ) %>%
+ dplyr::full_join(defaults, by = "parsnip") %>%
+  dplyr::mutate(
+ item =
+ glue::glue("- `{parsnip}`: {label} (type: {type}, default: {default})\n\n")
+ )
+```
+
+This model has `r nrow(param)` tuning parameters:
+
+```{r kernlab-param-list, echo = FALSE, results = "asis"}
+param$item
+```
+
+There is no default for the radial basis function kernel parameter. kernlab estimates it from the data using a heuristic method. See [kernlab::sigest()]. This method uses random numbers so, without setting the seed before fitting, the model will not be reproducible.
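+
+For example, a minimal sketch of a reproducible fit (the seed and data set are illustrative):
+
+```{r kernlab-seed, eval = FALSE}
+set.seed(27)
+svm_rbf(cost = 1) %>%
+  set_engine("kernlab") %>%
+  set_mode("classification") %>%
+  fit(Species ~ ., data = iris)
+```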
+
+## Translation from parsnip to the original package (regression)
+
+```{r kernlab-reg}
+svm_rbf(
+ cost = double(1),
+ rbf_sigma = double(1),
+ margin = double(1)
+) %>%
+ set_engine("kernlab") %>%
+ set_mode("regression") %>%
+ translate()
+```
+
+## Translation from parsnip to the original package (classification)
+
+```{r kernlab-cls}
+svm_rbf(
+ cost = double(1),
+ rbf_sigma = double(1)
+) %>%
+ set_engine("kernlab") %>%
+ set_mode("classification") %>%
+ translate()
+```
+
+The `margin` parameter does not apply to classification models.
+
+Note that the `"kernlab"` engine does not naturally estimate class probabilities. To produce them, the decision values of the model are converted to probabilities using Platt scaling. This method fits an additional model on top of the SVM model. When fitting the Platt scaling model, random numbers are used that are not reproducible or controlled by R's random number stream.
+
+## Preprocessing requirements
+
+```{r child = "template-makes-dummies.Rmd"}
+```
+
+```{r child = "template-same-scale.Rmd"}
+```
+
+## References
+
+ - Lin, HT, and R Weng. ["A Note on Platt’s Probabilistic Outputs for Support Vector Machines"](https://www.csie.ntu.edu.tw/~cjlin/papers/plattprob.pdf)
+
+ - Karatzoglou, A, Smola, A, Hornik, K, and A Zeileis. 2004. ["kernlab - An S4 Package for Kernel Methods in R."](https://www.jstatsoft.org/article/view/v011i09), _Journal of Statistical Software_.
+
+ - Kuhn, M, and K Johnson. 2013. _Applied Predictive Modeling_. Springer.
+
diff --git a/man/rmd/template-makes-dummies.Rmd b/man/rmd/template-makes-dummies.Rmd
new file mode 100644
index 000000000..bbca2037f
--- /dev/null
+++ b/man/rmd/template-makes-dummies.Rmd
@@ -0,0 +1 @@
+Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit.model_spec()}}, parsnip will convert factor columns to indicators.
diff --git a/man/rmd/template-same-scale.Rmd b/man/rmd/template-same-scale.Rmd
new file mode 100644
index 000000000..5dc46fc6c
--- /dev/null
+++ b/man/rmd/template-same-scale.Rmd
@@ -0,0 +1,2 @@
+Predictors should have the same scale. One way to achieve this is to center and
+scale each so that each predictor has mean zero and a variance of one.
diff --git a/man/rmd/template-spark-notes.Rmd b/man/rmd/template-spark-notes.Rmd
new file mode 100644
index 000000000..b1cdb76c9
--- /dev/null
+++ b/man/rmd/template-spark-notes.Rmd
@@ -0,0 +1,6 @@
+For models created using the `"spark"` engine, there are several things to consider.
+
+* Only the formula interface via `fit()` is available; using `fit_xy()` will generate an error.
+* The predictions will always be in a Spark table format. The names will be the same as documented but without the dots.
+* There is no equivalent to factor columns in Spark tables so class predictions are returned as character columns.
+* To retain the model object for a new R session (via `save()`), the `object$fit` element of the parsnip object should be serialized via `ml_save(object$fit)` and separately saved to disk. In a new session, the object can be reloaded and reattached to the parsnip object; a sketch of this workflow follows this list.
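+
+A minimal sketch of that workflow (assuming a Spark connection `sc`, a fitted parsnip object `fit_obj`, and illustrative file paths):
+
+```{r spark-save-reload, eval = FALSE}
+library(sparklyr)
+
+# serialize the Spark-side model and the R-side object separately
+ml_save(fit_obj$fit, "path/to/spark_model")
+saveRDS(fit_obj, "fit_obj.rds")
+
+# in a new session: reload both pieces and reattach them
+fit_obj <- readRDS("fit_obj.rds")
+fit_obj$fit <- ml_load(sc, "path/to/spark_model")
+```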
diff --git a/man/rmd/template-tree-split-factors.Rmd b/man/rmd/template-tree-split-factors.Rmd
new file mode 100644
index 000000000..efd28f910
--- /dev/null
+++ b/man/rmd/template-tree-split-factors.Rmd
@@ -0,0 +1 @@
+This engine does not require any special encoding of the predictors. Categorical predictors can be partitioned into groups of factor levels (e.g. `{a, c}` vs `{b, d}`) when splitting at a node. Dummy variables are not required for this model.
diff --git a/man/surv_reg.Rd b/man/surv_reg.Rd
index 162d0cfde..aa58a2439 100644
--- a/man/surv_reg.Rd
+++ b/man/surv_reg.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/surv_reg.R
\name{surv_reg}
\alias{surv_reg}
-\title{General Interface for Parametric Survival Models}
+\title{Parametric survival regression}
\usage{
surv_reg(mode = "regression", engine = "survival", dist = NULL)
}
@@ -40,7 +40,7 @@ to determine the \emph{mode} of the model. For \code{surv_reg()},the
mode will always be "regression".
Since survival models typically involve censoring (and require the use of
-\code{\link[survival:Surv]{survival::Surv()}} objects), the \code{\link[=fit]{fit()}} function will require that the
+\code{\link[survival:Surv]{survival::Surv()}} objects), the \code{\link[=fit.model_spec]{fit.model_spec()}} function will require that the
survival model be specified via the formula interface.
Also, for the \code{flexsurv::flexsurvfit} engine, the typical
@@ -116,15 +116,6 @@ parameter.\tabular{lll}{
show_engines("surv_reg")
surv_reg()
-# Parameters can be represented by a placeholder:
-surv_reg(dist = varying())
-
-# ->
-show_engines("survival_reg")
-
-survival_reg()
-# Parameters can be represented by a placeholder:
-survival_reg(dist = varying())
}
\references{
@@ -132,6 +123,6 @@ Jackson, C. (2016). \code{flexsurv}: A Platform for Parametric Survival
Modeling in R. \emph{Journal of Statistical Software}, 70(8), 1 - 33.
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[survival:Surv]{survival::Surv()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\code{\link[=fit.model_spec]{fit.model_spec()}}, \code{\link[survival:Surv]{survival::Surv()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
}
\keyword{internal}
diff --git a/man/survival_reg.Rd b/man/survival_reg.Rd
index 27bd1c312..e60df3195 100644
--- a/man/survival_reg.Rd
+++ b/man/survival_reg.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/survival_reg.R
\name{survival_reg}
\alias{survival_reg}
-\title{General Interface for Parametric Survival Models}
+\title{Parametric survival regression}
\usage{
survival_reg(mode = "censored regression", engine = "survival", dist = NULL)
}
@@ -38,15 +38,13 @@ to determine the \emph{mode} of the model. For \code{survival_reg()},the
mode will always be "censored regression".
Since survival models typically involve censoring (and require the use of
-\code{\link[survival:Surv]{survival::Surv()}} objects), the \code{\link[=fit]{fit()}} function will require that the
+\code{\link[survival:Surv]{survival::Surv()}} objects), the \code{\link[=fit.model_spec]{fit.model_spec()}} function will require that the
survival model be specified via the formula interface.
}
\examples{
survival_reg()
-# Parameters can be represented by a placeholder:
-survival_reg(dist = varying())
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[survival:Surv]{survival::Surv()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\code{\link[=fit.model_spec]{fit.model_spec()}}, \code{\link[survival:Surv]{survival::Surv()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
}
\keyword{internal}
diff --git a/man/svm_linear.Rd b/man/svm_linear.Rd
index 71f2d82c2..d00032487 100644
--- a/man/svm_linear.Rd
+++ b/man/svm_linear.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/svm_linear.R
\name{svm_linear}
\alias{svm_linear}
-\title{General interface for linear support vector machines}
+\title{Linear support vector machines}
\usage{
svm_linear(mode = "unknown", engine = "LiblineaR", cost = NULL, margin = NULL)
}
@@ -12,8 +12,7 @@ Possible values for this model are "unknown", "regression", or
"classification".}
\item{engine}{A single character string specifying what computational engine
-to use for fitting. Possible engines are listed below. The default for this
-model is \code{"LiblineaR"}.}
+to use for fitting.}
\item{cost}{A positive number for the cost of predicting a sample within
or on the wrong side of the margin}
@@ -22,117 +21,37 @@ or on the wrong side of the margin}
loss function (regression only)}
}
\description{
-\code{svm_linear()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R or via Spark. The main arguments for the
-model are:
-\itemize{
-\item \code{cost}: The cost of predicting a sample within or on the
-wrong side of the margin.
-\item \code{margin}: The epsilon in the SVM insensitive loss function
-(regression only)
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"LiblineaR"} (the default) or \code{"kernlab"}
-}
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below:
-\subsection{LiblineaR}{\if{html}{\out{}}\preformatted{svm_linear() \%>\%
- set_engine("LiblineaR") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (regression)
-##
-## Computational engine: LiblineaR
-##
-## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## type = 11, svr_eps = 0.1)
-}\if{html}{\out{}}\preformatted{svm_linear() \%>\%
- set_engine("LiblineaR") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (classification)
-##
-## Computational engine: LiblineaR
-##
-## Model fit template:
-## LiblineaR::LiblineaR(x = missing_arg(), y = missing_arg(), wi = missing_arg(),
-## type = 1)
-}
+\code{svm_linear()} defines a support vector machine model. For classification,
+the model tries to maximize the width of the margin between classes.
+For regression, the model optimizes a robust loss function that is only
+affected by very large model residuals.
-Note that the \code{LiblineaR} engine cannot produce class probabilities.
-When optimizing the model using the \code{tune} package, the default metrics
-require class probabilities. To be able to use the \verb{tune_*()} functions,
-a metric set must be passed as an argument and it can only contain
-metrics associated with hard class predictions (e.g., accuracy and so
-on).
+This SVM model uses a linear function to create the decision boundary or
+regression line.
-This engine fits models that are L2-regularized for L2-loss. In the
-\code{LiblineaR} documentation, these are types 1 (classification) and 11
-(regression).
-}
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-\subsection{kernlab}{\if{html}{\out{}}\preformatted{svm_linear() \%>\%
- set_engine("kernlab") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (regression)
-##
-## Computational engine: kernlab
-##
-## Model fit template:
-## kernlab::ksvm(x = missing_arg(), data = missing_arg(), kernel = "vanilladot")
-}\if{html}{\out{}}\preformatted{svm_linear() \%>\%
- set_engine("kernlab") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Linear Support Vector Machine Specification (classification)
-##
-## Computational engine: kernlab
-##
-## Model fit template:
-## kernlab::ksvm(x = missing_arg(), data = missing_arg(), kernel = "vanilladot",
-## prob.model = TRUE)
-}
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_linear")}
-\code{fit()} passes the data directly to \code{kernlab::ksvm()} so that its
-formula method can create dummy variables as-needed.
-}
-
-\subsection{Parameter translations}{
-
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{lll}{
- \strong{parsnip} \tab \strong{LiblineaR} \tab \strong{kernlab} \cr
- cost \tab C (1) \tab C (1) \cr
- margin \tab svr_eps (0.1) \tab epsilon (0.1) \cr
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
}
-}
-
\examples{
show_engines("svm_linear")
svm_linear(mode = "classification")
-# Parameters can be represented by a placeholder:
-svm_linear(mode = "regression", cost = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_linear")}
}
diff --git a/man/svm_poly.Rd b/man/svm_poly.Rd
index ab8a237ac..f9ea410ba 100644
--- a/man/svm_poly.Rd
+++ b/man/svm_poly.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/svm_poly.R
\name{svm_poly}
\alias{svm_poly}
-\title{General interface for polynomial support vector machines}
+\title{Polynomial support vector machines}
\usage{
svm_poly(
mode = "unknown",
@@ -19,8 +19,7 @@ Possible values for this model are "unknown", "regression", or
"classification".}
\item{engine}{A single character string specifying what computational engine
-to use for fitting. Possible engines are listed below. The default for this
-model is \code{"kernlab"}.}
+to use for fitting.}
\item{cost}{A positive number for the cost of predicting a sample within
or on the wrong side of the margin}
@@ -33,85 +32,37 @@ or on the wrong side of the margin}
loss function (regression only)}
}
\description{
-\code{svm_poly()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R or via Spark. The main arguments for the
-model are:
-\itemize{
-\item \code{cost}: The cost of predicting a sample within or on the
-wrong side of the margin.
-\item \code{degree}: The polynomial degree.
-\item \code{scale_factor}: A scaling factor for the kernel.
-\item \code{margin}: The epsilon in the SVM insensitive loss function
-(regression only)
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"kernlab"} (the default)
-}
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below:
-\subsection{kernlab}{\if{html}{\out{}}\preformatted{svm_poly() \%>\%
- set_engine("kernlab") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Polynomial Support Vector Machine Specification (regression)
-##
-## Computational engine: kernlab
-##
-## Model fit template:
-## kernlab::ksvm(x = missing_arg(), data = missing_arg(), kernel = "polydot")
-}\if{html}{\out{}}\preformatted{svm_poly() \%>\%
- set_engine("kernlab") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Polynomial Support Vector Machine Specification (classification)
-##
-## Computational engine: kernlab
-##
-## Model fit template:
-## kernlab::ksvm(x = missing_arg(), data = missing_arg(), kernel = "polydot",
-## prob.model = TRUE)
-}
+\code{svm_poly()} defines a support vector machine model. For classification,
+the model tries to maximize the width of the margin between classes.
+For regression, the model optimizes a robust loss function that is only
+affected by very large model residuals.
-\code{fit()} passes the data directly to \code{kernlab::ksvm()} so that its
-formula method can create dummy variables as-needed.
-}
+This SVM model uses a nonlinear function, specifically a polynomial function,
+to create the decision boundary or regression line.
-\subsection{Parameter translations}{
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{ll}{
- \strong{parsnip} \tab \strong{kernlab} \cr
- cost \tab C (1) \cr
- degree \tab degree (1) \cr
- scale_factor \tab scale (1) \cr
- margin \tab epsilon (0.1) \cr
-}
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_poly")}
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
-}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
+}
\examples{
show_engines("svm_poly")
svm_poly(mode = "classification", degree = 1.2)
-# Parameters can be represented by a placeholder:
-svm_poly(mode = "regression", cost = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_poly")}
}
diff --git a/man/svm_rbf.Rd b/man/svm_rbf.Rd
index 946e6b98f..16a2ed1a5 100644
--- a/man/svm_rbf.Rd
+++ b/man/svm_rbf.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/svm_rbf.R
\name{svm_rbf}
\alias{svm_rbf}
-\title{General interface for radial basis function support vector machines}
+\title{Radial basis function support vector machines}
\usage{
svm_rbf(
mode = "unknown",
@@ -30,84 +30,37 @@ or on the wrong side of the margin}
loss function (regression only)}
}
\description{
-\code{svm_rbf()} is a way to generate a \emph{specification} of a model
-before fitting and allows the model to be created using
-different packages in R or via Spark. The main arguments for the
-model are:
-\itemize{
-\item \code{cost}: The cost of predicting a sample within or on the
-wrong side of the margin.
-\item \code{rbf_sigma}: The precision parameter for the radial basis
-function.
-\item \code{margin}: The epsilon in the SVM insensitive loss function
-(regression only)
-}
-These arguments are converted to their specific names at the
-time that the model is fit. Other options and arguments can be
-set using \code{set_engine()}. If left to their defaults
-here (\code{NULL}), the values are taken from the underlying model
-functions. If parameters need to be modified, \code{update()} can be used
-in lieu of recreating the object from scratch.
-}
-\details{
-The model can be created using the \code{fit()} function using the
-following \emph{engines}:
-\itemize{
-\item \pkg{R}: \code{"kernlab"} (the default)
-}
-}
-\section{Engine Details}{
-Engines may have pre-set default arguments when executing the model fit
-call. For this type of model, the template of the fit calls are below:
-\subsection{kernlab}{\if{html}{\out{}}\preformatted{svm_rbf() \%>\%
- set_engine("kernlab") \%>\%
- set_mode("regression") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Radial Basis Function Support Vector Machine Specification (regression)
-##
-## Computational engine: kernlab
-##
-## Model fit template:
-## kernlab::ksvm(x = missing_arg(), data = missing_arg(), kernel = "rbfdot")
-}\if{html}{\out{}}\preformatted{svm_rbf() \%>\%
- set_engine("kernlab") \%>\%
- set_mode("classification") \%>\%
- translate()
-}\if{html}{\out{
}}\preformatted{## Radial Basis Function Support Vector Machine Specification (classification)
-##
-## Computational engine: kernlab
-##
-## Model fit template:
-## kernlab::ksvm(x = missing_arg(), data = missing_arg(), kernel = "rbfdot",
-## prob.model = TRUE)
-}
+\code{svm_rbf()} defines a support vector machine model. For classification,
+the model tries to maximize the width of the margin between classes.
+For regression, the model optimizes a robust loss function that is only
+affected by very large model residuals.
-\code{fit()} passes the data directly to \code{kernlab::ksvm()} so that its
-formula method can create dummy variables as-needed.
-}
+This SVM model uses a nonlinear function, specifically the radial basis function,
+to create the decision boundary or regression line.
-\subsection{Parameter translations}{
+There are different ways to fit this model. See the engine-specific pages
+for more details:
-The standardized parameter names in parsnip can be mapped to their
-original names in each engine that has main parameters. Each engine
-typically has a different default value (shown in parentheses) for each
-parameter.\tabular{lll}{
- \strong{parsnip} \tab \strong{kernlab} \tab \strong{liquidSVM} \cr
- cost \tab C (1) \tab lambdas \cr
- rbf_sigma \tab sigma (varies) \tab gammas \cr
- margin \tab epsilon (0.1) \tab NA \cr
-}
+\Sexpr[stage=render,results=rd]{parsnip:::make_engine_list("svm_rbf")}
+More information on how \pkg{parsnip} is used for modeling is at
+\url{https://www.tidymodels.org/}.
}
-}
+\details{
+This function only defines what \emph{type} of model is being fit. Once an engine
+is specified, the \emph{method} to fit the model is also defined.
+The model is not trained or fit until the \code{\link[=fit.model_spec]{fit.model_spec()}} function is used
+with the data.
+}
\examples{
show_engines("svm_rbf")
svm_rbf(mode = "classification", rbf_sigma = 0.2)
-# Parameters can be represented by a placeholder:
-svm_rbf(mode = "regression", cost = varying())
+}
+\references{
+\url{https://www.tidymodels.org}, \href{https://tmwr.org}{\emph{Tidy Modeling with R}}
}
\seealso{
-\code{\link[=fit]{fit()}}, \code{\link[=set_engine]{set_engine()}}, \code{\link[=update]{update()}}
+\Sexpr[stage=render,results=rd]{parsnip:::make_seealso_list("svm_rbf")}
}