From c65b2cf89dee7a1b36471aa43ad923b106b5d6c8 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 27 May 2020 16:38:22 -0600 Subject: [PATCH 01/20] First draft of engine specific defaults into docs --- R/aaa.R | 27 +++++++++++++++++++++++---- R/decision_tree_data.R | 15 +++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/R/aaa.R b/R/aaa.R index 15372bd53..03f8b7211 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -30,8 +30,7 @@ convert_stan_interval <- function(x, level = 0.95, lower = TRUE) { res } -#' Make a table of arguments -#' @param model_name A character string for the model +#' @rdname convert_args #' @keywords internal #' @export convert_args <- function(model_name) { @@ -43,11 +42,14 @@ convert_args <- function(model_name) { dplyr::filter(grepl("args", name)) %>% dplyr::mutate(model = sub("_args", "", name), args = purrr::map(name, ~envir[[.x]])) %>% + dplyr::filter(grepl(model_name, model)) %>% tidyr::unnest(args) %>% - dplyr::select(model:original) + dplyr::select(model:original) %>% + full_join(get_arg_defaults(model_name)) %>% + mutate(original = paste0(original, " (", default, ")")) %>% + select(-default) convert_df <- args %>% - dplyr::filter(grepl(model_name, model)) %>% dplyr::select(-model) %>% tidyr::pivot_wider(names_from = engine, values_from = original) @@ -56,6 +58,23 @@ convert_args <- function(model_name) { } +#' @rdname convert_args +#' @keywords internal +#' @export +get_arg_defaults <- function(model) { + check_model_exists(model) + gdf <- get(paste0("get_defaults_", model)) + gdf() +} + +#' @rdname convert_args +#' @keywords internal +#' @export +get_arg <- function(ns, f, arg) { + args <- formals(getFromNamespace(f, ns)) + args <- as.list(args) + as.character(args[[arg]]) +} # ------------------------------------------------------------------------------ # nocov diff --git a/R/decision_tree_data.R b/R/decision_tree_data.R index 4f0d46160..4a38941c8 100644 --- a/R/decision_tree_data.R +++ b/R/decision_tree_data.R @@ 
-297,3 +297,18 @@ set_pred( args = list(object = quote(object$fit), dataset = quote(new_data)) ) ) + +# ------------------------------------------------------------------------------ + +get_defaults_decision_tree <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "decision_tree", "rpart", "tree_depth", "maxdepth", get_arg("rpart", "rpart.control", "maxdepth"), + "decision_tree", "rpart", "min_n", "minsplit", get_arg("rpart", "rpart.control", "minsplit"), + "decision_tree", "rpart", "cost_complexity", "cp", get_arg("rpart", "rpart.control", "cp"), + "decision_tree", "C5.0", "min_n", "minCases", get_arg("C50", "C5.0Control", "minCases"), + "decision_tree", "spark", "tree_depth", "max_depth", get_arg("sparklyr", "ml_decision_tree", "max_depth"), + "decision_tree", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_decision_tree", "min_instances_per_node"), + ) +} + From 7f976f907030a5cbb724162ffdc23b1e31f04ab6 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Wed, 27 May 2020 17:59:29 -0600 Subject: [PATCH 02/20] Move argument default table to man/rmd/ --- R/decision_tree_data.R | 15 --------------- man/decision_tree.Rd | 2 +- man/rmd/decision-tree.Rmd | 13 ++++++++++++- 3 files changed, 13 insertions(+), 17 deletions(-) diff --git a/R/decision_tree_data.R b/R/decision_tree_data.R index 4a38941c8..4f0d46160 100644 --- a/R/decision_tree_data.R +++ b/R/decision_tree_data.R @@ -297,18 +297,3 @@ set_pred( args = list(object = quote(object$fit), dataset = quote(new_data)) ) ) - -# ------------------------------------------------------------------------------ - -get_defaults_decision_tree <- function() { - tibble::tribble( - ~model, ~engine, ~parsnip, ~original, ~default, - "decision_tree", "rpart", "tree_depth", "maxdepth", get_arg("rpart", "rpart.control", "maxdepth"), - "decision_tree", "rpart", "min_n", "minsplit", get_arg("rpart", "rpart.control", "minsplit"), - "decision_tree", "rpart", "cost_complexity", "cp", 
get_arg("rpart", "rpart.control", "cp"), - "decision_tree", "C5.0", "min_n", "minCases", get_arg("C50", "C5.0Control", "minCases"), - "decision_tree", "spark", "tree_depth", "max_depth", get_arg("sparklyr", "ml_decision_tree", "max_depth"), - "decision_tree", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_decision_tree", "min_instances_per_node"), - ) -} - diff --git a/man/decision_tree.Rd b/man/decision_tree.Rd index 425ac6a36..3714f257e 100644 --- a/man/decision_tree.Rd +++ b/man/decision_tree.Rd @@ -35,7 +35,7 @@ parameter (a.k.a. \code{Cp}) used by CART models (\code{rpart} only).} \item{min_n}{An integer for the minimum number of data points in a node that are required for the node to be split further.} -\item{object}{A random forest model specification.} +\item{object}{A decision tree model specification.} \item{parameters}{A 1-row tibble or named list with \emph{main} parameters to update. If the individual arguments are used, diff --git a/man/rmd/decision-tree.Rmd b/man/rmd/decision-tree.Rmd index 8ddf9225a..babdddf78 100644 --- a/man/rmd/decision-tree.Rmd +++ b/man/rmd/decision-tree.Rmd @@ -52,9 +52,20 @@ decision_tree() %>% ## Parameter translations -The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters: +The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters. Each engine typically has a different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +get_defaults_decision_tree <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "decision_tree", "rpart", "tree_depth", "maxdepth", get_arg("rpart", "rpart.control", "maxdepth"), + "decision_tree", "rpart", "min_n", "minsplit", get_arg("rpart", "rpart.control", "minsplit"), + "decision_tree", "rpart", "cost_complexity", "cp", get_arg("rpart", "rpart.control", "cp"), + "decision_tree", "C5.0", "min_n", "minCases", get_arg("C50", "C5.0Control", "minCases"), + "decision_tree", "spark", "tree_depth", "max_depth", get_arg("sparklyr", "ml_decision_tree", "max_depth"), + "decision_tree", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_decision_tree", "min_instances_per_node"), + ) +} parsnip::convert_args("decision_tree") ``` From 9ec200ac292bb76624d23c8ac4409a6e1299abbb Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 10:27:51 -0600 Subject: [PATCH 03/20] Correct loss_reduction arg for Spark boosted trees --- R/boost_tree_data.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/boost_tree_data.R b/R/boost_tree_data.R index 25924454d..6734270da 100644 --- a/R/boost_tree_data.R +++ b/R/boost_tree_data.R @@ -317,8 +317,8 @@ set_model_arg( set_model_arg( model = "boost_tree", eng = "spark", - parsnip = "min_info_gain", - original = "loss_reduction", + parsnip = "loss_reduction", + original = "min_info_gain", func = list(pkg = "dials", fun = "loss_reduction"), has_submodel = FALSE ) From 56fb899e3ba38c8bf1065fd79ad28f115c34d929 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 10:30:39 -0600 Subject: [PATCH 04/20] Engine specific defaults for boosted trees --- man/rmd/boost-tree.Rmd | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/man/rmd/boost-tree.Rmd b/man/rmd/boost-tree.Rmd index e10e2c6f5..2ae15b476 100644 --- a/man/rmd/boost-tree.Rmd +++ b/man/rmd/boost-tree.Rmd @@ -50,9 +50,32 @@ 
boost_tree() %>% ## Parameter translations -The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters: +The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters. Each engine typically has a different default value (shown in parentheses) for each parameter. ```{r echo = FALSE, results = "asis"} +get_defaults_boost_tree <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "boost_tree", "xgboost", "tree_depth", "max_depth", get_arg("parsnip", "xgb_train", "max_depth"), + "boost_tree", "xgboost", "trees", "nrounds", get_arg("parsnip", "xgb_train", "nrounds"), + "boost_tree", "xgboost", "learn_rate", "eta", get_arg("parsnip", "xgb_train", "eta"), + "boost_tree", "xgboost", "mtry", "colsample_bytree", get_arg("parsnip", "xgb_train", "colsample_bytree"), + "boost_tree", "xgboost", "min_n", "min_child_weight", get_arg("parsnip", "xgb_train", "min_child_weight"), + "boost_tree", "xgboost", "loss_reduction", "gamma", get_arg("parsnip", "xgb_train", "gamma"), + "boost_tree", "xgboost", "sample_size", "subsample", get_arg("parsnip", "xgb_train", "subsample"), + "boost_tree", "C5.0", "trees", "trials", get_arg("parsnip", "C5.0_train", "trials"), + "boost_tree", "C5.0", "min_n", "minCases", get_arg("C50", "C5.0Control", "minCases"), + "boost_tree", "C5.0", "sample_size", "sample", get_arg("C50", "C5.0Control", "sample"), + "boost_tree", "spark", "tree_depth", "max_depth", get_arg("sparklyr", "ml_gradient_boosted_trees", "max_depth"), + "boost_tree", "spark", "trees", "max_iter", get_arg("sparklyr", "ml_gradient_boosted_trees", "max_iter"), + "boost_tree", "spark", "learn_rate", "step_size", get_arg("sparklyr", "ml_gradient_boosted_trees", "step_size"), + "boost_tree", "spark", "mtry", "feature_subset_strategy", get_arg("sparklyr", "ml_gradient_boosted_trees", "feature_subset_strategy"), + "boost_tree", "spark", 
"min_n", "min_instances_per_node", get_arg("sparklyr", "ml_gradient_boosted_trees", "min_instances_per_node"), + "boost_tree", "spark", "loss_reduction", "min_info_gain", get_arg("sparklyr", "ml_gradient_boosted_trees", "min_info_gain"), + "boost_tree", "spark", "sample_size", "subsampling_rate", get_arg("sparklyr", "ml_gradient_boosted_trees", "subsampling_rate"), + + ) +} parsnip::convert_args("boost_tree") ``` From e039fb2ab2c341ff795ea467e65f60f5cf1111c0 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 10:31:02 -0600 Subject: [PATCH 05/20] No parentheses when no default in table of args --- R/aaa.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/aaa.R b/R/aaa.R index 03f8b7211..de74c02ef 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -46,7 +46,9 @@ convert_args <- function(model_name) { tidyr::unnest(args) %>% dplyr::select(model:original) %>% full_join(get_arg_defaults(model_name)) %>% - mutate(original = paste0(original, " (", default, ")")) %>% + mutate(original = dplyr::if_else(!is.na(default), + paste0(original, " (", default, ")"), + original)) %>% select(-default) convert_df <- args %>% From e9ac0c7b35b7e6f4e8155cbfee61b31a1cc0d3ce Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 10:41:56 -0600 Subject: [PATCH 06/20] Engine specific defaults for linear regression --- man/rmd/linear-reg.Rmd | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/man/rmd/linear-reg.Rmd b/man/rmd/linear-reg.Rmd index bbfb5a8ab..71c4f3f2e 100644 --- a/man/rmd/linear-reg.Rmd +++ b/man/rmd/linear-reg.Rmd @@ -68,10 +68,20 @@ linear_reg() %>% ## Parameter translations -The standardized parameter names in parsnip can be mapped to their original names -in each engine that has main parameters: +The standardized parameter names in parsnip can be mapped to their original +names in each engine that has main parameters. 
Each engine typically has a +different default value (shown in parentheses) for each parameter. ```{r echo = FALSE, results = "asis"} +get_defaults_linear_reg <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "linear_reg", "glmnet", "mixture", "alpha", get_arg("glmnet", "glmnet", "alpha"), + "linear_reg", "spark", "penalty", "reg_param", get_arg("sparklyr", "ml_linear_regression", "reg_param"), + "linear_reg", "spark", "mixture", "elastic_net_param", get_arg("sparklyr", "ml_linear_regression", "elastic_net_param"), + "linear_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"), + ) +} parsnip::convert_args("linear_reg") ``` From d4ce19851f28e527fcedea8bde544753b09146a1 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 10:51:01 -0600 Subject: [PATCH 07/20] Engine specific defaults for logistic regression --- man/rmd/logistic-reg.Rmd | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/man/rmd/logistic-reg.Rmd b/man/rmd/logistic-reg.Rmd index 498d43387..5cbedb9c1 100644 --- a/man/rmd/logistic-reg.Rmd +++ b/man/rmd/logistic-reg.Rmd @@ -69,10 +69,20 @@ logistic_reg() %>% ## Parameter translations -The standardized parameter names in parsnip can be mapped to their original names -in each engine that has main parameters: +The standardized parameter names in parsnip can be mapped to their original +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +get_defaults_logistic_reg <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "logistic_reg", "glmnet", "mixture", "alpha", get_arg("glmnet", "glmnet", "alpha"), + "logistic_reg", "spark", "penalty", "reg_param", get_arg("sparklyr", "ml_logistic_regression", "reg_param"), + "logistic_reg", "spark", "mixture", "elastic_net_param", get_arg("sparklyr", "ml_logistic_regression", "elastic_net_param"), + "logistic_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"), + ) +} parsnip::convert_args("logistic_reg") ``` From 16f182512a88bdb367955a10ae7d81e4a4e7252d Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 11:13:20 -0600 Subject: [PATCH 08/20] Engine specific defaults for MARS --- man/rmd/mars.Rmd | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/man/rmd/mars.Rmd b/man/rmd/mars.Rmd index 35f2653b6..8c2bd8233 100644 --- a/man/rmd/mars.Rmd +++ b/man/rmd/mars.Rmd @@ -25,10 +25,18 @@ loaded. However, if `multi_predict` is used, the package is attached. ## Parameter translations -The standardized parameter names in parsnip can be mapped to their original names -in each engine that has main parameters: +The standardized parameter names in parsnip can be mapped to their original +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +get_defaults_mars <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "mars", "earth", "prod_degree", "degree", get_arg("earth", "earth.fit", "degree"), + "mars", "earth", "prune_method", "pmethod", get_arg("earth", "earth.fit", "pmethod")[2] + ) +} parsnip::convert_args("mars") ``` From 5938408457f03bdfbbd5b71a07fbcd5591864398 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 11:21:36 -0600 Subject: [PATCH 09/20] Engine specific defaults for multilayer perceptron --- man/rmd/mlp.Rmd | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/man/rmd/mlp.Rmd b/man/rmd/mlp.Rmd index f93ef26f4..cd4f223b8 100644 --- a/man/rmd/mlp.Rmd +++ b/man/rmd/mlp.Rmd @@ -41,9 +41,22 @@ mlp() %>% ## Parameter translations The standardized parameter names in parsnip can be mapped to their original -names in each engine that has main parameters: +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +get_defaults_mlp <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "mlp", "keras", "hidden_units", "hidden_units", get_arg("parsnip", "keras_mlp", "hidden_units"), + "mlp", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"), + "mlp", "keras", "dropout", "dropout", get_arg("parsnip", "keras_mlp", "dropout"), + "mlp", "keras", "epochs", "epochs", get_arg("parsnip", "keras_mlp", "epochs"), + "mlp", "keras", "activation", "activation", get_arg("parsnip", "keras_mlp", "activation"), + "mlp", "nnet", "penalty", "decay", get_arg("nnet", "nnet.default", "decay"), + "mlp", "nnet", "epochs", "maxit", get_arg("nnet", "nnet.default", "maxit"), + ) +} parsnip::convert_args("mlp") ``` From f1c3197ebe474d80776c2f9b91feed3da7591f4b Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 13:22:37 -0600 Subject: [PATCH 10/20] Engine specific defaults for multinomial regression --- man/rmd/multinom-reg.Rmd | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/man/rmd/multinom-reg.Rmd b/man/rmd/multinom-reg.Rmd index d79d23279..e377202b5 100644 --- a/man/rmd/multinom-reg.Rmd +++ b/man/rmd/multinom-reg.Rmd @@ -51,10 +51,21 @@ multinom_reg() %>% ## Parameter translations -The standardized parameter names in parsnip can be mapped to their original names -in each engine that has main parameters: +The standardized parameter names in parsnip can be mapped to their original +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +get_defaults_multinom_reg <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "multinom_reg", "glmnet", "mixture", "alpha", get_arg("glmnet", "glmnet", "alpha"), + "multinom_reg", "spark", "penalty", "reg_param", get_arg("sparklyr", "ml_logistic_regression", "reg_param"), + "multinom_reg", "spark", "mixture", "elastic_net_param", get_arg("sparklyr", "ml_logistic_regression", "elastic_net_param"), + "multinom_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"), + "multinom_reg", "nnet", "penalty", "decay", get_arg("nnet", "nnet.default", "decay"), + ) +} parsnip::convert_args("multinom_reg") ``` From c81509f3354ae1435014d2913db55ddac203c26a Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 13:26:21 -0600 Subject: [PATCH 11/20] Engine specific defaults for nearest neighbors --- man/rmd/nearest-neighbor.Rmd | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/man/rmd/nearest-neighbor.Rmd b/man/rmd/nearest-neighbor.Rmd index 42deebfe3..01a95568e 100644 --- a/man/rmd/nearest-neighbor.Rmd +++ b/man/rmd/nearest-neighbor.Rmd @@ -26,9 +26,18 @@ also means that a single value of that function's `kernel` argument (a.k.a ## Parameter translations -The standardized parameter names in parsnip can be mapped to their original names in each engine that has main parameters: +The standardized parameter names in parsnip can be mapped to their original +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +get_defaults_nearest_neighbor <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "nearest_neighbor", "kknn", "weight_func", "kernel", get_arg("kknn", "train.kknn", "kernel"), + "nearest_neighbor", "kknn", "dist_power", "distance", get_arg("kknn", "train.kknn", "distance"), + ) +} parsnip::convert_args("nearest_neighbor") ``` From 16c02717f4990abe6f1cbcac4e6e61a8a233faf0 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 14:32:29 -0600 Subject: [PATCH 12/20] Don't need defaults for surv_reg() ("weibull" is the default) --- man/rmd/surv-reg.Rmd | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/man/rmd/surv-reg.Rmd b/man/rmd/surv-reg.Rmd index 608a0cb2f..ec855de17 100644 --- a/man/rmd/surv-reg.Rmd +++ b/man/rmd/surv-reg.Rmd @@ -25,10 +25,18 @@ a stratification variable and can be overridden in other cases. ## Parameter translations -The standardized parameter names in parsnip can be mapped to their original names -in each engine that has main parameters: +The standardized parameter names in parsnip can be mapped to their original +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +get_defaults_surv_reg <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "surv_reg", "flexsurv", "dist", "dist", NA, + "surv_reg", "survival", "dist", "dist", NA, + ) +} parsnip::convert_args("surv_reg") ``` From 212765fe3ccf1116068e7aa008c3ce50f186c50b Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 15:59:05 -0600 Subject: [PATCH 13/20] Engine specific defaults for random forest --- man/rmd/rand-forest.Rmd | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/man/rmd/rand-forest.Rmd b/man/rmd/rand-forest.Rmd index 7564038ca..85eaf5745 100644 --- a/man/rmd/rand-forest.Rmd +++ b/man/rmd/rand-forest.Rmd @@ -65,9 +65,27 @@ rand_forest() %>% ## Parameter translations The standardized parameter names in parsnip can be mapped to their original -names in each engine that has main parameters: +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +get_defaults_rand_forest <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "rand_forest", "ranger", "trees", "num.trees", get_arg("ranger", "ranger", "num.trees"), + "rand_forest", "ranger", "mtry", "mtry", "see below", + "rand_forest", "ranger", "min_n", "min.node.size", "see below", + "rand_forest", "randomForest", "trees", "ntree", get_arg("randomForest", "randomForest.default", "ntree"), + "rand_forest", "randomForest", "mtry", "mtry", "see below", + "rand_forest", "randomForest", "min_n", "nodesize", "see below", + "rand_forest", "spark", "trees", "num_trees", get_arg("sparklyr", "ml_random_forest", "num_trees"), + "rand_forest", "spark", "mtry", "feature_subset_strategy", "see below", + "rand_forest", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_random_forest", "min_instances_per_node"), + ) +} parsnip::convert_args("rand_forest") ``` +- The default `min_n` for both ranger and randomForest is 1 for classification and 5 for regression. +- For randomForest and spark, the default `mtry` is the square root of the number of predictors for classification, and one-third of the predictors for regression. +- For ranger, the default `mtry` is the square root of the number of predictors. 
From 3717f6d01ef7705a7b621b246eabcead6083ac04 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 15:59:19 -0600 Subject: [PATCH 14/20] More detail on boosted tree --- man/rmd/boost-tree.Rmd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/rmd/boost-tree.Rmd b/man/rmd/boost-tree.Rmd index 2ae15b476..6567cdf6b 100644 --- a/man/rmd/boost-tree.Rmd +++ b/man/rmd/boost-tree.Rmd @@ -69,7 +69,7 @@ get_defaults_boost_tree <- function() { "boost_tree", "spark", "tree_depth", "max_depth", get_arg("sparklyr", "ml_gradient_boosted_trees", "max_depth"), "boost_tree", "spark", "trees", "max_iter", get_arg("sparklyr", "ml_gradient_boosted_trees", "max_iter"), "boost_tree", "spark", "learn_rate", "step_size", get_arg("sparklyr", "ml_gradient_boosted_trees", "step_size"), - "boost_tree", "spark", "mtry", "feature_subset_strategy", get_arg("sparklyr", "ml_gradient_boosted_trees", "feature_subset_strategy"), + "boost_tree", "spark", "mtry", "feature_subset_strategy", "see below", "boost_tree", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_gradient_boosted_trees", "min_instances_per_node"), "boost_tree", "spark", "loss_reduction", "min_info_gain", get_arg("sparklyr", "ml_gradient_boosted_trees", "min_info_gain"), "boost_tree", "spark", "sample_size", "subsampling_rate", get_arg("sparklyr", "ml_gradient_boosted_trees", "subsampling_rate"), @@ -79,3 +79,4 @@ get_defaults_boost_tree <- function() { parsnip::convert_args("boost_tree") ``` +For spark, the default `mtry` is the square root of the number of predictors for classification, and one-third of the predictors for regression. 
From 2789b86753b2b9a9610aa036615fac11afafbe2a Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 16:39:19 -0600 Subject: [PATCH 15/20] Specify what to join by and document --- R/aaa.R | 4 +++- man/convert_args.Rd | 13 ++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/R/aaa.R b/R/aaa.R index de74c02ef..1e8db1aa7 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -30,6 +30,7 @@ convert_stan_interval <- function(x, level = 0.95, lower = TRUE) { res } +#' Find args for documentation #' @rdname convert_args #' @keywords internal #' @export @@ -45,7 +46,8 @@ convert_args <- function(model_name) { dplyr::filter(grepl(model_name, model)) %>% tidyr::unnest(args) %>% dplyr::select(model:original) %>% - full_join(get_arg_defaults(model_name)) %>% + full_join(get_arg_defaults(model_name), + by = c("model", "engine", "parsnip", "original")) %>% mutate(original = dplyr::if_else(!is.na(default), paste0(original, " (", default, ")"), original)) %>% diff --git a/man/convert_args.Rd b/man/convert_args.Rd index 10922e95c..b29da5543 100644 --- a/man/convert_args.Rd +++ b/man/convert_args.Rd @@ -2,14 +2,17 @@ % Please edit documentation in R/aaa.R \name{convert_args} \alias{convert_args} -\title{Make a table of arguments} +\alias{get_arg_defaults} +\alias{get_arg} +\title{Find args for documentation} \usage{ convert_args(model_name) -} -\arguments{ -\item{model_name}{A character string for the model} + +get_arg_defaults(model) + +get_arg(ns, f, arg) } \description{ -Make a table of arguments +Find args for documentation } \keyword{internal} From 9c716b6ca206da6a3752df669c5e6bbd204e1a68 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 16:40:37 -0600 Subject: [PATCH 16/20] I think the best we can do for engine specific defaults for SVM --- man/rmd/svm-poly.Rmd | 13 ++++++++++++- man/rmd/svm-rbf.Rmd | 14 +++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/man/rmd/svm-poly.Rmd b/man/rmd/svm-poly.Rmd index 
c3ff474a8..5ec7a9769 100644 --- a/man/rmd/svm-poly.Rmd +++ b/man/rmd/svm-poly.Rmd @@ -22,9 +22,20 @@ svm_poly() %>% ## Parameter translations The standardized parameter names in parsnip can be mapped to their original -names in each engine that has main parameters: +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. ```{r echo = FALSE, results = "asis"} +## these values were manually checked :/ on 2020-05-28 +get_defaults_svm_poly <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "svm_poly", "kernlab", "cost", "C", "1", + "svm_poly", "kernlab", "degree", "degree", "1", + "svm_poly", "kernlab", "scale_factor", "scale", "1", + "svm_poly", "kernlab", "margin", "epsilon", "0.1", + ) +} parsnip::convert_args("svm_poly") ``` diff --git a/man/rmd/svm-rbf.Rmd b/man/rmd/svm-rbf.Rmd index d4e194d4e..329f5e99e 100644 --- a/man/rmd/svm-rbf.Rmd +++ b/man/rmd/svm-rbf.Rmd @@ -48,9 +48,21 @@ translate between engines, `sigma = 1/gammas^2`. Users will be specifying ## Parameter translations The standardized parameter names in parsnip can be mapped to their original -names in each engine that has main parameters: +names in each engine that has main parameters. Each engine typically has a +different default value (shown in parentheses) for each parameter. 
```{r echo = FALSE, results = "asis"} +## these values were manually checked :/ on 2020-05-28 +get_defaults_svm_rbf <- function() { + tibble::tribble( + ~model, ~engine, ~parsnip, ~original, ~default, + "svm_rbf", "kernlab", "cost", "C", "1", + "svm_rbf", "kernlab", "rbf_sigma", "sigma", "1", + "svm_rbf", "kernlab", "margin", "epsilon", "0.1", + "svm_rbf", "liquidSVM", "cost", "lambdas", "varies", + "svm_rbf", "liquidSVM", "rbf_sigma", "gammas", "varies", + ) +} parsnip::convert_args("svm_rbf") ``` From 77d03873b3fe9a1d5501903a54e8d2f7fcd0649f Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 16:40:58 -0600 Subject: [PATCH 17/20] Reorder rand_forest() notes to match table --- man/rmd/rand-forest.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/rmd/rand-forest.Rmd b/man/rmd/rand-forest.Rmd index 85eaf5745..138b357b0 100644 --- a/man/rmd/rand-forest.Rmd +++ b/man/rmd/rand-forest.Rmd @@ -86,6 +86,6 @@ get_defaults_rand_forest <- function() { parsnip::convert_args("rand_forest") ``` -- The default `min_n` for both ranger and randomForest is 1 for classification and 5 for regression. - For randomForest and spark, the default `mtry` is the square root of the number of predictors for classification, and one-third of the predictors for regression. - For ranger, the default `mtry` is the square root of the number of predictors. +- The default `min_n` for both ranger and randomForest is 1 for classification and 5 for regression. 
From d7b371bf672c4f9e3034d9141f3a7d17e6dd3aaa Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 16:53:32 -0600 Subject: [PATCH 18/20] Redocument to create new versions of tables --- man/boost_tree.Rd | 23 ++++++++++++++--------- man/decision_tree.Rd | 10 ++++++---- man/linear_reg.Rd | 8 +++++--- man/logistic_reg.Rd | 8 +++++--- man/mars.Rd | 8 +++++--- man/mlp.Rd | 18 ++++++++++-------- man/multinom_reg.Rd | 8 +++++--- man/nearest_neighbor.Rd | 8 +++++--- man/rand_forest.Rd | 19 +++++++++++++++---- man/surv_reg.Rd | 4 +++- man/svm_poly.Rd | 12 +++++++----- man/svm_rbf.Rd | 10 ++++++---- 12 files changed, 86 insertions(+), 50 deletions(-) diff --git a/man/boost_tree.Rd b/man/boost_tree.Rd index 4a5b4a1c9..ec71a5901 100644 --- a/man/boost_tree.Rd +++ b/man/boost_tree.Rd @@ -204,19 +204,24 @@ predictors to be converted to indicator variables. \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{llll}{ +original names in each engine that has main parameters. 
Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{llll}{ \strong{parsnip} \tab \strong{xgboost} \tab \strong{C5.0} \tab \strong{spark} \cr - tree_depth \tab max_depth \tab NA \tab max_depth \cr - trees \tab nrounds \tab trials \tab max_iter \cr - learn_rate \tab eta \tab NA \tab step_size \cr - mtry \tab colsample_bytree \tab NA \tab feature_subset_strategy \cr - min_n \tab min_child_weight \tab minCases \tab min_instances_per_node \cr - loss_reduction \tab gamma \tab NA \tab NA \cr - sample_size \tab subsample \tab sample \tab subsampling_rate \cr + tree_depth \tab max_depth (6) \tab NA \tab max_depth (5) \cr + trees \tab nrounds (15) \tab trials (15) \tab max_iter (20) \cr + learn_rate \tab eta (0.3) \tab NA \tab step_size (0.1) \cr + mtry \tab colsample_bytree (1) \tab NA \tab feature_subset_strategy (auto) \cr + min_n \tab min_child_weight (1) \tab minCases (2) \tab min_instances_per_node (1) \cr + loss_reduction \tab gamma (0) \tab NA \tab min_info_gain (0) \cr + sample_size \tab subsample (1) \tab sample (0) \tab subsampling_rate (1) \cr stop_iter \tab early_stop \tab NA \tab NA \cr - min_info_gain \tab NA \tab NA \tab loss_reduction \cr } + +For spark, the default \code{mtry} is the square root of the number of +predictors for classification, and one-third of the predictors for +regression. } } diff --git a/man/decision_tree.Rd b/man/decision_tree.Rd index 3714f257e..41a68f019 100644 --- a/man/decision_tree.Rd +++ b/man/decision_tree.Rd @@ -169,11 +169,13 @@ predictors to be converted to indicator variables. \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{llll}{ +original names in each engine that has main parameters. 
Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{llll}{ \strong{parsnip} \tab \strong{rpart} \tab \strong{C5.0} \tab \strong{spark} \cr - tree_depth \tab maxdepth \tab NA \tab max_depth \cr - min_n \tab minsplit \tab minCases \tab min_instances_per_node \cr - cost_complexity \tab cp \tab NA \tab NA \cr + tree_depth \tab maxdepth (30) \tab NA \tab max_depth (5) \cr + min_n \tab minsplit (20) \tab minCases (2) \tab min_instances_per_node (1) \cr + cost_complexity \tab cp (0.01) \tab NA \tab NA \cr } } diff --git a/man/linear_reg.Rd b/man/linear_reg.Rd index 04c302751..258498704 100644 --- a/man/linear_reg.Rd +++ b/man/linear_reg.Rd @@ -183,10 +183,12 @@ predictive distribution as appropriate) is returned. \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{llll}{ +original names in each engine that has main parameters. Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{llll}{ \strong{parsnip} \tab \strong{glmnet} \tab \strong{spark} \tab \strong{keras} \cr - penalty \tab lambda \tab reg_param \tab penalty \cr - mixture \tab alpha \tab elastic_net_param \tab NA \cr + penalty \tab lambda \tab reg_param (0) \tab penalty (0) \cr + mixture \tab alpha (1) \tab elastic_net_param (0) \tab NA \cr } } diff --git a/man/logistic_reg.Rd b/man/logistic_reg.Rd index a597eb81d..8fd8c4541 100644 --- a/man/logistic_reg.Rd +++ b/man/logistic_reg.Rd @@ -182,10 +182,12 @@ predictive distribution as appropriate) is returned. \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{llll}{ +original names in each engine that has main parameters. 
Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{llll}{ \strong{parsnip} \tab \strong{glmnet} \tab \strong{spark} \tab \strong{keras} \cr - penalty \tab lambda \tab reg_param \tab penalty \cr - mixture \tab alpha \tab elastic_net_param \tab NA \cr + penalty \tab lambda \tab reg_param (0) \tab penalty (0) \cr + mixture \tab alpha (1) \tab elastic_net_param (0) \tab NA \cr } } diff --git a/man/mars.Rd b/man/mars.Rd index 9089fe5c5..6a0340baf 100644 --- a/man/mars.Rd +++ b/man/mars.Rd @@ -112,11 +112,13 @@ attached. \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{ll}{ +original names in each engine that has main parameters. Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{ll}{ \strong{parsnip} \tab \strong{earth} \cr num_terms \tab nprune \cr - prod_degree \tab degree \cr - prune_method \tab pmethod \cr + prod_degree \tab degree (1) \cr + prune_method \tab pmethod (backward) \cr } } diff --git a/man/mlp.Rd b/man/mlp.Rd index 76b792f8f..ed6b1087a 100644 --- a/man/mlp.Rd +++ b/man/mlp.Rd @@ -41,13 +41,13 @@ of model parameters randomly set to zero during model training.} \item{epochs}{An integer for the number of training iterations.} -\item{activation}{A single character strong denoting the type of relationship +\item{activation}{A single character string denoting the type of relationship between the original predictors and the hidden unit layer. The activation function between the hidden and output layers is automatically set to either "linear" or "softmax" depending on the type of outcome. 
Possible values are: "linear", "softmax", "relu", and "elu"} -\item{object}{A random forest model specification.} +\item{object}{A multilayer perceptron model specification.} \item{parameters}{A 1-row tibble or named list with \emph{main} parameters to update. If the individual arguments are used, @@ -161,13 +161,15 @@ An error is thrown if both \code{penalty} and \code{dropout} are specified for \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{lll}{ +original names in each engine that has main parameters. Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{lll}{ \strong{parsnip} \tab \strong{keras} \tab \strong{nnet} \cr - hidden_units \tab hidden_units \tab size \cr - penalty \tab penalty \tab decay \cr - dropout \tab dropout \tab NA \cr - epochs \tab epochs \tab maxit \cr - activation \tab activation \tab NA \cr + hidden_units \tab hidden_units (5) \tab size \cr + penalty \tab penalty (0) \tab decay (0) \cr + dropout \tab dropout (0) \tab NA \cr + epochs \tab epochs (20) \tab maxit (100) \cr + activation \tab activation (softmax) \tab NA \cr } } diff --git a/man/multinom_reg.Rd b/man/multinom_reg.Rd index b2b4a93b8..a0fad2c62 100644 --- a/man/multinom_reg.Rd +++ b/man/multinom_reg.Rd @@ -158,10 +158,12 @@ results. \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{lllll}{ +original names in each engine that has main parameters. 
Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{lllll}{ \strong{parsnip} \tab \strong{glmnet} \tab \strong{spark} \tab \strong{keras} \tab \strong{nnet} \cr - penalty \tab lambda \tab reg_param \tab penalty \tab decay \cr - mixture \tab alpha \tab elastic_net_param \tab NA \tab NA \cr + penalty \tab lambda \tab reg_param (0) \tab penalty (0) \tab decay (0) \cr + mixture \tab alpha (1) \tab elastic_net_param (0) \tab NA \tab NA \cr } } diff --git a/man/nearest_neighbor.Rd b/man/nearest_neighbor.Rd index f262331d9..d4dd84df1 100644 --- a/man/nearest_neighbor.Rd +++ b/man/nearest_neighbor.Rd @@ -93,11 +93,13 @@ function’s \code{kernel} argument (a.k.a \code{weight_func} here) can be suppl \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{ll}{ +original names in each engine that has main parameters. Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{ll}{ \strong{parsnip} \tab \strong{kknn} \cr neighbors \tab ks \cr - weight_func \tab kernel \cr - dist_power \tab distance \cr + weight_func \tab kernel (optimal) \cr + dist_power \tab distance (2) \cr } } diff --git a/man/rand_forest.Rd b/man/rand_forest.Rd index 86f840b48..404eda280 100644 --- a/man/rand_forest.Rd +++ b/man/rand_forest.Rd @@ -177,13 +177,24 @@ not require factor predictors to be converted to indicator variables. \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{llll}{ +original names in each engine that has main parameters. 
Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{llll}{ \strong{parsnip} \tab \strong{ranger} \tab \strong{randomForest} \tab \strong{spark} \cr - mtry \tab mtry \tab mtry \tab feature_subset_strategy \cr - trees \tab num.trees \tab ntree \tab num_trees \cr - min_n \tab min.node.size \tab nodesize \tab min_instances_per_node \cr + mtry \tab mtry (see below) \tab mtry (see below) \tab feature_subset_strategy (see below) \cr + trees \tab num.trees (500) \tab ntree (500) \tab num_trees (20) \cr + min_n \tab min.node.size (see below) \tab nodesize (see below) \tab min_instances_per_node (1) \cr } +\itemize{ +\item For randomForest and spark, the default \code{mtry} is the square root of +the number of predictors for classification, and one-third of the +predictors for regression. +\item For ranger, the default \code{mtry} is the square root of the number of +predictors. +\item The default \code{min_n} for both ranger and randomForest is 1 for +classification and 5 for regression. +} } } diff --git a/man/surv_reg.Rd b/man/surv_reg.Rd index 656d85ef3..4565af143 100644 --- a/man/surv_reg.Rd +++ b/man/surv_reg.Rd @@ -103,7 +103,9 @@ there is a stratification variable and can be overridden in other cases. \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{lll}{ +original names in each engine that has main parameters. Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{lll}{ \strong{parsnip} \tab \strong{flexsurv} \tab \strong{survival} \cr dist \tab dist \tab dist \cr } diff --git a/man/svm_poly.Rd b/man/svm_poly.Rd index 6862dc160..4dc3a34e4 100644 --- a/man/svm_poly.Rd +++ b/man/svm_poly.Rd @@ -108,12 +108,14 @@ call. 
For this type of model, the template of the fit calls are below: \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{ll}{ +original names in each engine that has main parameters. Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{ll}{ \strong{parsnip} \tab \strong{kernlab} \cr - cost \tab C \cr - degree \tab degree \cr - scale_factor \tab scale \cr - margin \tab epsilon \cr + cost \tab C (1) \cr + degree \tab degree (1) \cr + scale_factor \tab scale (1) \cr + margin \tab epsilon (0.1) \cr } } diff --git a/man/svm_rbf.Rd b/man/svm_rbf.Rd index dd6aa1b07..9d7cec973 100644 --- a/man/svm_rbf.Rd +++ b/man/svm_rbf.Rd @@ -137,11 +137,13 @@ will be specifying \code{sigma} and the function translates the value to \subsection{Parameter translations}{ The standardized parameter names in parsnip can be mapped to their -original names in each engine that has main parameters:\tabular{lll}{ +original names in each engine that has main parameters. 
Each engine +typically has a different default value (shown in parentheses) for each +parameter.\tabular{lll}{ \strong{parsnip} \tab \strong{kernlab} \tab \strong{liquidSVM} \cr - cost \tab C \tab lambdas \cr - rbf_sigma \tab sigma \tab gammas \cr - margin \tab epsilon \tab NA \cr + cost \tab C (1) \tab lambdas (varies) \cr + rbf_sigma \tab sigma (1) \tab gammas (varies) \cr + margin \tab epsilon (0.1) \tab NA \cr } } From f49ff095c64eefd786224f22d6f854225e56555e Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 28 May 2020 18:25:37 -0600 Subject: [PATCH 19/20] Move functions for finding args to /man/rmd, redocument --- NAMESPACE | 1 - R/aaa.R | 50 ------------------------------------ man/boost_tree.Rd | 2 +- man/convert_args.Rd | 18 ------------- man/rmd/boost-tree.Rmd | 5 +++- man/rmd/decision-tree.Rmd | 5 +++- man/rmd/linear-reg.Rmd | 5 +++- man/rmd/logistic-reg.Rmd | 5 +++- man/rmd/mars.Rmd | 5 +++- man/rmd/mlp.Rmd | 5 +++- man/rmd/multinom-reg.Rmd | 5 +++- man/rmd/nearest-neighbor.Rmd | 5 +++- man/rmd/rand-forest.Rmd | 5 +++- man/rmd/setup.Rmd | 42 ++++++++++++++++++++++++++++++ man/rmd/surv-reg.Rmd | 5 +++- man/rmd/svm-poly.Rmd | 5 +++- man/rmd/svm-rbf.Rmd | 5 +++- 17 files changed, 91 insertions(+), 82 deletions(-) delete mode 100644 man/convert_args.Rd create mode 100644 man/rmd/setup.Rmd diff --git a/NAMESPACE b/NAMESPACE index 8f388d332..2a6fa8971 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -100,7 +100,6 @@ export(boost_tree) export(check_empty_ellipse) export(check_final_param) export(control_parsnip) -export(convert_args) export(convert_stan_interval) export(decision_tree) export(eval_args) diff --git a/R/aaa.R b/R/aaa.R index 1e8db1aa7..54ae36241 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -30,56 +30,6 @@ convert_stan_interval <- function(x, level = 0.95, lower = TRUE) { res } -#' Find args for documentation -#' @rdname convert_args -#' @keywords internal -#' @export -convert_args <- function(model_name) { - envir <- get_model_env() - - args <- - ls(envir) 
%>% - tibble::tibble(name = .) %>% - dplyr::filter(grepl("args", name)) %>% - dplyr::mutate(model = sub("_args", "", name), - args = purrr::map(name, ~envir[[.x]])) %>% - dplyr::filter(grepl(model_name, model)) %>% - tidyr::unnest(args) %>% - dplyr::select(model:original) %>% - full_join(get_arg_defaults(model_name), - by = c("model", "engine", "parsnip", "original")) %>% - mutate(original = dplyr::if_else(!is.na(default), - paste0(original, " (", default, ")"), - original)) %>% - select(-default) - - convert_df <- args %>% - dplyr::select(-model) %>% - tidyr::pivot_wider(names_from = engine, values_from = original) - - convert_df %>% - knitr::kable(col.names = paste0("**", colnames(convert_df), "**")) - -} - -#' @rdname convert_args -#' @keywords internal -#' @export -get_arg_defaults <- function(model) { - check_model_exists(model) - gdf <- get(paste0("get_defaults_", model)) - gdf() -} - -#' @rdname convert_args -#' @keywords internal -#' @export -get_arg <- function(ns, f, arg) { - args <- formals(getFromNamespace(f, ns)) - args <- as.list(args) - as.character(args[[arg]]) -} - # ------------------------------------------------------------------------------ # nocov diff --git a/man/boost_tree.Rd b/man/boost_tree.Rd index ec71a5901..ac101cf3c 100644 --- a/man/boost_tree.Rd +++ b/man/boost_tree.Rd @@ -211,7 +211,7 @@ parameter.\tabular{llll}{ tree_depth \tab max_depth (6) \tab NA \tab max_depth (5) \cr trees \tab nrounds (15) \tab trials (15) \tab max_iter (20) \cr learn_rate \tab eta (0.3) \tab NA \tab step_size (0.1) \cr - mtry \tab colsample_bytree (1) \tab NA \tab feature_subset_strategy (auto) \cr + mtry \tab colsample_bytree (1) \tab NA \tab feature_subset_strategy (see below) \cr min_n \tab min_child_weight (1) \tab minCases (2) \tab min_instances_per_node (1) \cr loss_reduction \tab gamma (0) \tab NA \tab min_info_gain (0) \cr sample_size \tab subsample (1) \tab sample (0) \tab subsampling_rate (1) \cr diff --git a/man/convert_args.Rd 
b/man/convert_args.Rd deleted file mode 100644 index b29da5543..000000000 --- a/man/convert_args.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aaa.R -\name{convert_args} -\alias{convert_args} -\alias{get_arg_defaults} -\alias{get_arg} -\title{Find args for documentation} -\usage{ -convert_args(model_name) - -get_arg_defaults(model) - -get_arg(ns, f, arg) -} -\description{ -Find args for documentation -} -\keyword{internal} diff --git a/man/rmd/boost-tree.Rmd b/man/rmd/boost-tree.Rmd index 6567cdf6b..d6eec6747 100644 --- a/man/rmd/boost-tree.Rmd +++ b/man/rmd/boost-tree.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below: ## xgboost @@ -76,7 +79,7 @@ get_defaults_boost_tree <- function() { ) } -parsnip::convert_args("boost_tree") +convert_args("boost_tree") ``` For spark, the default `mtry` is the square root of the number of predictors for classification, and one-third of the predictors for regression. diff --git a/man/rmd/decision-tree.Rmd b/man/rmd/decision-tree.Rmd index babdddf78..345c953be 100644 --- a/man/rmd/decision-tree.Rmd +++ b/man/rmd/decision-tree.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. 
For this type of model, the template of the fit calls are below: ## rpart @@ -66,6 +69,6 @@ get_defaults_decision_tree <- function() { "decision_tree", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_decision_tree", "min_instances_per_node"), ) } -parsnip::convert_args("decision_tree") +convert_args("decision_tree") ``` diff --git a/man/rmd/linear-reg.Rmd b/man/rmd/linear-reg.Rmd index 71c4f3f2e..43def129a 100644 --- a/man/rmd/linear-reg.Rmd +++ b/man/rmd/linear-reg.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below. ## lm @@ -82,6 +85,6 @@ get_defaults_linear_reg <- function() { "linear_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"), ) } -parsnip::convert_args("linear_reg") +convert_args("linear_reg") ``` diff --git a/man/rmd/logistic-reg.Rmd b/man/rmd/logistic-reg.Rmd index 5cbedb9c1..eef6ac8da 100644 --- a/man/rmd/logistic-reg.Rmd +++ b/man/rmd/logistic-reg.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below. @@ -83,6 +86,6 @@ get_defaults_logistic_reg <- function() { "logistic_reg", "keras", "penalty", "penalty", get_arg("parsnip", "keras_mlp", "penalty"), ) } -parsnip::convert_args("logistic_reg") +convert_args("logistic_reg") ``` diff --git a/man/rmd/mars.Rmd b/man/rmd/mars.Rmd index 8c2bd8233..201adef26 100644 --- a/man/rmd/mars.Rmd +++ b/man/rmd/mars.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below. 
@@ -37,6 +40,6 @@ get_defaults_mars <- function() { "mars", "earth", "prune_method", "pmethod", get_arg("earth", "earth.fit", "pmethod")[2] ) } -parsnip::convert_args("mars") +convert_args("mars") ``` diff --git a/man/rmd/mlp.Rmd b/man/rmd/mlp.Rmd index cd4f223b8..cd4f5b3af 100644 --- a/man/rmd/mlp.Rmd +++ b/man/rmd/mlp.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below: @@ -57,6 +60,6 @@ get_defaults_mlp <- function() { "mlp", "nnet", "epochs", "maxit", get_arg("nnet", "nnet.default", "maxit"), ) } -parsnip::convert_args("mlp") +convert_args("mlp") ``` diff --git a/man/rmd/multinom-reg.Rmd b/man/rmd/multinom-reg.Rmd index e377202b5..5d08847d2 100644 --- a/man/rmd/multinom-reg.Rmd +++ b/man/rmd/multinom-reg.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below. @@ -66,6 +69,6 @@ get_defaults_multinom_reg <- function() { "multinom_reg", "nnet", "penalty", "decay", get_arg("nnet", "nnet.default", "decay"), ) } -parsnip::convert_args("multinom_reg") +convert_args("multinom_reg") ``` diff --git a/man/rmd/nearest-neighbor.Rmd b/man/rmd/nearest-neighbor.Rmd index 01a95568e..002b9c8d6 100644 --- a/man/rmd/nearest-neighbor.Rmd +++ b/man/rmd/nearest-neighbor.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. 
For this type of model, the template of the fit calls are below: ## kknn @@ -38,6 +41,6 @@ get_defaults_nearest_neighbor <- function() { "nearest_neighbor", "kknn", "dist_power", "distance", get_arg("kknn", "train.kknn", "distance"), ) } -parsnip::convert_args("nearest_neighbor") +convert_args("nearest_neighbor") ``` diff --git a/man/rmd/rand-forest.Rmd b/man/rmd/rand-forest.Rmd index 138b357b0..7cf6e69ef 100644 --- a/man/rmd/rand-forest.Rmd +++ b/man/rmd/rand-forest.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below: @@ -83,7 +86,7 @@ get_defaults_rand_forest <- function() { "rand_forest", "spark", "min_n", "min_instances_per_node", get_arg("sparklyr", "ml_random_forest", "min_instances_per_node"), ) } -parsnip::convert_args("rand_forest") +convert_args("rand_forest") ``` - For randomForest and spark, the default `mtry` is the square root of the number of predictors for classification, and one-third of the predictors for regression. diff --git a/man/rmd/setup.Rmd b/man/rmd/setup.Rmd new file mode 100644 index 000000000..02dd2b22b --- /dev/null +++ b/man/rmd/setup.Rmd @@ -0,0 +1,42 @@ +```{r, include = FALSE} +convert_args <- function(model_name) { + envir <- get_model_env() + + args <- + ls(envir) %>% + tibble::tibble(name = .) 
%>% + dplyr::filter(grepl("args", name)) %>% + dplyr::mutate(model = sub("_args", "", name), + args = purrr::map(name, ~envir[[.x]])) %>% + dplyr::filter(grepl(model_name, model)) %>% + tidyr::unnest(args) %>% + dplyr::select(model:original) %>% + full_join(get_arg_defaults(model_name), + by = c("model", "engine", "parsnip", "original")) %>% + mutate(original = dplyr::if_else(!is.na(default), + paste0(original, " (", default, ")"), + original)) %>% + select(-default) + + convert_df <- args %>% + dplyr::select(-model) %>% + tidyr::pivot_wider(names_from = engine, values_from = original) + + convert_df %>% + knitr::kable(col.names = paste0("**", colnames(convert_df), "**")) + +} + +get_arg_defaults <- function(model) { + check_model_exists(model) + gdf <- get(paste0("get_defaults_", model)) + gdf() +} + +get_arg <- function(ns, f, arg) { + args <- formals(getFromNamespace(f, ns)) + args <- as.list(args) + as.character(args[[arg]]) +} + +``` diff --git a/man/rmd/surv-reg.Rmd b/man/rmd/surv-reg.Rmd index ec855de17..b6c776f51 100644 --- a/man/rmd/surv-reg.Rmd +++ b/man/rmd/surv-reg.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below. ## flexsurv @@ -37,6 +40,6 @@ get_defaults_surv_reg <- function() { "surv_reg", "survival", "dist", "dist", NA, ) } -parsnip::convert_args("surv_reg") +convert_args("surv_reg") ``` diff --git a/man/rmd/svm-poly.Rmd b/man/rmd/svm-poly.Rmd index 5ec7a9769..0cdeac2b5 100644 --- a/man/rmd/svm-poly.Rmd +++ b/man/rmd/svm-poly.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. 
For this type of model, the template of the fit calls are below: @@ -36,6 +39,6 @@ get_defaults_svm_poly <- function() { "svm_poly", "kernlab", "margin", "epsilon", "0.1", ) } -parsnip::convert_args("svm_poly") +convert_args("svm_poly") ``` diff --git a/man/rmd/svm-rbf.Rmd b/man/rmd/svm-rbf.Rmd index 329f5e99e..44a593c76 100644 --- a/man/rmd/svm-rbf.Rmd +++ b/man/rmd/svm-rbf.Rmd @@ -1,5 +1,8 @@ # Engine Details +```{r, child = "setup.Rmd", include = FALSE} +``` + Engines may have pre-set default arguments when executing the model fit call. For this type of model, the template of the fit calls are below: @@ -63,6 +66,6 @@ get_defaults_svm_rbf <- function() { "svm_rbf", "liquidSVM", "rbf_sigma", "gammas", "varies", ) } -parsnip::convert_args("svm_rbf") +convert_args("svm_rbf") ``` From 73dbb73826500fc1acefd3004b533eb768836fee Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 4 Jun 2020 10:13:11 -0600 Subject: [PATCH 20/20] sigma for RBF kernel also depends on data --- man/rmd/svm-rbf.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/rmd/svm-rbf.Rmd b/man/rmd/svm-rbf.Rmd index 44a593c76..a4b35a31a 100644 --- a/man/rmd/svm-rbf.Rmd +++ b/man/rmd/svm-rbf.Rmd @@ -60,7 +60,7 @@ get_defaults_svm_rbf <- function() { tibble::tribble( ~model, ~engine, ~parsnip, ~original, ~default, "svm_rbf", "kernlab", "cost", "C", "1", - "svm_rbf", "kernlab", "rbf_sigma", "sigma", "1", + "svm_rbf", "kernlab", "rbf_sigma", "sigma", "varies", "svm_rbf", "kernlab", "margin", "epsilon", "0.1", "svm_rbf", "liquidSVM", "cost", "lambdas", "varies", "svm_rbf", "liquidSVM", "rbf_sigma", "gammas", "varies",