Closed
Description
Whenever `mtry = 1` is set in the model spec, the corresponding value of `colsample_bytree` is not converted to a proportion correctly.
Example: with 8 predictors, `mtry = 1` results in `colsample_bytree = 1` when it should be 1/8 = 0.125.
library(tidyverse)
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
library(mlbench)
library(xgboost)
#>
#> Attaching package: 'xgboost'
#> The following object is masked from 'package:dplyr':
#>
#> slice
library(reprex)
# Load the PimaIndiansDiabetes data set.
data("PimaIndiansDiabetes")

# Move "pos" to the front of the outcome's factor levels.
df <- mutate(PimaIndiansDiabetes, diabetes = fct_relevel(diabetes, "pos"))
# Model with mtry = 1: colsample_bytree should be 1/8 = 0.125
tidy_model <-
  boost_tree(
    trees = 10,
    mtry = 1,
    tree_depth = 3
  ) %>%
  set_engine(
    "xgboost",
    eval_metric = "auc",
    event_level = "first",
    verbose = 1
  ) %>%
  set_mode("classification")

# Same seed as the mtry = 2 fit so the two runs are comparable.
set.seed(24)
tidy_model_fitted <-
  tidy_model %>%
  fit(diabetes ~ ., data = df)
#> [1] training-auc:0.834787
#> [2] training-auc:0.853881
#> [3] training-auc:0.873048
#> [4] training-auc:0.875000
#> [5] training-auc:0.882873
#> [6] training-auc:0.891716
#> [7] training-auc:0.896067
#> [8] training-auc:0.900414
#> [9] training-auc:0.904313
#> [10] training-auc:0.906937
# Model with mtry = 2: colsample_bytree should be 2/8 = 0.25
tidy_model_2 <-
  boost_tree(
    trees = 10,
    mtry = 2,
    tree_depth = 3
  ) %>%
  set_engine(
    "xgboost",
    eval_metric = "auc",
    event_level = "first",
    verbose = 1
  ) %>%
  set_mode("classification")

# Same seed as the mtry = 1 fit so the two runs are comparable.
set.seed(24)
tidy_model_fitted_2 <-
  tidy_model_2 %>%
  fit(diabetes ~ ., data = df)
#> [1] training-auc:0.684049
#> [2] training-auc:0.776993
#> [3] training-auc:0.802187
#> [4] training-auc:0.808041
#> [5] training-auc:0.830478
#> [6] training-auc:0.837851
#> [7] training-auc:0.847817
#> [8] training-auc:0.847254
#> [9] training-auc:0.850034
#> [10] training-auc:0.856455
# Inspect the colsample_bytree value recorded in the fitted engine call.
# For mtry = 1 it should be 1/8 = 0.125; instead it is 1, i.e. the model
# is using all predictors.
tidy_model_fitted$fit$call$params$colsample_bytree
#> [1] 1
# For mtry = 2 the value is correct: 2/8 = 0.25.
tidy_model_fitted_2$fit$call$params$colsample_bytree
#> [1] 0.25
Created on 2021-04-06 by the reprex package (v1.0.0)