Skip to content

when mtry =1 all predictors are used in xgboost engine #461

Closed
@joeycouse

Description

@joeycouse

Whenever mtry = 1 in the model spec, the corresponding value of colsample_bytree is not converted to a proportion correctly.
Example: when mtry = 1, colsample_bytree is set to 1 (all predictors) when it should be 1/8 = 0.125.

library(tidyverse)
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#>   method                   from   
#>   required_pkgs.model_spec parsnip
library(mlbench)
library(xgboost)
#> 
#> Attaching package: 'xgboost'
#> The following object is masked from 'package:dplyr':
#> 
#>     slice
library(reprex)


# Load the Pima Indians diabetes data (mlbench) and move 'pos' to the front of
# the outcome's factor levels, so it lines up with event_level = "first" used
# in the engine settings below.
data("PimaIndiansDiabetes")
df <- PimaIndiansDiabetes %>%
  mutate(diabetes = fct_relevel(diabetes, 'pos'))

# Model with mtry = 1: colsample_bytree should be 1/8 = 0.125

tidy_model <- 
  boost_tree(trees = 10,
             mtry = 1,
             tree_depth = 3) %>%
  set_engine('xgboost', 
             eval_metric = 'auc',
             event_level = "first",
             verbose = 1) %>%
  set_mode('classification')

# Fix the RNG seed so the fit is reproducible.
set.seed(24)

tidy_model_fitted <- tidy_model %>% 
  fit(diabetes ~ . , data = df)
#> [1]  training-auc:0.834787 
#> [2]  training-auc:0.853881 
#> [3]  training-auc:0.873048 
#> [4]  training-auc:0.875000 
#> [5]  training-auc:0.882873 
#> [6]  training-auc:0.891716 
#> [7]  training-auc:0.896067 
#> [8]  training-auc:0.900414 
#> [9]  training-auc:0.904313 
#> [10] training-auc:0.906937

# Model with mtry = 2: colsample_bytree should be 2/8 = 0.25
tidy_model_2 <- 
  boost_tree(trees = 10,
                         mtry = 2,
                         tree_depth = 3) %>%
  set_engine('xgboost', 
             eval_metric = 'auc',
             event_level = "first",
             verbose = 1) %>%
  set_mode('classification')

# Fix the RNG seed so the fit is reproducible.
set.seed(24)
tidy_model_fitted_2 <- tidy_model_2 %>% 
  fit(diabetes ~ . , data = df)
#> [1]  training-auc:0.684049 
#> [2]  training-auc:0.776993 
#> [3]  training-auc:0.802187 
#> [4]  training-auc:0.808041 
#> [5]  training-auc:0.830478 
#> [6]  training-auc:0.837851 
#> [7]  training-auc:0.847817 
#> [8]  training-auc:0.847254 
#> [9]  training-auc:0.850034 
#> [10] training-auc:0.856455

# With mtry = 1, colsample_bytree should be 1/8 = 0.125; instead the fitted
# call shows 1, i.e. the model is using all predictors.
tidy_model_fitted$fit$call$params$colsample_bytree
#> [1] 1

# With mtry = 2, colsample_bytree is correct: 2/8 = 0.25
tidy_model_fitted_2$fit$call$params$colsample_bytree
#> [1] 0.25

Created on 2021-04-06 by the reprex package (v1.0.0)

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: an unexpected problem or unintended behavior

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions