Replies: 4 comments 1 reply
-
With |
Beta Was this translation helpful? Give feedback.
-
I can't provide my original data, but this reproduces my three main errors fairly reliably using the liver (`ilpd`) dataset. Sorry for the length--I've added in some extra columns and done an approximate reproduction of my workflow in case something in there is causing the issue.
|
Beta Was this translation helpful? Give feedback.
-
I had the same issue when trying to train or benchmark |
Beta Was this translation helpful? Give feedback.
-
@MattJEM I am getting a different error in your reprex, which seems unrelated to the one from your OP. Could you have a look?

library(mlr3verse)
#> Loading required package: mlr3
library(tidyverse)
# Load the `ilpd` (liver) dataset that ships with the mlr3data package.
data("ilpd", package = "mlr3data")

# Synthetic extra columns, one value per row of `ilpd`.
# NOTE(review): no seed is set before sample() in this script, so these
# columns differ between runs; add set.seed() if exact reproducibility matters.
group_name <- sample(
  c("red", "blue", "yellow"),
  size = nrow(ilpd),
  replace = TRUE
)
# seq_len() rather than 1:nrow(): yields an empty vector (not c(1, 0)) when
# the data has zero rows.
id_name <- seq_len(nrow(ilpd))
another_factor <- sample(
  c("factor1", "factor2", "factor3"),
  size = nrow(ilpd),
  replace = TRUE
)
extra <- tibble(group_name, id_name, another_factor)

# Append the extra columns, recode gender and the target, and add
# log-transformed copies of three of the lab measurements.
ilpd2 <- ilpd %>%
  bind_cols(extra) %>%
  mutate(
    # 1 for "Female", 0 otherwise.
    gender = as.numeric(ifelse(gender == "Female", 1, 0)),
    # Target as a factor: 1 for "yes", 0 otherwise.
    diseased = factor(ifelse(diseased == "yes", 1, 0)),
    alkaline_phosphatase_log = log(alkaline_phosphatase),
    aspartate_transaminase_log = log(aspartate_transaminase),
    direct_bilirubin_log = log(direct_bilirubin),
    # The sampled character columns become factors.
    group_name = factor(group_name),
    another_factor = factor(another_factor)
  )
# Classification task on the augmented data; "diseased" is the target and
# level "1" is declared as the positive class.
ilpd_tsk <- TaskClassif$new(
  id = "ilpd_tsk",
  backend = ilpd2,
  target = "diseased",
  positive = "1"
)

# Column roles: group_name is given the "group" role and id_name the "name"
# role.
ilpd_tsk$set_col_roles("group_name", roles = "group")
ilpd_tsk$set_col_roles("id_name", roles = "name")

# Take these seven raw measurement columns out of the feature role.
# (ilpd2 carries log-transformed copies of alkaline_phosphatase,
# aspartate_transaminase and direct_bilirubin.)
ilpd_tsk$col_roles$feature <- setdiff(
  ilpd_tsk$col_roles$feature,
  c(
    "total_bilirubin",
    "direct_bilirubin",
    "alkaline_phosphatase",
    "aspartate_transaminase",
    "alanine_transaminase",
    "total_protein",
    "albumin_globulin_ratio"
  )
)
# xgboost base learner with probability predictions, configured with
# predict_sets covering both "train" and "test".
lrn_xgboost_pretreat <- lrn(
  "classif.xgboost",
  nrounds = 500,
  predict_type = "prob",
  predict_sets = c("train", "test")
)

# One-hot encode factor columns ahead of xgboost, then wrap the resulting
# graph as a learner.
fct_encoder <- po(
  "encode",
  method = "one-hot",
  affect_columns = selector_type("factor")
)
the_pipe <- fct_encoder %>>% lrn_xgboost_pretreat
xgboost_piped <- as_learner(the_pipe)

# The two candidate learners: the encode + xgboost pipeline and a plain gbm.
lrnrs <- list(
  lrn_xgboost = xgboost_piped,
  lrn_gbm = lrn(
    "classif.gbm",
    n.trees = 500,
    predict_type = "prob",
    predict_sets = c("train", "test")
  )
)

# Wrap every learner as learner_cv %>>% tunethreshold, with the threshold
# tuner's measure set to "classif.prauc". This wrapping is what produces the
# long prefixed parameter names used in the search spaces.
lrnrs_thresh <- lapply(lrnrs, function(base_learner) {
  cv_stage <- po("learner_cv", base_learner)
  thresh_stage <- po(
    "tunethreshold",
    param_vals = list(measure = "classif.prauc")
  )
  GraphLearner$new(cv_stage %>>% thresh_stage)
})
# --- xgboost: search space and auto-tuner ----
# Parameter ids carry the full pipeline prefix of the wrapped learner.
tune_params_xgboost <- ps(
  encode.classif.xgboost.classif.xgboost.eta =
    p_dbl(lower = 0.005, upper = 0.2),
  encode.classif.xgboost.classif.xgboost.min_child_weight =
    p_dbl(lower = 1, upper = 10),
  encode.classif.xgboost.classif.xgboost.max_depth =
    p_int(lower = 3, upper = 8)
)

# Random search, 200 evaluations, scored by classification error on a
# 5 x 5-fold repeated CV.
lrnrs_thresh$lrn_xgboost <- auto_tuner(
  method = "random_search",
  learner = lrnrs_thresh$lrn_xgboost,
  resampling = rsmp("repeated_cv", repeats = 5, folds = 5),
  measure = msr("classif.ce"),
  search_space = tune_params_xgboost,
  term_evals = 200
)
lrnrs_thresh$lrn_xgboost$predict_sets <- c("train", "test")

# --- gbm: search space and auto-tuner ----
tune_params_gbm <- ps(
  classif.gbm.interaction.depth = p_int(lower = 1, upper = 6),
  classif.gbm.shrinkage = p_dbl(lower = 0.0005, upper = 0.01)
)

# Same tuning setup as for xgboost, with the gbm search space.
lrnrs_thresh$lrn_gbm <- auto_tuner(
  method = "random_search",
  learner = lrnrs_thresh$lrn_gbm,
  resampling = rsmp("repeated_cv", repeats = 5, folds = 5),
  measure = msr("classif.ce"),
  search_space = tune_params_gbm,
  term_evals = 200
)
lrnrs_thresh$lrn_gbm$predict_sets <- c("train", "test")

# --- Benchmark ----
# Outer resampling: 10 x 5-fold repeated CV, applied to both auto-tuned
# learners on the single task; fitted models are stored in the result.
fitting_rsmp <- rsmp("repeated_cv", repeats = 10, folds = 5)
big_grid <- benchmark_grid(
  tasks = ilpd_tsk,
  learners = lrnrs_thresh,
  resamplings = fitting_rsmp
)
bmrs <- benchmark(big_grid, store_models = TRUE)
#> INFO [16:22:43.879] [mlr3] Running benchmark with 100 resampling iterations
#> INFO [16:22:43.915] [mlr3] Applying learner 'encode.classif.xgboost.tunethreshold.tuned' on task 'ilpd_tsk' (iter 23/50)
#> INFO [16:22:44.002] [bbotk] Starting to optimize 3 parameter(s) with '<OptimizerRandomSearch>' and '<TerminatorEvals> [n_evals=200, k=0]'
#> INFO [16:22:44.010] [bbotk] Evaluating 1 configuration(s)
#> INFO [16:22:44.042] [mlr3] Running benchmark with 25 resampling iterations
#> INFO [16:22:44.045] [mlr3] Applying learner 'encode.classif.xgboost.tunethreshold' on task 'ilpd_tsk' (iter 24/25)
#> INFO [16:22:44.698] [mlr3] Applying learner 'encode.classif.xgboost' on task 'ilpd_tsk' (iter 1/3)
#> INFO [16:22:44.829] [mlr3] Applying learner 'encode.classif.xgboost' on task 'ilpd_tsk' (iter 3/3)
#> Error: DataBackend did not return the queried rows correctly: 1 requested, 0 received
#> This happened PipeOp encode's $predict()
#> This happened PipeOp encode.classif.xgboost's $train()

Created on 2022-06-03 by the reprex package (v2.0.1)

Session info

sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.0 (2022-04-22)
#> os macOS Monterey 12.4
#> system aarch64, darwin20
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz Europe/Zurich
#> date 2022-06-03
#> pandoc 2.18 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.0)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
#> bbotk 0.5.3 2022-05-04 [1] CRAN (R 4.2.0)
#> broom 0.8.0 2022-04-13 [1] CRAN (R 4.2.0)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.0)
#> checkmate 2.1.0 2022-04-21 [1] CRAN (R 4.2.0)
#> cli 3.3.0 2022-04-25 [1] CRAN (R 4.2.0)
#> clue 0.3-61 2022-05-30 [1] CRAN (R 4.2.0)
#> cluster 2.1.3 2022-03-28 [1] CRAN (R 4.2.0)
#> clusterCrit 1.2.8 2018-07-26 [1] CRAN (R 4.2.0)
#> codetools 0.2-18 2020-11-04 [3] CRAN (R 4.2.0)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0)
#> crayon 1.5.1 2022-03-26 [1] CRAN (R 4.2.0)
#> data.table 1.14.2 2021-09-27 [1] CRAN (R 4.2.0)
#> DBI 1.1.2 2021-12-20 [1] CRAN (R 4.2.0)
#> dbplyr 2.1.1 2021-04-06 [1] CRAN (R 4.2.0)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.2.0)
#> dplyr * 1.0.9 2022-04-28 [1] CRAN (R 4.2.0)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
#> evaluate 0.15 2022-02-18 [1] CRAN (R 4.2.0)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0)
#> forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.2.0)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0)
#> future 1.26.1 2022-05-27 [1] CRAN (R 4.2.0)
#> future.apply 1.9.0 2022-04-25 [1] CRAN (R 4.2.0)
#> generics 0.1.2 2022-01-31 [1] CRAN (R 4.2.0)
#> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0)
#> globals 0.15.0 2022-05-09 [1] CRAN (R 4.2.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
#> gtable 0.3.0 2019-03-25 [1] CRAN (R 4.2.0)
#> haven 2.5.0 2022-04-15 [1] CRAN (R 4.2.0)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0)
#> hms 1.1.1 2021-09-26 [1] CRAN (R 4.2.0)
#> htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.2.0)
#> httr 1.4.3 2022-05-04 [1] CRAN (R 4.2.0)
#> jsonlite 1.8.0 2022-02-22 [1] CRAN (R 4.2.0)
#> knitr 1.39 2022-04-26 [1] CRAN (R 4.2.0)
#> lattice 0.20-45 2021-09-22 [3] CRAN (R 4.2.0)
#> lgr 0.4.3 2021-09-16 [1] CRAN (R 4.2.0)
#> lifecycle 1.0.1 2021-09-24 [1] CRAN (R 4.2.0)
#> listenv 0.8.0 2019-12-05 [1] CRAN (R 4.2.0)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.2.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
#> Matrix 1.4-1 2022-03-23 [1] CRAN (R 4.2.0)
#> mlr3 * 0.13.3-9000 2022-05-28 [1] Github (mlr-org/mlr3@7313ae8)
#> mlr3cluster 0.1.3 2022-04-06 [1] CRAN (R 4.2.0)
#> mlr3data 0.6.0 2022-03-18 [1] CRAN (R 4.2.0)
#> mlr3extralearners 0.5.37 2022-05-19 [1] Github (mlr-org/mlr3extralearners@f06f209)
#> mlr3filters 0.5.0.9000 2022-05-09 [1] Github (mlr-org/mlr3filters@3dde376)
#> mlr3fselect 0.7.1 2022-05-09 [1] Github (mlr-org/mlr3fselect@c2284af)
#> mlr3learners 0.5.3 2022-05-25 [1] CRAN (R 4.2.0)
#> mlr3measures 0.4.1 2022-01-13 [1] CRAN (R 4.2.0)
#> mlr3misc 0.10.0 2022-01-11 [1] CRAN (R 4.2.0)
#> mlr3pipelines 0.4.0-9000 2022-05-31 [1] Github (mlr-org/mlr3pipelines@1aae6ba)
#> mlr3tuning 0.13.1 2022-05-03 [1] CRAN (R 4.2.0)
#> mlr3tuningspaces 0.2.0 2022-05-30 [1] Github (mlr-org/mlr3tuningspaces@a5a0ca3)
#> mlr3verse * 0.2.5 2022-05-18 [1] CRAN (R 4.2.0)
#> mlr3viz 0.5.9 2022-05-30 [1] local
#> modelr 0.1.8 2020-05-19 [1] CRAN (R 4.2.0)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
#> palmerpenguins 0.1.0 2020-07-23 [1] CRAN (R 4.2.0)
#> paradox 0.9.0.9000 2022-05-09 [1] Github (mlr-org/paradox@d41cc29)
#> parallelly 1.31.1 2022-04-22 [1] CRAN (R 4.2.0)
#> pillar 1.7.0 2022-02-01 [1] CRAN (R 4.2.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
#> purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.2.0)
#> R.cache 0.15.0 2021-04-30 [1] CRAN (R 4.2.0)
#> R.methodsS3 1.8.1 2020-08-26 [1] CRAN (R 4.2.0)
#> R.oo 1.24.0 2020-08-26 [1] CRAN (R 4.2.0)
#> R.utils 2.11.0 2021-09-26 [1] CRAN (R 4.2.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
#> readr * 2.1.2 2022-01-30 [1] CRAN (R 4.2.0)
#> readxl 1.4.0 2022-03-28 [1] CRAN (R 4.2.0)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.2.0)
#> rlang 1.0.2 2022-03-04 [1] CRAN (R 4.2.0)
#> rmarkdown 2.14 2022-04-25 [1] CRAN (R 4.2.0)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.2.0)
#> rvest 1.0.2 2021-10-16 [1] CRAN (R 4.2.0)
#> scales 1.2.0 2022-04-13 [1] CRAN (R 4.2.0)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0)
#> stringi 1.7.6 2021-11-29 [1] CRAN (R 4.2.0)
#> stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.2.0)
#> styler 1.7.0 2022-03-13 [1] CRAN (R 4.2.0)
#> tibble * 3.1.7 2022-05-03 [1] CRAN (R 4.2.0)
#> tidyr * 1.2.0 2022-02-01 [1] CRAN (R 4.2.0)
#> tidyselect 1.1.2 2022-02-21 [1] CRAN (R 4.2.0)
#> tidyverse * 1.3.1 2021-04-15 [1] CRAN (R 4.2.0)
#> tzdb 0.3.0 2022-03-28 [1] CRAN (R 4.2.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0)
#> uuid 1.1-0 2022-04-19 [1] CRAN (R 4.2.0)
#> vctrs 0.4.1 2022-04-13 [1] CRAN (R 4.2.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
#> xfun 0.31 2022-05-10 [1] CRAN (R 4.2.0)
#> xgboost 1.6.0.1 2022-04-16 [1] CRAN (R 4.2.0)
#> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0)
#> yaml 2.3.5 2022-02-21 [1] CRAN (R 4.2.0)
#>
#> [1] /Users/pjs/Library/R/arm64/4.2/library
#> [2] /opt/R/4.2.0-arm64/Resources/site-library
#> [3] /opt/R/4.2.0-arm64/Resources/library
#>
#> ────────────────────────────────────────────────────────────────────────────── |
Beta Was this translation helpful? Give feedback.
-
I have learners that have gone through a few transformations and have ended up with some pretty long parameter names, such as 'encode.classif.xgboost.classif.xgboost.min_child_weight'.
I think these names are interfering with my ability to tune my models. When I try to run `benchmark` on my learners, I get the following error:

Would it be possible to either allow the user to change parameter names in mlr3 or to disable this check?
Beta Was this translation helpful? Give feedback.
All reactions