From 38f4f5ec9db6d757ad3650951fe14fc391079058 Mon Sep 17 00:00:00 2001 From: kenomersmannLaptop Date: Sat, 7 Sep 2024 17:13:18 +0200 Subject: [PATCH 01/15] init pipeops + working adas --- R/PipeOpADAS.R | 112 ++++++++++++++++++++++++++++++++++++++++++++++ R/PipeOpBLSmote.R | 111 +++++++++++++++++++++++++++++++++++++++++++++ R/bibentries.R | 9 ++++ 3 files changed, 232 insertions(+) create mode 100644 R/PipeOpADAS.R create mode 100644 R/PipeOpBLSmote.R diff --git a/R/PipeOpADAS.R b/R/PipeOpADAS.R new file mode 100644 index 000000000..816da02d1 --- /dev/null +++ b/R/PipeOpADAS.R @@ -0,0 +1,112 @@ +#' @title ADAS Balancing +#' +#' @usage NULL +#' @name mlr_pipeops_adas +#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' +#' +#' Generates a more balanced data set by creating +#' synthetic instances of the minority class using the ADAS algorithm. +#' The algorithm samples for each minority instance a new data point based on the `K` nearest +#' neighbors of that data point. +#' It can only be applied to tasks with numeric features that have no missing values. +#' See [`smotefamily::ADAS`] for details. +#' +#' @section Construction: +#' ``` +#' PipeOpADAS$new(id = "adas", param_vals = list()) +#' ``` +#' +#' * `id` :: `character(1)`\cr +#' Identifier of resulting object, default `"adas"`. +#' * `param_vals` :: named `list`\cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. +#' +#' The output during training is the input [`Task`][mlr3::Task] with added synthetic rows for the minority class. +#' The output during prediction is the unchanged input. +#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`]. 
+#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: +#' * `K` :: `numeric(1)` \cr +#' The number of nearest neighbors used for sampling new values. +#' See [`ADAS()`][`smotefamily::ADAS`]. +#' +#' @section Fields: +#' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @section Methods: +#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @references +#' `r format_bib("he_2008")` +#' +#' @family PipeOps +#' @template seealso_pipeopslist +#' @include PipeOpTaskPreproc.R +#' @export +#' @examples +#' \dontshow{ if (requireNamespace("smotefamily")) \{ } +#' library("mlr3") +#' +#' # Create example task +#' data = smotefamily::sample_generator(1000, ratio = 0.80) +#' data$result = factor(data$result) +#' task = TaskClassif$new(id = "example", backend = data, target = "result") +#' task$data() +#' table(task$data()$result) +#' +#' # Generate synthetic data for minority class +#' pop = po("adas") +#' adasdata = pop$train(list(task))[[1]]$data() +#' table(adasdata$result) +#' \dontshow{ \} } +PipeOpADAS = R6Class("PipeOpADAS", + inherit = PipeOpTaskPreproc, + public = list( + initialize = function(id = "adas", param_vals = list()) { + ps = ps( + K = p_int(lower = 1, default = 5, tags = c("train", "adas")) + ) + super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, + packages = "smotefamily", task_type = "TaskClassif", tags = "imbalanced data") + } + ), + private = list( + + .train_task = function(task) { + assert_true(all(task$feature_types$type == "numeric")) + cols = task$feature_names + + unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) + if (length(unsupported_cols)) { + stopf("ADAS cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", + paste(unsupported_cols, collapse = "', '")) + } + + if (!length(cols)) { + return(task) + } + dt = 
task$data(cols = cols) + + # calculate synthetic data + st = setDT(invoke(smotefamily::ADAS, X = dt, target = task$truth(), + .args = self$param_set$get_values(tags = "adas"))$syn_data) + + # rename target column and fix character conversion + st[["class"]] = as_factor(st[["class"]], levels = task$class_names) + setnames(st, "class", task$target_names) + + task$rbind(st) + } + ) +) + +mlr_pipeops$add("adas", PipeOpADAS) diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R new file mode 100644 index 000000000..24696022f --- /dev/null +++ b/R/PipeOpBLSmote.R @@ -0,0 +1,111 @@ +#' @title SMOTE Balancing +#' +#' @usage NULL +#' @name mlr_pipeops_smote +#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' Generates a more balanced data set by creating +#' synthetic instances of the minority class using the SMOTE algorithm. +#' The algorithm samples for each minority instance a new data point based on the `K` nearest +#' neighbors of that data point. +#' It can only be applied to tasks with purely numeric features. +#' See [`smotefamily::SMOTE`] for details. +#' +#' @section Construction: +#' ``` +#' PipeOpSmote$new(id = "smote", param_vals = list()) +#' ``` +#' +#' * `id` :: `character(1)`\cr +#' Identifier of resulting object, default `"smote"`. +#' * `param_vals` :: named `list`\cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. +#' +#' The output during training is the input [`Task`][mlr3::Task] with added synthetic rows for the minority class. +#' The output during prediction is the unchanged input. +#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`]. 
+#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: +#' * `K` :: `numeric(1)` \cr +#' The number of nearest neighbors used for sampling new values. +#' See [`SMOTE()`][`smotefamily::SMOTE`]. +#' * `dup_size` :: `numeric` \cr +#' Desired times of synthetic minority instances over the original number of +#' majority instances. See [`SMOTE()`][`smotefamily::SMOTE`]. +#' +#' @section Fields: +#' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @section Methods: +#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @references +#' `r format_bib("chawla_2002")` +#' +#' @family PipeOps +#' @template seealso_pipeopslist +#' @include PipeOpTaskPreproc.R +#' @export +#' @examples +#' \dontshow{ if (requireNamespace("smotefamily")) \{ } +#' library("mlr3") +#' +#' # Create example task +#' data = smotefamily::sample_generator(1000, ratio = 0.80) +#' data$result = factor(data$result) +#' task = TaskClassif$new(id = "example", backend = data, target = "result") +#' task$data() +#' table(task$data()$result) +#' +#' # Generate synthetic data for minority class +#' pop = po("smote") +#' smotedata = pop$train(list(task))[[1]]$data() +#' table(smotedata$result) +#' \dontshow{ \} } +PipeOpSmote = R6Class("PipeOpSmote", + inherit = PipeOpTaskPreproc, + public = list( + initialize = function(id = "smote", param_vals = list()) { + ps = ps( + K = p_int(lower = 1, default = 5, tags = c("train", "smote")), + # dup_size = 0 leads to behaviour different from 1, 2, 3, ..., because it means "autodetect", + # so it is a 'special_vals'. 
+ dup_size = p_int(lower = 1, default = 0, special_vals = list(0), tags = c("train", "smote")) + ) + super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, + packages = "smotefamily", task_type = "TaskClassif", tags = "imbalanced data") + } + ), + private = list( + + .train_task = function(task) { + assert_true(all(task$feature_types$type == "numeric")) + cols = private$.select_cols(task) + + if (!length(cols)) { + return(task) + } + dt = task$data(cols = cols) + + # calculate synthetic data + st = setDT(invoke(smotefamily::SMOTE, X = dt, target = task$truth(), + .args = self$param_set$get_values(tags = "smote"), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data) + + # rename target column and fix character conversion + st[["class"]] = as_factor(st[["class"]], levels = task$class_names) + setnames(st, "class", task$target_names) + + task$rbind(st) + } + ) +) + +mlr_pipeops$add("smote", PipeOpSmote) diff --git a/R/bibentries.R b/R/bibentries.R index de55741d5..6e3b2ec80 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -52,5 +52,14 @@ bibentries = c( author = "Yujun Wu and Dennis D Boos and Leonard A Stefanski", title = "Controlling Variable Selection by the Addition of Pseudovariables", journal = "Journal of the American Statistical Association" + ), + + he_2008 = bibentry("InProceedings", + author = "Haibo He and Yang Bai and Garcia, Edwardo A. 
and Shutao Li", + booktitle = "2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)", + title = "ADASYN: Adaptive synthetic sampling approach for imbalanced learning", + year = "2008", + pages = "1322-1328", + doi = "10.1109/IJCNN.2008.4633969}" ) ) From 620d6c0853fc50f8f658c10643419ede31b6cba4 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 14 Sep 2024 22:48:28 +0200 Subject: [PATCH 02/15] added pipeops --- R/PipeOpADAS.R | 28 ++++++++-------- R/PipeOpBLSmote.R | 84 +++++++++++++++++++++++++++-------------------- R/bibentries.R | 15 ++++++++- 3 files changed, 78 insertions(+), 49 deletions(-) diff --git a/R/PipeOpADAS.R b/R/PipeOpADAS.R index 816da02d1..133795305 100644 --- a/R/PipeOpADAS.R +++ b/R/PipeOpADAS.R @@ -5,13 +5,11 @@ #' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @description +#' Generates a more balanced data set by creating synthetic instances of the minority class using the ADASYN algorithm. #' -#' -#' Generates a more balanced data set by creating -#' synthetic instances of the minority class using the ADAS algorithm. -#' The algorithm samples for each minority instance a new data point based on the `K` nearest -#' neighbors of that data point. +#' The algorithm generates for each minority instance new data points based on its `K` nearest neighbors and the difficulty of learning for that data point. #' It can only be applied to tasks with numeric features that have no missing values. +#' #' See [`smotefamily::ADAS`] for details. #' #' @section Construction: @@ -36,7 +34,7 @@ #' @section Parameters: #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: #' * `K` :: `numeric(1)` \cr -#' The number of nearest neighbors used for sampling new values. +#' The number of nearest neighbors used for sampling new values. Default is `5`. #' See [`ADAS()`][`smotefamily::ADAS`]. 
#' #' @section Fields: @@ -57,16 +55,20 @@ #' library("mlr3") #' #' # Create example task -#' data = smotefamily::sample_generator(1000, ratio = 0.80) -#' data$result = factor(data$result) -#' task = TaskClassif$new(id = "example", backend = data, target = "result") -#' task$data() -#' table(task$data()$result) +#' data = data.frame( +#' target = factor(sample(c("c1", "c2"), size = 300, replace = TRUE, prob = c(0.1, 0.9))), +#' x1 = rnorm(300), +#' x2 = rnorm(300) +#' ) +#' task = TaskClassif$new(id = "example", backend = data, target = "target") +#' task$head() +#' table(task$data(cols = "target")) #' #' # Generate synthetic data for minority class #' pop = po("adas") -#' adasdata = pop$train(list(task))[[1]]$data() -#' table(adasdata$result) +#' adas_result = pop$train(list(task))[[1]]$data() +#' nrow(adas_result) +#' table(adas_result$target) #' \dontshow{ \} } PipeOpADAS = R6Class("PipeOpADAS", inherit = PipeOpTaskPreproc, diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index 24696022f..375669610 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -1,20 +1,17 @@ -#' @title SMOTE Balancing +#' @title BLSMOTE Balancing #' #' @usage NULL -#' @name mlr_pipeops_smote +#' @name mlr_pipeops_blsmote #' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. #' #' @description -#' Generates a more balanced data set by creating -#' synthetic instances of the minority class using the SMOTE algorithm. -#' The algorithm samples for each minority instance a new data point based on the `K` nearest -#' neighbors of that data point. -#' It can only be applied to tasks with purely numeric features. -#' See [`smotefamily::SMOTE`] for details. +#' Adds new data points by generating synthetic instances for the minority class using the Borderline-SMOTE algorithm. +#' This can only be applied to [classification tasks][mlr3::TaskClassif] with numeric features that have no missing values. +#' See [`smotefamily::BLSMOTE`] for details. 
#' #' @section Construction: #' ``` -#' PipeOpSmote$new(id = "smote", param_vals = list()) +#' PipeOpBLSmote$new(id = "blsmote", param_vals = list()) #' ``` #' #' * `id` :: `character(1)`\cr @@ -34,11 +31,17 @@ #' @section Parameters: #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: #' * `K` :: `numeric(1)` \cr -#' The number of nearest neighbors used for sampling new values. -#' See [`SMOTE()`][`smotefamily::SMOTE`]. +#' The number of nearest neighbors used for sampling from the minority class. Default is `5`. +#' See [`BLSMOTE()`][`smotefamily::BLSMOTE`]. +#' * `C` :: `numeric(1)` \cr +#' The number of nearest neighbors used for classifying sample points as SAFE/DANGER/NOISE. Default is `5`. +#' See [`BLSMOTE()`][`smotefamily::BLSMOTE`]. #' * `dup_size` :: `numeric` \cr -#' Desired times of synthetic minority instances over the original number of -#' majority instances. See [`SMOTE()`][`smotefamily::SMOTE`]. +#' Desired times of synthetic minority instances over the original number of majority instances. `0` leads to balancing minority and majority class. +#' Default is `0`. See [`BLSMOTE()`][`smotefamily::BLSMOTE`]. +#' * `method` :: `character(1)` \cr +#' The type of Borderline-SMOTE algorithm to use. Default is `"type1"`. +#' See [`BLSMOTE()`][`smotefamily::BLSMOTE`]. #' #' @section Fields: #' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. @@ -47,7 +50,7 @@ #' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. 
#' #' @references -#' `r format_bib("chawla_2002")` +#' `r format_bib("han_2005")` #' #' @family PipeOps #' @template seealso_pipeopslist @@ -58,26 +61,31 @@ #' library("mlr3") #' #' # Create example task -#' data = smotefamily::sample_generator(1000, ratio = 0.80) -#' data$result = factor(data$result) -#' task = TaskClassif$new(id = "example", backend = data, target = "result") -#' task$data() -#' table(task$data()$result) +#' data = data.frame( +#' target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), +#' feature = rnorm(200) +#' ) +#' task = TaskClassif$new(id = "example", backend = data, target = "target") +#' task$head() +#' table(task$data(cols = "target")) #' #' # Generate synthetic data for minority class -#' pop = po("smote") -#' smotedata = pop$train(list(task))[[1]]$data() -#' table(smotedata$result) +#' pop = po("blsmote") +#' bls_result = pop$train(list(task))[[1]]$data() +#' nrow(bls_result) +#' table(bls_result$target) #' \dontshow{ \} } -PipeOpSmote = R6Class("PipeOpSmote", +PipeOpBLSmote = R6Class("PipeOpBLSmote", inherit = PipeOpTaskPreproc, public = list( - initialize = function(id = "smote", param_vals = list()) { + initialize = function(id = "blsmote", param_vals = list()) { ps = ps( - K = p_int(lower = 1, default = 5, tags = c("train", "smote")), - # dup_size = 0 leads to behaviour different from 1, 2, 3, ..., because it means "autodetect", - # so it is a 'special_vals'. - dup_size = p_int(lower = 1, default = 0, special_vals = list(0), tags = c("train", "smote")) + K = p_int(lower = 1, default = 5, tags = c("train", "blsmote")), + C = p_int(lower = 1, default = 5, tags = c("train", "blsmote")), + # dup_size = 0 leads to behaviour different from 1, 2, 3, ..., because it means "duplicating until balanced", so it is a 'special_vals'. 
+ dupSize = p_int(lower = 1, default = 0, special_vals = list(0), tags = c("train", "blsmote")), + # Default of `method` is derived from the source code of smotefamily::BLSMOTE(), not documented there. + method = p_fct(levels = c("type1", "type2"), tags = c("train", "blsmote")) ) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, packages = "smotefamily", task_type = "TaskClassif", tags = "imbalanced data") @@ -87,19 +95,25 @@ PipeOpSmote = R6Class("PipeOpSmote", .train_task = function(task) { assert_true(all(task$feature_types$type == "numeric")) - cols = private$.select_cols(task) + cols = task$feature_names + + unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) + if (length(unsupported_cols)) { + stopf("BLSMOTE cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", + paste(unsupported_cols, collapse = "', '")) + } if (!length(cols)) { return(task) } dt = task$data(cols = cols) - # calculate synthetic data - st = setDT(invoke(smotefamily::SMOTE, X = dt, target = task$truth(), - .args = self$param_set$get_values(tags = "smote"), - .opts = list(warnPartialMatchArgs = FALSE))$syn_data) + # Calculate synthetic data + # TODO: Do we have a way to suppress messages by print()? 
+ st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), + .args = self$param_set$get_values(tags = "blsmote"))$syn_data) - # rename target column and fix character conversion + # Rename target column and fix character conversion st[["class"]] = as_factor(st[["class"]], levels = task$class_names) setnames(st, "class", task$target_names) @@ -108,4 +122,4 @@ PipeOpSmote = R6Class("PipeOpSmote", ) ) -mlr_pipeops$add("smote", PipeOpSmote) +mlr_pipeops$add("blsmote", PipeOpBLSmote) diff --git a/R/bibentries.R b/R/bibentries.R index 6e3b2ec80..ecd2f09d9 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -60,6 +60,19 @@ bibentries = c( title = "ADASYN: Adaptive synthetic sampling approach for imbalanced learning", year = "2008", pages = "1322-1328", - doi = "10.1109/IJCNN.2008.4633969}" + doi = "10.1109/IJCNN.2008.4633969" + ), + + han_2005 = bibentry("InProceedings", + doi = "10.1007/11538059_91", + author = "Han, Hui and Wang, Wen-Yuan and Mao, Bing-Huan", + editor = "Huang, De-Shuang and Zhang, Xiao-Ping and Huang, Guang-Bin", + title = "Borderline-SMOTE: A New Over-Sampling Method in Imbalanced Data Sets Learning", + booktitle = "Advances in Intelligent Computing", + year = "2005", + publisher = "Springer Berlin Heidelberg", + address = "Berlin, Heidelberg", + pages = "878--887", + isbn = "978-3-540-31902-3" ) ) From 09e29b55df302ff9098bec60a3c5c47e396c2d00 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 14 Sep 2024 22:48:56 +0200 Subject: [PATCH 03/15] docs --- DESCRIPTION | 2 + NAMESPACE | 2 + man/PipeOp.Rd | 2 + man/PipeOpEnsemble.Rd | 2 + man/PipeOpImpute.Rd | 2 + man/PipeOpTargetTrafo.Rd | 2 + man/PipeOpTaskPreproc.Rd | 2 + man/PipeOpTaskPreprocSimple.Rd | 2 + man/mlr_pipeops.Rd | 2 + man/mlr_pipeops_adas.Rd | 169 ++++++++++++++++++++++ man/mlr_pipeops_blsmote.Rd | 174 +++++++++++++++++++++++ man/mlr_pipeops_boxcox.Rd | 2 + man/mlr_pipeops_branch.Rd | 2 + man/mlr_pipeops_chunk.Rd | 2 + man/mlr_pipeops_classbalancing.Rd | 2 + 
man/mlr_pipeops_classifavg.Rd | 2 + man/mlr_pipeops_classweights.Rd | 2 + man/mlr_pipeops_colapply.Rd | 2 + man/mlr_pipeops_collapsefactors.Rd | 2 + man/mlr_pipeops_colroles.Rd | 2 + man/mlr_pipeops_copy.Rd | 2 + man/mlr_pipeops_datefeatures.Rd | 2 + man/mlr_pipeops_encode.Rd | 2 + man/mlr_pipeops_encodeimpact.Rd | 2 + man/mlr_pipeops_encodelmer.Rd | 2 + man/mlr_pipeops_featureunion.Rd | 2 + man/mlr_pipeops_filter.Rd | 2 + man/mlr_pipeops_fixfactors.Rd | 2 + man/mlr_pipeops_histbin.Rd | 2 + man/mlr_pipeops_ica.Rd | 2 + man/mlr_pipeops_imputeconstant.Rd | 2 + man/mlr_pipeops_imputehist.Rd | 2 + man/mlr_pipeops_imputelearner.Rd | 2 + man/mlr_pipeops_imputemean.Rd | 2 + man/mlr_pipeops_imputemedian.Rd | 2 + man/mlr_pipeops_imputemode.Rd | 2 + man/mlr_pipeops_imputeoor.Rd | 2 + man/mlr_pipeops_imputesample.Rd | 2 + man/mlr_pipeops_kernelpca.Rd | 2 + man/mlr_pipeops_learner.Rd | 2 + man/mlr_pipeops_missind.Rd | 2 + man/mlr_pipeops_modelmatrix.Rd | 2 + man/mlr_pipeops_multiplicityexply.Rd | 2 + man/mlr_pipeops_multiplicityimply.Rd | 2 + man/mlr_pipeops_mutate.Rd | 2 + man/mlr_pipeops_nmf.Rd | 2 + man/mlr_pipeops_nop.Rd | 2 + man/mlr_pipeops_ovrsplit.Rd | 2 + man/mlr_pipeops_ovrunite.Rd | 2 + man/mlr_pipeops_pca.Rd | 2 + man/mlr_pipeops_proxy.Rd | 2 + man/mlr_pipeops_quantilebin.Rd | 2 + man/mlr_pipeops_randomprojection.Rd | 2 + man/mlr_pipeops_randomresponse.Rd | 2 + man/mlr_pipeops_regravg.Rd | 2 + man/mlr_pipeops_removeconstants.Rd | 2 + man/mlr_pipeops_renamecolumns.Rd | 2 + man/mlr_pipeops_replicate.Rd | 2 + man/mlr_pipeops_rowapply.Rd | 2 + man/mlr_pipeops_scale.Rd | 2 + man/mlr_pipeops_scalemaxabs.Rd | 2 + man/mlr_pipeops_scalerange.Rd | 2 + man/mlr_pipeops_select.Rd | 2 + man/mlr_pipeops_smote.Rd | 2 + man/mlr_pipeops_spatialsign.Rd | 2 + man/mlr_pipeops_subsample.Rd | 2 + man/mlr_pipeops_targetinvert.Rd | 2 + man/mlr_pipeops_targetmutate.Rd | 2 + man/mlr_pipeops_targettrafoscalerange.Rd | 2 + man/mlr_pipeops_textvectorizer.Rd | 2 + man/mlr_pipeops_threshold.Rd | 
2 + man/mlr_pipeops_tunethreshold.Rd | 2 + man/mlr_pipeops_unbranch.Rd | 2 + man/mlr_pipeops_updatetarget.Rd | 2 + man/mlr_pipeops_vtreat.Rd | 2 + man/mlr_pipeops_yeojohnson.Rd | 2 + 76 files changed, 491 insertions(+) create mode 100644 man/mlr_pipeops_adas.Rd create mode 100644 man/mlr_pipeops_blsmote.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 85253382c..05f8f6d99 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -108,6 +108,8 @@ Collate: 'LearnerAvg.R' 'NO_OP.R' 'PipeOpTaskPreproc.R' + 'PipeOpADAS.R' + 'PipeOpBLSmote.R' 'PipeOpBoxCox.R' 'PipeOpBranch.R' 'PipeOpChunk.R' diff --git a/NAMESPACE b/NAMESPACE index 6d8c22381..d5f07786b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,8 @@ export(LearnerRegrAvg) export(Multiplicity) export(NO_OP) export(PipeOp) +export(PipeOpADAS) +export(PipeOpBLSmote) export(PipeOpBoxCox) export(PipeOpBranch) export(PipeOpChunk) diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd index 3458719fd..545a0bfc9 100644 --- a/man/PipeOp.Rd +++ b/man/PipeOp.Rd @@ -273,6 +273,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd index 46bc5918b..73723343f 100644 --- a/man/PipeOpEnsemble.Rd +++ b/man/PipeOpEnsemble.Rd @@ -105,6 +105,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd index 52203632d..21997c81a 100644 --- a/man/PipeOpImpute.Rd +++ b/man/PipeOpImpute.Rd @@ -138,6 +138,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, 
\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 8a534ec18..ec79b9632 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -146,6 +146,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 69f92477c..12084c9d6 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -201,6 +201,8 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index d836e75a5..986bc76d3 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -138,6 +138,8 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd index e2a7d1e1a..4c202643b 100644 --- a/man/mlr_pipeops.Rd +++ b/man/mlr_pipeops.Rd @@ -75,6 +75,8 @@ Other PipeOps: \code{\link{PipeOpTargetTrafo}}, \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, 
\code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_adas.Rd b/man/mlr_pipeops_adas.Rd new file mode 100644 index 000000000..e5ccdee96 --- /dev/null +++ b/man/mlr_pipeops_adas.Rd @@ -0,0 +1,169 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpADAS.R +\name{mlr_pipeops_adas} +\alias{mlr_pipeops_adas} +\alias{PipeOpADAS} +\title{ADAS Balancing} +\format{ +\code{\link[R6:R6Class]{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Generates a more balanced data set by creating synthetic instances of the minority class using the ADASYN algorithm. + +The algorithm generates for each minority instance new data points based on its \code{K} nearest neighbors and the difficulty of learning for that data point. +It can only be applied to tasks with numeric features that have no missing values. + +See \code{\link[smotefamily:adas]{smotefamily::ADAS}} for details. +} +\section{Construction}{ + + +\if{html}{\out{
}}\preformatted{PipeOpADAS$new(id = "adas", param_vals = list()) +}\if{html}{\out{
}} +\itemize{ +\item \code{id} :: \code{character(1)}\cr +Identifier of resulting object, default \code{"adas"}. +\item \code{param_vals} :: named \code{list}\cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. + +The output during training is the input \code{\link[mlr3:Task]{Task}} with added synthetic rows for the minority class. +The output during prediction is the unchanged input. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}. +} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as: +\itemize{ +\item \code{K} :: \code{numeric(1)} \cr +The number of nearest neighbors used for sampling new values. Default is \code{5}. +See \code{\link[smotefamily:adas]{ADAS()}}. +} +} + +\section{Fields}{ + +Only fields inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\examples{ +\dontshow{ if (requireNamespace("smotefamily")) \{ } +library("mlr3") + +# Create example task +data = data.frame( + target = factor(sample(c("c1", "c2"), size = 300, replace = TRUE, prob = c(0.1, 0.9))), + x1 = rnorm(300), + x2 = rnorm(300) +) +task = TaskClassif$new(id = "example", backend = data, target = "target") +task$head() +table(task$data(cols = "target")) + +# Generate synthetic data for minority class +pop = po("adas") +adas_result = pop$train(list(task))[[1]]$data() +nrow(adas_result) +table(adas_result$target) +\dontshow{ \} } +} +\references{ +He H, Bai Y, Garcia, A. E, Li S (2008). 
+\dQuote{ADASYN: Adaptive synthetic sampling approach for imbalanced learning.} +In \emph{2008 IEEE International Joint Conference on Neural Networks (IEEE World Congress on Computational Intelligence)}, 1322-1328. +\doi{10.1109/IJCNN.2008.4633969}. +} +\seealso{ +https://mlr-org.com/pipeops.html + +Other PipeOps: +\code{\link{PipeOp}}, +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_blsmote}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, +\code{\link{mlr_pipeops_imputelearner}}, +\code{\link{mlr_pipeops_imputemean}}, +\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, +\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, 
+\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, +\code{\link{mlr_pipeops_regravg}}, +\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, +\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}} +} +\concept{PipeOps} diff --git a/man/mlr_pipeops_blsmote.Rd b/man/mlr_pipeops_blsmote.Rd new file mode 100644 index 000000000..5c38845c4 --- /dev/null +++ b/man/mlr_pipeops_blsmote.Rd @@ -0,0 +1,174 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpBLSmote.R +\name{mlr_pipeops_blsmote} +\alias{mlr_pipeops_blsmote} +\alias{PipeOpBLSmote} +\title{BLSMOTE Balancing} +\format{ +\code{\link[R6:R6Class]{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Adds new data points by generating synthetic instances for the minority class using the Borderline-SMOTE algorithm. +This can only be applied to \link[mlr3:TaskClassif]{classification tasks} with numeric features that have no missing values. +See \code{\link[smotefamily:BLSMOTE]{smotefamily::BLSMOTE}} for details. 
+} +\section{Construction}{ + + +\if{html}{\out{
}}\preformatted{PipeOpBLSmote$new(id = "blsmote", param_vals = list()) +}\if{html}{\out{
}} +\itemize{ +\item \code{id} :: \code{character(1)}\cr +Identifier of resulting object, default \code{"blsmote"}. +\item \code{param_vals} :: named \code{list}\cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. + +The output during training is the input \code{\link[mlr3:Task]{Task}} with added synthetic rows for the minority class. +The output during prediction is the unchanged input. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}. +} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as: +\itemize{ +\item \code{K} :: \code{numeric(1)} \cr +The number of nearest neighbors used for sampling from the minority class. Default is \code{5}. +See \code{\link[smotefamily:BLSMOTE]{BLSMOTE()}}. +\item \code{C} :: \code{numeric(1)} \cr +The number of nearest neighbors used for classifying sample points as SAFE/DANGER/NOISE. Default is \code{5}. +See \code{\link[smotefamily:BLSMOTE]{BLSMOTE()}}. +\item \code{dup_size} :: \code{numeric} \cr +Desired times of synthetic minority instances over the original number of majority instances. \code{0} leads to balancing minority and majority class. +Default is \code{0}. See \code{\link[smotefamily:BLSMOTE]{BLSMOTE()}}. +\item \code{method} :: \code{character(1)} \cr +The type of Borderline-SMOTE algorithm to use. Default is \code{"type1"}. +See \code{\link[smotefamily:BLSMOTE]{BLSMOTE()}}. +} +} + +\section{Fields}{ + +Only fields inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. 
+} + +\examples{ +\dontshow{ if (requireNamespace("smotefamily")) \{ } +library("mlr3") + +# Create example task +data = data.frame( + target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), + feature = rnorm(200) +) +task = TaskClassif$new(id = "example", backend = data, target = "target") +task$head() +table(task$data(cols = "target")) + +# Generate synthetic data for minority class +pop = po("blsmote") +bls_result = pop$train(list(task))[[1]]$data() +nrow(bls_result) +table(bls_result$target) +\dontshow{ \} } +} +\references{ +Han, Hui, Wang, Wen-Yuan, Mao, Bing-Huan (2005). +\dQuote{Borderline-SMOTE: A New Over-Sampling Method in Imbalanced Data Sets Learning.} +In Huang, De-Shuang, Zhang, Xiao-Ping, Huang, Guang-Bin (eds.), \emph{Advances in Intelligent Computing}, 878--887. +ISBN 978-3-540-31902-3, \doi{10.1007/11538059_91}. +} +\seealso{ +https://mlr-org.com/pipeops.html + +Other PipeOps: +\code{\link{PipeOp}}, +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, 
+\code{\link{mlr_pipeops_imputelearner}}, +\code{\link{mlr_pipeops_imputemean}}, +\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, +\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, +\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, +\code{\link{mlr_pipeops_regravg}}, +\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_rowapply}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, +\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}} +} +\concept{PipeOps} diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd index 064a069ca..3040cee0f 100644 --- a/man/mlr_pipeops_boxcox.Rd +++ b/man/mlr_pipeops_boxcox.Rd @@ -90,6 +90,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, 
\code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index a83b502a1..ada5dadd8 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ -108,6 +108,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_chunk}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_chunk.Rd b/man/mlr_pipeops_chunk.Rd index 4b98bbd2a..89b02198f 100644 --- a/man/mlr_pipeops_chunk.Rd +++ b/man/mlr_pipeops_chunk.Rd @@ -87,6 +87,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_classbalancing}}, diff --git a/man/mlr_pipeops_classbalancing.Rd b/man/mlr_pipeops_classbalancing.Rd index 19dcd067e..3e3964114 100644 --- a/man/mlr_pipeops_classbalancing.Rd +++ b/man/mlr_pipeops_classbalancing.Rd @@ -128,6 +128,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd index 160ba73ab..d187fd359 100644 --- a/man/mlr_pipeops_classifavg.Rd +++ b/man/mlr_pipeops_classifavg.Rd @@ -104,6 +104,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, 
\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_classweights.Rd b/man/mlr_pipeops_classweights.Rd index 7493a3a6b..f98e1a9af 100644 --- a/man/mlr_pipeops_classweights.Rd +++ b/man/mlr_pipeops_classweights.Rd @@ -107,6 +107,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_colapply.Rd b/man/mlr_pipeops_colapply.Rd index bf8065f8e..e1116b900 100644 --- a/man/mlr_pipeops_colapply.Rd +++ b/man/mlr_pipeops_colapply.Rd @@ -117,6 +117,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_collapsefactors.Rd b/man/mlr_pipeops_collapsefactors.Rd index 91798c99d..eba248bf4 100644 --- a/man/mlr_pipeops_collapsefactors.Rd +++ b/man/mlr_pipeops_collapsefactors.Rd @@ -84,6 +84,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index a0e742faa..09118f2b2 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -76,6 +76,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, 
+\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_copy.Rd b/man/mlr_pipeops_copy.Rd index a4160342d..6ac37cbde 100644 --- a/man/mlr_pipeops_copy.Rd +++ b/man/mlr_pipeops_copy.Rd @@ -106,6 +106,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_datefeatures.Rd b/man/mlr_pipeops_datefeatures.Rd index 5028544b2..5027cfb87 100644 --- a/man/mlr_pipeops_datefeatures.Rd +++ b/man/mlr_pipeops_datefeatures.Rd @@ -123,6 +123,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encode.Rd b/man/mlr_pipeops_encode.Rd index 71a13f26f..8101640a4 100644 --- a/man/mlr_pipeops_encode.Rd +++ b/man/mlr_pipeops_encode.Rd @@ -119,6 +119,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encodeimpact.Rd b/man/mlr_pipeops_encodeimpact.Rd index 7a435e1f2..6b28027a6 100644 --- a/man/mlr_pipeops_encodeimpact.Rd +++ b/man/mlr_pipeops_encodeimpact.Rd @@ -101,6 +101,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, 
\code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd index ad391725f..aebc17350 100644 --- a/man/mlr_pipeops_encodelmer.Rd +++ b/man/mlr_pipeops_encodelmer.Rd @@ -116,6 +116,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_featureunion.Rd b/man/mlr_pipeops_featureunion.Rd index a509b87eb..94b2852c6 100644 --- a/man/mlr_pipeops_featureunion.Rd +++ b/man/mlr_pipeops_featureunion.Rd @@ -121,6 +121,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd index a37d1328a..94d07c99b 100644 --- a/man/mlr_pipeops_filter.Rd +++ b/man/mlr_pipeops_filter.Rd @@ -152,6 +152,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_fixfactors.Rd b/man/mlr_pipeops_fixfactors.Rd index 6a4ac569c..0990e5c9e 100644 --- a/man/mlr_pipeops_fixfactors.Rd +++ b/man/mlr_pipeops_fixfactors.Rd @@ -76,6 +76,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, 
\code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd index ce133cd8b..cb1036529 100644 --- a/man/mlr_pipeops_histbin.Rd +++ b/man/mlr_pipeops_histbin.Rd @@ -88,6 +88,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd index d6e93d163..b1ab7a6a2 100644 --- a/man/mlr_pipeops_ica.Rd +++ b/man/mlr_pipeops_ica.Rd @@ -116,6 +116,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputeconstant.Rd b/man/mlr_pipeops_imputeconstant.Rd index a6ab5d027..19a336b88 100644 --- a/man/mlr_pipeops_imputeconstant.Rd +++ b/man/mlr_pipeops_imputeconstant.Rd @@ -90,6 +90,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputehist.Rd b/man/mlr_pipeops_imputehist.Rd index d71500f0b..3f4fbf874 100644 --- a/man/mlr_pipeops_imputehist.Rd +++ b/man/mlr_pipeops_imputehist.Rd @@ -82,6 +82,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputelearner.Rd 
b/man/mlr_pipeops_imputelearner.Rd index 4819be20f..12000d2fa 100644 --- a/man/mlr_pipeops_imputelearner.Rd +++ b/man/mlr_pipeops_imputelearner.Rd @@ -127,6 +127,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemean.Rd b/man/mlr_pipeops_imputemean.Rd index 64dd29a38..ab209fe64 100644 --- a/man/mlr_pipeops_imputemean.Rd +++ b/man/mlr_pipeops_imputemean.Rd @@ -75,6 +75,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemedian.Rd b/man/mlr_pipeops_imputemedian.Rd index 1f4286c64..aef066768 100644 --- a/man/mlr_pipeops_imputemedian.Rd +++ b/man/mlr_pipeops_imputemedian.Rd @@ -75,6 +75,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputemode.Rd b/man/mlr_pipeops_imputemode.Rd index 9cbcdba06..70771b58a 100644 --- a/man/mlr_pipeops_imputemode.Rd +++ b/man/mlr_pipeops_imputemode.Rd @@ -82,6 +82,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputeoor.Rd b/man/mlr_pipeops_imputeoor.Rd index 
499d7fb60..35788e02a 100644 --- a/man/mlr_pipeops_imputeoor.Rd +++ b/man/mlr_pipeops_imputeoor.Rd @@ -123,6 +123,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_imputesample.Rd b/man/mlr_pipeops_imputesample.Rd index d9f4d8f75..1a6380bfb 100644 --- a/man/mlr_pipeops_imputesample.Rd +++ b/man/mlr_pipeops_imputesample.Rd @@ -77,6 +77,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd index a9bddd763..2fdbd1d4a 100644 --- a/man/mlr_pipeops_kernelpca.Rd +++ b/man/mlr_pipeops_kernelpca.Rd @@ -91,6 +91,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd index 43c259806..69166ce8c 100644 --- a/man/mlr_pipeops_learner.Rd +++ b/man/mlr_pipeops_learner.Rd @@ -122,6 +122,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd index b9f8d51da..eaefa29f4 100644 --- a/man/mlr_pipeops_missind.Rd +++ 
b/man/mlr_pipeops_missind.Rd @@ -105,6 +105,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_modelmatrix.Rd b/man/mlr_pipeops_modelmatrix.Rd index 1e1b00c2e..b85e1dc25 100644 --- a/man/mlr_pipeops_modelmatrix.Rd +++ b/man/mlr_pipeops_modelmatrix.Rd @@ -81,6 +81,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_multiplicityexply.Rd b/man/mlr_pipeops_multiplicityexply.Rd index e4c67c232..0a81e96be 100644 --- a/man/mlr_pipeops_multiplicityexply.Rd +++ b/man/mlr_pipeops_multiplicityexply.Rd @@ -87,6 +87,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_multiplicityimply.Rd b/man/mlr_pipeops_multiplicityimply.Rd index c07f85bab..4e0d9c167 100644 --- a/man/mlr_pipeops_multiplicityimply.Rd +++ b/man/mlr_pipeops_multiplicityimply.Rd @@ -93,6 +93,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_mutate.Rd b/man/mlr_pipeops_mutate.Rd index 8da58522f..dd0f3139b 100644 --- a/man/mlr_pipeops_mutate.Rd +++ 
b/man/mlr_pipeops_mutate.Rd @@ -98,6 +98,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index f651ec210..500fd18fb 100644 --- a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -134,6 +134,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_nop.Rd b/man/mlr_pipeops_nop.Rd index fd6fd2ea4..e41af83c4 100644 --- a/man/mlr_pipeops_nop.Rd +++ b/man/mlr_pipeops_nop.Rd @@ -83,6 +83,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd index 76c661fde..ea08978f0 100644 --- a/man/mlr_pipeops_ovrsplit.Rd +++ b/man/mlr_pipeops_ovrsplit.Rd @@ -100,6 +100,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd index f01cba41e..bbe17034e 100644 --- a/man/mlr_pipeops_ovrunite.Rd +++ b/man/mlr_pipeops_ovrunite.Rd @@ -95,6 +95,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, 
\code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_pca.Rd b/man/mlr_pipeops_pca.Rd index 18f5eb086..165d25684 100644 --- a/man/mlr_pipeops_pca.Rd +++ b/man/mlr_pipeops_pca.Rd @@ -92,6 +92,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd index a5ef51112..c009b1af9 100644 --- a/man/mlr_pipeops_proxy.Rd +++ b/man/mlr_pipeops_proxy.Rd @@ -106,6 +106,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_quantilebin.Rd b/man/mlr_pipeops_quantilebin.Rd index 6e5a85a24..bd6502e5d 100644 --- a/man/mlr_pipeops_quantilebin.Rd +++ b/man/mlr_pipeops_quantilebin.Rd @@ -80,6 +80,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_randomprojection.Rd b/man/mlr_pipeops_randomprojection.Rd index 2323caf66..f28888d6c 100644 --- a/man/mlr_pipeops_randomprojection.Rd +++ b/man/mlr_pipeops_randomprojection.Rd @@ -92,6 +92,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, 
+\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd index c497d3ad2..07b894204 100644 --- a/man/mlr_pipeops_randomresponse.Rd +++ b/man/mlr_pipeops_randomresponse.Rd @@ -109,6 +109,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd index a97bde700..9a98022c2 100644 --- a/man/mlr_pipeops_regravg.Rd +++ b/man/mlr_pipeops_regravg.Rd @@ -95,6 +95,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_removeconstants.Rd b/man/mlr_pipeops_removeconstants.Rd index ef3d43d75..d934de46f 100644 --- a/man/mlr_pipeops_removeconstants.Rd +++ b/man/mlr_pipeops_removeconstants.Rd @@ -85,6 +85,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_renamecolumns.Rd b/man/mlr_pipeops_renamecolumns.Rd index 20947f1be..6b4cb6030 100644 --- a/man/mlr_pipeops_renamecolumns.Rd +++ b/man/mlr_pipeops_renamecolumns.Rd @@ -84,6 +84,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, 
+\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_replicate.Rd b/man/mlr_pipeops_replicate.Rd index 71949f16c..862c84502 100644 --- a/man/mlr_pipeops_replicate.Rd +++ b/man/mlr_pipeops_replicate.Rd @@ -77,6 +77,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_rowapply.Rd b/man/mlr_pipeops_rowapply.Rd index cc15306ab..f429c5049 100644 --- a/man/mlr_pipeops_rowapply.Rd +++ b/man/mlr_pipeops_rowapply.Rd @@ -83,6 +83,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scale.Rd b/man/mlr_pipeops_scale.Rd index 9c8a3a316..b841893ce 100644 --- a/man/mlr_pipeops_scale.Rd +++ b/man/mlr_pipeops_scale.Rd @@ -99,6 +99,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scalemaxabs.Rd b/man/mlr_pipeops_scalemaxabs.Rd index 46c5c4c45..bbed1298a 100644 --- a/man/mlr_pipeops_scalemaxabs.Rd +++ b/man/mlr_pipeops_scalemaxabs.Rd @@ -74,6 +74,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, 
\code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_scalerange.Rd b/man/mlr_pipeops_scalerange.Rd index 678e54b0d..a86dbdfe9 100644 --- a/man/mlr_pipeops_scalerange.Rd +++ b/man/mlr_pipeops_scalerange.Rd @@ -79,6 +79,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_select.Rd b/man/mlr_pipeops_select.Rd index 353e280b0..b56b884dc 100644 --- a/man/mlr_pipeops_select.Rd +++ b/man/mlr_pipeops_select.Rd @@ -95,6 +95,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd index ccbd9c6cd..49849b5c8 100644 --- a/man/mlr_pipeops_smote.Rd +++ b/man/mlr_pipeops_smote.Rd @@ -98,6 +98,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_spatialsign.Rd b/man/mlr_pipeops_spatialsign.Rd index 9fdb650d6..36c81559e 100644 --- a/man/mlr_pipeops_spatialsign.Rd +++ b/man/mlr_pipeops_spatialsign.Rd @@ -74,6 +74,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git 
a/man/mlr_pipeops_subsample.Rd b/man/mlr_pipeops_subsample.Rd index c89142226..93d370dc0 100644 --- a/man/mlr_pipeops_subsample.Rd +++ b/man/mlr_pipeops_subsample.Rd @@ -89,6 +89,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd index fe5073375..1c971e6c5 100644 --- a/man/mlr_pipeops_targetinvert.Rd +++ b/man/mlr_pipeops_targetinvert.Rd @@ -74,6 +74,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index dd7982fdf..62cb92ea5 100644 --- a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -122,6 +122,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd index e651099eb..58a246584 100644 --- a/man/mlr_pipeops_targettrafoscalerange.Rd +++ b/man/mlr_pipeops_targettrafoscalerange.Rd @@ -88,6 +88,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git 
a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd index 57ab20d9a..60cac3a8b 100644 --- a/man/mlr_pipeops_textvectorizer.Rd +++ b/man/mlr_pipeops_textvectorizer.Rd @@ -188,6 +188,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd index 44f63dc31..34f80b951 100644 --- a/man/mlr_pipeops_threshold.Rd +++ b/man/mlr_pipeops_threshold.Rd @@ -87,6 +87,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index 34fa82948..4eeae626c 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -117,6 +117,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_unbranch.Rd b/man/mlr_pipeops_unbranch.Rd index 6d17dfeb3..ea6864c01 100644 --- a/man/mlr_pipeops_unbranch.Rd +++ b/man/mlr_pipeops_unbranch.Rd @@ -86,6 +86,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_updatetarget.Rd 
b/man/mlr_pipeops_updatetarget.Rd index 2774382f7..9327e732a 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -101,6 +101,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd index 28d5f205a..7059dc19e 100644 --- a/man/mlr_pipeops_vtreat.Rd +++ b/man/mlr_pipeops_vtreat.Rd @@ -154,6 +154,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd index 89123d332..eb3a73e20 100644 --- a/man/mlr_pipeops_yeojohnson.Rd +++ b/man/mlr_pipeops_yeojohnson.Rd @@ -91,6 +91,8 @@ Other PipeOps: \code{\link{PipeOpTaskPreproc}}, \code{\link{PipeOpTaskPreprocSimple}}, \code{\link{mlr_pipeops}}, +\code{\link{mlr_pipeops_adas}}, +\code{\link{mlr_pipeops_blsmote}}, \code{\link{mlr_pipeops_boxcox}}, \code{\link{mlr_pipeops_branch}}, \code{\link{mlr_pipeops_chunk}}, From 98a051ecd9f7254f91a3b6f7794da04bc6111d14 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 14 Sep 2024 22:49:04 +0200 Subject: [PATCH 04/15] tests --- tests/testthat/test_pipeop_adas.R | 55 ++++++++++++++++++++++ tests/testthat/test_pipeop_blsmote.R | 70 ++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 tests/testthat/test_pipeop_adas.R create mode 100644 tests/testthat/test_pipeop_blsmote.R diff --git a/tests/testthat/test_pipeop_adas.R b/tests/testthat/test_pipeop_adas.R new file mode 100644 index 000000000..a392c1f61 --- /dev/null +++ 
b/tests/testthat/test_pipeop_adas.R @@ -0,0 +1,55 @@ +context("PipeOpADAS") + +test_that("PipeOpADAS - basic properties", { + skip_if_not_installed("smotefamily") + + task = mlr_tasks$get("spam") + + expect_datapreproc_pipeop_class(PipeOpADAS, task = task, predict_like_train = FALSE, deterministic_train = FALSE) +}) + +test_that("PipeOpADAS - train works as intended", { + skip_if_not_installed("smotefamily") + + op = PipeOpADAS$new() + + df = data.frame( + class = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), + x1 = rnorm(200), + x2 = rnorm(200) + ) + task = TaskClassif$new(id = "test", backend = df, target = "class") + + # Compare to smotefamily::ADAS with default params + set.seed(1234L) + train_out = op$train(list(task))[[1]]$data() + set.seed(1234L) + # rbind for same row (and col) order + adas_out = setDT(rbind( + df, + invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth())$syn_data + )) + + expect_equal(train_out, adas_out) + + # Compare to smotefamily::ADAS with changed params + op$param_set$set_values(K = 10) + + set.seed(1234L) + train_out = op$train(list(task))[[1]]$data() + set.seed(1234L) + adas_out = setDT(rbind( + df, + invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), K = 10)$syn_data + )) + + expect_equal(train_out, adas_out) + + # Empty task is returned unchanged + task$select(character(0)) + expect_equal( + op$train(list(task))[[1L]], + task + ) + +}) diff --git a/tests/testthat/test_pipeop_blsmote.R b/tests/testthat/test_pipeop_blsmote.R new file mode 100644 index 000000000..32f9c874e --- /dev/null +++ b/tests/testthat/test_pipeop_blsmote.R @@ -0,0 +1,70 @@ +context("PipeOpBLSmote") + +test_that("PipeOpBLSmote - basic properties", { + skip_if_not_installed("smotefamily") + + task = mlr_tasks$get("spam") + + expect_datapreproc_pipeop_class(PipeOpBLSmote, task = task, predict_like_train = FALSE, deterministic_train = FALSE) +}) + 
+test_that("PipeOpBLSmote - train works as intended", { + skip_if_not_installed("smotefamily") + + op = PipeOpBLSmote$new() + + df = data.frame( + class = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), + x1 = rnorm(200), + x2 = rnorm(200) + ) + task = TaskClassif$new(id = "test", backend = df, target = "class") + + # Compare to smotefamily::BLSMOTE with default params + set.seed(1234L) + train_out = op$train(list(task))[[1]]$data() + set.seed(1234L) + blsmote_out = setDT(rbind( + df, + invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth())$syn_data + )) + + expect_equal(train_out, blsmote_out) + + # Compare to smotefamily::BLSMOTE with changed params + # method = "type2" + op$param_set$set_values(K = 10, C = 8, dupSize = 0, method = "type1") + + set.seed(1234L) + train_out = op$train(list(task))[[1]]$data() + set.seed(1234L) + blsmote_out = setDT(rbind( + df, + invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), + K = 10, C = 8, dupSize = 0, method = "type1")$syn_data + )) + + expect_equal(train_out, blsmote_out) + + # method = "type1" + op$param_set$set_values(K = 10, C = 8, dupSize = 0, method = "type2") + + set.seed(1234L) + train_out = op$train(list(task))[[1]]$data() + set.seed(1234L) + blsmote_out = setDT(rbind( + df, + invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), + K = 10, C = 8, dupSize = 0, method = "type2")$syn_data + )) + + expect_equal(train_out, blsmote_out) + + # Empty task is returned unchanged + task$select(character(0)) + expect_equal( + op$train(list(task))[[1L]], + task + ) + +}) From 28d4617b9735cb60a39aaf11e42909266e178871 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 14 Sep 2024 23:16:15 +0200 Subject: [PATCH 05/15] fixed example --- R/PipeOpBLSmote.R | 5 +++-- man/mlr_pipeops_blsmote.Rd | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git 
a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index 375669610..4f7a82d77 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -62,8 +62,9 @@ #' #' # Create example task #' data = data.frame( -#' target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), -#' feature = rnorm(200) +#' target = factor(sample(c("c1", "c2"), size = 300, replace = TRUE, prob = c(0.1, 0.9))), +#' x1 = rnorm(300), +#' x2 = rnorm(300) #' ) #' task = TaskClassif$new(id = "example", backend = data, target = "target") #' task$head() diff --git a/man/mlr_pipeops_blsmote.Rd b/man/mlr_pipeops_blsmote.Rd index 5c38845c4..9e09bf354 100644 --- a/man/mlr_pipeops_blsmote.Rd +++ b/man/mlr_pipeops_blsmote.Rd @@ -73,8 +73,9 @@ library("mlr3") # Create example task data = data.frame( - target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), - feature = rnorm(200) + target = factor(sample(c("c1", "c2"), size = 300, replace = TRUE, prob = c(0.1, 0.9))), + x1 = rnorm(300), + x2 = rnorm(300) ) task = TaskClassif$new(id = "example", backend = data, target = "target") task$head() From fdb82ab56ba8acb7cdb0f974410d7848b00cc714 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 14 Sep 2024 23:32:37 +0200 Subject: [PATCH 06/15] suppress partial arg matching warnings --- R/PipeOpADAS.R | 3 ++- R/PipeOpBLSmote.R | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/PipeOpADAS.R b/R/PipeOpADAS.R index 133795305..2839ef15a 100644 --- a/R/PipeOpADAS.R +++ b/R/PipeOpADAS.R @@ -100,7 +100,8 @@ PipeOpADAS = R6Class("PipeOpADAS", # calculate synthetic data st = setDT(invoke(smotefamily::ADAS, X = dt, target = task$truth(), - .args = self$param_set$get_values(tags = "adas"))$syn_data) + .args = self$param_set$get_values(tags = "adas"), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data) # ADAS uses partial arg matching internally # rename target column and fix character conversion st[["class"]] = as_factor(st[["class"]], levels = 
task$class_names) diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index 4f7a82d77..d452593ef 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -112,7 +112,8 @@ PipeOpBLSmote = R6Class("PipeOpBLSmote", # Calculate synthetic data # TODO: Do we have a way to suppress messages by print()? st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), - .args = self$param_set$get_values(tags = "blsmote"))$syn_data) + .args = self$param_set$get_values(tags = "blsmote"), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data) # BLSMOTE uses partial arg matching internally # Rename target column and fix character conversion st[["class"]] = as_factor(st[["class"]], levels = task$class_names) From c42bbab3949da61da77894a770e5cbcb1bf360ae Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 15 Sep 2024 12:51:54 +0200 Subject: [PATCH 07/15] fix tests and examples to avoid cases with too small DANGER sets --- R/PipeOpBLSmote.R | 13 +++++-------- tests/testthat/test_pipeop_blsmote.R | 28 +++++++++++++++------------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index d452593ef..f5068ca08 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -61,20 +61,17 @@ #' library("mlr3") #' #' # Create example task -#' data = data.frame( -#' target = factor(sample(c("c1", "c2"), size = 300, replace = TRUE, prob = c(0.1, 0.9))), -#' x1 = rnorm(300), -#' x2 = rnorm(300) -#' ) -#' task = TaskClassif$new(id = "example", backend = data, target = "target") +#' data = smotefamily::sample_generator(500, 0.8) +#' data$result = factor(data$result) +#' task = TaskClassif$new(id = "example", backend = data, target = "result") #' task$head() -#' table(task$data(cols = "target")) +#' table(task$data(cols = "result")) #' #' # Generate synthetic data for minority class #' pop = po("blsmote") #' bls_result = pop$train(list(task))[[1]]$data() #' nrow(bls_result) -#' table(bls_result$target) +#' table(bls_result$result) #' 
\dontshow{ \} } PipeOpBLSmote = R6Class("PipeOpBLSmote", inherit = PipeOpTaskPreproc, diff --git a/tests/testthat/test_pipeop_blsmote.R b/tests/testthat/test_pipeop_blsmote.R index 32f9c874e..3fd20c49f 100644 --- a/tests/testthat/test_pipeop_blsmote.R +++ b/tests/testthat/test_pipeop_blsmote.R @@ -13,11 +13,10 @@ test_that("PipeOpBLSmote - train works as intended", { op = PipeOpBLSmote$new() - df = data.frame( - class = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), - x1 = rnorm(200), - x2 = rnorm(200) - ) + df = smotefamily::sample_generator(500, 0.8) + df$result = factor(df$result) + setnames(df, "result", "class") # we do this to avoid renaming later + df = df[, c(3,1,2)] # we do this to avoid reordering later task = TaskClassif$new(id = "test", backend = df, target = "class") # Compare to smotefamily::BLSMOTE with default params @@ -26,42 +25,45 @@ test_that("PipeOpBLSmote - train works as intended", { set.seed(1234L) blsmote_out = setDT(rbind( df, - invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth())$syn_data + invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data )) expect_equal(train_out, blsmote_out) # Compare to smotefamily::BLSMOTE with changed params # method = "type2" - op$param_set$set_values(K = 10, C = 8, dupSize = 0, method = "type1") + pv = list(K = 4L, C = 8L, dupSize = 0, method = "type1") + op$param_set$set_values(.values = pv) set.seed(1234L) - train_out = op$train(list(task))[[1]]$data() + train_out = op$train(list(task))[[1L]]$data() set.seed(1234L) blsmote_out = setDT(rbind( df, invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), - K = 10, C = 8, dupSize = 0, method = "type1")$syn_data + .args = pv, .opts = list(warnPartialMatchArgs = FALSE))$syn_data )) expect_equal(train_out, blsmote_out) # method = "type1" - 
op$param_set$set_values(K = 10, C = 8, dupSize = 0, method = "type2") + pv = list(K = 4L, C = 8L, dupSize = 0, method = "type2") + op$param_set$set_values(.values = pv) set.seed(1234L) - train_out = op$train(list(task))[[1]]$data() + train_out = op$train(list(task))[[1L]]$data() set.seed(1234L) blsmote_out = setDT(rbind( df, invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), - K = 10, C = 8, dupSize = 0, method = "type2")$syn_data + .args = pv, .opts = list(warnPartialMatchArgs = FALSE))$syn_data )) expect_equal(train_out, blsmote_out) # Empty task is returned unchanged - task$select(character(0)) + task$select(character(0L)) expect_equal( op$train(list(task))[[1L]], task From 5ad96989d149504972c64b0f6b9789dfdf2cb6c3 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 15 Sep 2024 12:54:10 +0200 Subject: [PATCH 08/15] docs --- man/mlr_pipeops_blsmote.Rd | 13 +++++-------- tests/testthat/test_pipeop_blsmote.R | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/man/mlr_pipeops_blsmote.Rd b/man/mlr_pipeops_blsmote.Rd index 9e09bf354..34046abab 100644 --- a/man/mlr_pipeops_blsmote.Rd +++ b/man/mlr_pipeops_blsmote.Rd @@ -72,20 +72,17 @@ Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}} library("mlr3") # Create example task -data = data.frame( - target = factor(sample(c("c1", "c2"), size = 300, replace = TRUE, prob = c(0.1, 0.9))), - x1 = rnorm(300), - x2 = rnorm(300) -) -task = TaskClassif$new(id = "example", backend = data, target = "target") +data = smotefamily::sample_generator(500, 0.8) +data$result = factor(data$result) +task = TaskClassif$new(id = "example", backend = data, target = "result") task$head() -table(task$data(cols = "target")) +table(task$data(cols = "result")) # Generate synthetic data for minority class pop = po("blsmote") bls_result = pop$train(list(task))[[1]]$data() nrow(bls_result) -table(bls_result$target) +table(bls_result$result) \dontshow{ \} } } 
\references{ diff --git a/tests/testthat/test_pipeop_blsmote.R b/tests/testthat/test_pipeop_blsmote.R index 3fd20c49f..c1bbff014 100644 --- a/tests/testthat/test_pipeop_blsmote.R +++ b/tests/testthat/test_pipeop_blsmote.R @@ -32,7 +32,7 @@ test_that("PipeOpBLSmote - train works as intended", { expect_equal(train_out, blsmote_out) # Compare to smotefamily::BLSMOTE with changed params - # method = "type2" + # method = "type1" pv = list(K = 4L, C = 8L, dupSize = 0, method = "type1") op$param_set$set_values(.values = pv) From c33d3d50de601b2532cf023738b535e0d0725391 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sun, 15 Sep 2024 13:26:00 +0200 Subject: [PATCH 09/15] suppress partial arg match warnings in adas tests --- tests/testthat/test_pipeop_adas.R | 6 ++++-- tests/testthat/test_pipeop_blsmote.R | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_pipeop_adas.R b/tests/testthat/test_pipeop_adas.R index a392c1f61..2a31db5b9 100644 --- a/tests/testthat/test_pipeop_adas.R +++ b/tests/testthat/test_pipeop_adas.R @@ -27,7 +27,8 @@ test_that("PipeOpADAS - train works as intended", { # rbind for same row (and col) order adas_out = setDT(rbind( df, - invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth())$syn_data + invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data )) expect_equal(train_out, adas_out) @@ -40,7 +41,8 @@ test_that("PipeOpADAS - train works as intended", { set.seed(1234L) adas_out = setDT(rbind( df, - invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), K = 10)$syn_data + invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), K = 10, + .opts = list(warnPartialMatchArgs = FALSE))$syn_data )) expect_equal(train_out, adas_out) diff --git a/tests/testthat/test_pipeop_blsmote.R b/tests/testthat/test_pipeop_blsmote.R 
index c1bbff014..1f95e13ac 100644 --- a/tests/testthat/test_pipeop_blsmote.R +++ b/tests/testthat/test_pipeop_blsmote.R @@ -16,12 +16,12 @@ test_that("PipeOpBLSmote - train works as intended", { df = smotefamily::sample_generator(500, 0.8) df$result = factor(df$result) setnames(df, "result", "class") # we do this to avoid renaming later - df = df[, c(3,1,2)] # we do this to avoid reordering later + df = df[, c(3L, 1L, 2L)] # we do this to avoid reordering later task = TaskClassif$new(id = "test", backend = df, target = "class") # Compare to smotefamily::BLSMOTE with default params set.seed(1234L) - train_out = op$train(list(task))[[1]]$data() + train_out = op$train(list(task))[[1L]]$data() set.seed(1234L) blsmote_out = setDT(rbind( df, From c540e63bf535dbc328d66e4f638630cf67c2ddd9 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 21 Sep 2024 22:04:56 +0200 Subject: [PATCH 10/15] code review changes --- R/PipeOpADAS.R | 9 +++-- R/PipeOpBLSmote.R | 28 ++++++++++---- tests/testthat/test_pipeop_adas.R | 50 +++++++++++++++++------- tests/testthat/test_pipeop_blsmote.R | 58 ++++++++++++++++++++-------- 4 files changed, 103 insertions(+), 42 deletions(-) diff --git a/R/PipeOpADAS.R b/R/PipeOpADAS.R index 2839ef15a..53cd21673 100644 --- a/R/PipeOpADAS.R +++ b/R/PipeOpADAS.R @@ -98,14 +98,15 @@ PipeOpADAS = R6Class("PipeOpADAS", } dt = task$data(cols = cols) - # calculate synthetic data + # Calculate synthetic data st = setDT(invoke(smotefamily::ADAS, X = dt, target = task$truth(), .args = self$param_set$get_values(tags = "adas"), .opts = list(warnPartialMatchArgs = FALSE))$syn_data) # ADAS uses partial arg matching internally - # rename target column and fix character conversion - st[["class"]] = as_factor(st[["class"]], levels = task$class_names) - setnames(st, "class", task$target_names) + # Rename target column and fix character conversion + # We index by position (target should be last column) instead of indexing by name, which would lead to problems if a 
feature were called "class" + st[[ncol(st)]] = as_factor(st[[ncol(st)]], levels = task$class_names) + setnames(st, ncol(st), task$target_names) task$rbind(st) } diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index f5068ca08..259084cd9 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -42,6 +42,8 @@ #' * `method` :: `character(1)` \cr #' The type of Borderline-SMOTE algorithm to use. Default is `"type1"`. #' See [`BLSMOTE()`][`smotefamily::BLSMOTE`]. +#' * `quiet` :: `logical(1)` \cr +#' Whether to suppress printing status during training. Initialized to `TRUE`. #' #' @section Fields: #' Only fields inherited from [`PipeOpTaskPreproc`]/[`PipeOp`]. @@ -80,11 +82,13 @@ PipeOpBLSmote = R6Class("PipeOpBLSmote", ps = ps( K = p_int(lower = 1, default = 5, tags = c("train", "blsmote")), C = p_int(lower = 1, default = 5, tags = c("train", "blsmote")), - # dup_size = 0 leads to behaviour different from 1, 2, 3, ..., because it means "duplicating until balanced", so it is a 'special_vals'. + # dup_size = 0 leads to behaviour different from 1, 2, 3, ..., because it means "duplicating until balanced", so it is a "special_vals". dupSize = p_int(lower = 1, default = 0, special_vals = list(0), tags = c("train", "blsmote")), # Default of `method` is derived from the source code of smotefamily::BLSMOTE(), not documented there. - method = p_fct(levels = c("type1", "type2"), tags = c("train", "blsmote")) + method = p_fct(levels = c("type1", "type2"), default = "type1", tags = c("train", "blsmote")), + quiet = p_lgl(tags = "train") ) + ps$values = list(quiet = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, packages = "smotefamily", task_type = "TaskClassif", tags = "imbalanced data") } @@ -107,14 +111,22 @@ PipeOpBLSmote = R6Class("PipeOpBLSmote", dt = task$data(cols = cols) # Calculate synthetic data - # TODO: Do we have a way to suppress messages by print()? 
- st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), - .args = self$param_set$get_values(tags = "blsmote"), - .opts = list(warnPartialMatchArgs = FALSE))$syn_data) # BLSMOTE uses partial arg matching internally + if (self$param_set$values$quiet) { + base::invisible(utils::capture.output({ + st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), + .args = self$param_set$get_values(tags = "blsmote"), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data) # BLSMOTE uses partial arg matching internally + })) # using {} not elegant? + } else { + st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), + .args = self$param_set$get_values(tags = "blsmote"), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data) + } # Rename target column and fix character conversion - st[["class"]] = as_factor(st[["class"]], levels = task$class_names) - setnames(st, "class", task$target_names) + # We index by position (target should be last column) instead of indexing by name, which would lead to problems if a feature were called "class" + st[[ncol(st)]] = as_factor(st[[ncol(st)]], levels = task$class_names) + setnames(st, ncol(st), task$target_names) task$rbind(st) } diff --git a/tests/testthat/test_pipeop_adas.R b/tests/testthat/test_pipeop_adas.R index 2a31db5b9..9c8786565 100644 --- a/tests/testthat/test_pipeop_adas.R +++ b/tests/testthat/test_pipeop_adas.R @@ -14,22 +14,22 @@ test_that("PipeOpADAS - train works as intended", { op = PipeOpADAS$new() df = data.frame( - class = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), + target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), x1 = rnorm(200), x2 = rnorm(200) ) - task = TaskClassif$new(id = "test", backend = df, target = "class") + task = TaskClassif$new(id = "test", backend = df, target = "target") # Compare to smotefamily::ADAS with default params set.seed(1234L) train_out = op$train(list(task))[[1]]$data() 
set.seed(1234L) - # rbind for same row (and col) order - adas_out = setDT(rbind( - df, - invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), - .opts = list(warnPartialMatchArgs = FALSE))$syn_data - )) + df_out = invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data + # Rename class column to target and rbind for same row and col order + # Rename by name (not position) to notice should the order of columns outputted by smotefamily::ADAS be changed in the future + setnames(df_out, "class", "target") + adas_out = setDT(rbind(df, df_out)) expect_equal(train_out, adas_out) @@ -39,11 +39,10 @@ test_that("PipeOpADAS - train works as intended", { set.seed(1234L) train_out = op$train(list(task))[[1]]$data() set.seed(1234L) - adas_out = setDT(rbind( - df, - invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), K = 10, - .opts = list(warnPartialMatchArgs = FALSE))$syn_data - )) + df_out = invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), K = 10, + .opts = list(warnPartialMatchArgs = FALSE))$syn_data + setnames(df_out, "class", "target") + adas_out = setDT(rbind(df, df_out)) expect_equal(train_out, adas_out) @@ -55,3 +54,28 @@ test_that("PipeOpADAS - train works as intended", { ) }) + +test_that("PipeOpADAS - handling of feature named 'class'", { + skip_if_not_installed("smotefamily") + + op = PipeOpADAS$new() + + df = data.frame( + target = factor(sample(c("c1", "c2"), size = 200, replace = TRUE, prob = c(0.1, 0.9))), + class = rnorm(200), + x = rnorm(200) + ) + task = TaskClassif$new(id = "test", backend = df, target = "target") + + set.seed(1234L) + train_out = op$train(list(task))[[1]]$data() + set.seed(1234L) + df_out = invoke(smotefamily::ADAS, X = task$data(cols = task$feature_names), target = task$truth(), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data + # 
Renaming by position + setnames(df_out, 3, "target") + adas_out = setDT(rbind(df, df_out)) + + expect_equal(train_out, adas_out) + +}) diff --git a/tests/testthat/test_pipeop_blsmote.R b/tests/testthat/test_pipeop_blsmote.R index 1f95e13ac..2c75b952d 100644 --- a/tests/testthat/test_pipeop_blsmote.R +++ b/tests/testthat/test_pipeop_blsmote.R @@ -15,19 +15,19 @@ test_that("PipeOpBLSmote - train works as intended", { df = smotefamily::sample_generator(500, 0.8) df$result = factor(df$result) - setnames(df, "result", "class") # we do this to avoid renaming later df = df[, c(3L, 1L, 2L)] # we do this to avoid reordering later - task = TaskClassif$new(id = "test", backend = df, target = "class") + task = TaskClassif$new(id = "test", backend = df, target = "result") # Compare to smotefamily::BLSMOTE with default params set.seed(1234L) train_out = op$train(list(task))[[1L]]$data() set.seed(1234L) - blsmote_out = setDT(rbind( - df, - invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), - .opts = list(warnPartialMatchArgs = FALSE))$syn_data - )) + df_out = invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data + # Rename target column and rbind for same row and col order + # Rename by name (not position) to notice should the order of columns outputted by smotefamily::BLSMOTE be changed in the future + setnames(df_out, "class", "result") + blsmote_out = setDT(rbind(df, df_out)) expect_equal(train_out, blsmote_out) @@ -39,11 +39,10 @@ test_that("PipeOpBLSmote - train works as intended", { set.seed(1234L) train_out = op$train(list(task))[[1L]]$data() set.seed(1234L) - blsmote_out = setDT(rbind( - df, - invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), - .args = pv, .opts = list(warnPartialMatchArgs = FALSE))$syn_data - )) + df_out = invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), 
target = task$truth(), + .args = pv, .opts = list(warnPartialMatchArgs = FALSE))$syn_data + setnames(df_out, "class", "result") + blsmote_out = setDT(rbind(df, df_out)) expect_equal(train_out, blsmote_out) @@ -54,11 +53,10 @@ test_that("PipeOpBLSmote - train works as intended", { set.seed(1234L) train_out = op$train(list(task))[[1L]]$data() set.seed(1234L) - blsmote_out = setDT(rbind( - df, - invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), - .args = pv, .opts = list(warnPartialMatchArgs = FALSE))$syn_data - )) + df_out = invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), + .args = pv, .opts = list(warnPartialMatchArgs = FALSE))$syn_data + setnames(df_out, "class", "result") + blsmote_out = setDT(rbind(df, df_out)) expect_equal(train_out, blsmote_out) @@ -70,3 +68,29 @@ test_that("PipeOpBLSmote - train works as intended", { ) }) + + +test_that("PipeOpBLSmote - handling of feature named 'class'", { + skip_if_not_installed("smotefamily") + + op = PipeOpBLSmote$new() + + df = smotefamily::sample_generator(500, 0.8) + df$result = factor(df$result) + # Rename a column into "class" + setnames(df, "X2", "class") + df = df[, c(3L, 1L, 2L)] + task = TaskClassif$new(id = "test", backend = df, target = "result") + + set.seed(1234L) + train_out = op$train(list(task))[[1]]$data() + set.seed(1234L) + df_out = invoke(smotefamily::BLSMOTE, X = task$data(cols = task$feature_names), target = task$truth(), + .opts = list(warnPartialMatchArgs = FALSE))$syn_data + # Renaming by position + setnames(df_out, 3, "result") + blsmote_out = setDT(rbind(df, df_out)) + + expect_equal(train_out, blsmote_out) + +}) From b85eb734a04d805c849a43c2447a443839464436 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 21 Sep 2024 22:06:26 +0200 Subject: [PATCH 11/15] updated NEWS.md --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 602322265..bb62a1d34 100644 
--- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # mlr3pipelines 0.6.0-9000 -* New PipeOp `PipeOpRowApply` / `po("rowapply")` +* New PipeOp: `PipeOpRowApply` / `po("rowapply")` +* New PipeOps for handling imbalanced data: `PipeOpADAS` / `po("adas")` and `PipeOpBLSmote` / `po("blsmote")` # mlr3pipelines 0.6.0 From 7fe914eff1d1857b4170f4e56bf8f861c88233db Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Sat, 21 Sep 2024 22:22:30 +0200 Subject: [PATCH 12/15] error message for wrong feature types --- R/PipeOpADAS.R | 8 ++++---- R/PipeOpBLSmote.R | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/PipeOpADAS.R b/R/PipeOpADAS.R index 53cd21673..180d7b8c4 100644 --- a/R/PipeOpADAS.R +++ b/R/PipeOpADAS.R @@ -84,21 +84,21 @@ PipeOpADAS = R6Class("PipeOpADAS", private = list( .train_task = function(task) { - assert_true(all(task$feature_types$type == "numeric")) cols = task$feature_names - unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) if (length(unsupported_cols)) { stopf("ADAS cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", paste(unsupported_cols, collapse = "', '")) } - if (!length(cols)) { return(task) } - dt = task$data(cols = cols) + if (!all(task$feature_types$type %in% c("numeric"))) { + stop("ADAS does only accept numeric features. 
Use PipeOpSelect to select the appropriate features.") + } # Calculate synthetic data + dt = task$data(cols = cols) st = setDT(invoke(smotefamily::ADAS, X = dt, target = task$truth(), .args = self$param_set$get_values(tags = "adas"), .opts = list(warnPartialMatchArgs = FALSE))$syn_data) # ADAS uses partial arg matching internally diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index 259084cd9..cb1b5e5e1 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -96,21 +96,21 @@ PipeOpBLSmote = R6Class("PipeOpBLSmote", private = list( .train_task = function(task) { - assert_true(all(task$feature_types$type == "numeric")) cols = task$feature_names - unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) if (length(unsupported_cols)) { stopf("BLSMOTE cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", paste(unsupported_cols, collapse = "', '")) } - if (!length(cols)) { return(task) } - dt = task$data(cols = cols) + if (!all(task$feature_types$type %in% c("numeric"))) { + stop("BLSmote does only accept numeric features. 
Use PipeOpSelect to select the appropriate features.") + } # Calculate synthetic data + dt = task$data(cols = cols) if (self$param_set$values$quiet) { base::invisible(utils::capture.output({ st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), From a2b20bc8b1fdf54a6fb6dd2f5dd38f9a41475338 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Mon, 23 Sep 2024 23:19:47 +0200 Subject: [PATCH 13/15] use str_collapse for unsupported cols --- R/PipeOpADAS.R | 4 ++-- R/PipeOpBLSmote.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/PipeOpADAS.R b/R/PipeOpADAS.R index 180d7b8c4..0d51d4aa7 100644 --- a/R/PipeOpADAS.R +++ b/R/PipeOpADAS.R @@ -87,8 +87,8 @@ PipeOpADAS = R6Class("PipeOpADAS", cols = task$feature_names unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) if (length(unsupported_cols)) { - stopf("ADAS cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", - paste(unsupported_cols, collapse = "', '")) + stopf("ADAS cannot generate synthetic data for the following columns since they are neither features nor targets: %s.", + str_collapse(unsupported_cols, quote = '"')) } if (!length(cols)) { return(task) diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index cb1b5e5e1..4318d7943 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -99,8 +99,8 @@ PipeOpBLSmote = R6Class("PipeOpBLSmote", cols = task$feature_names unsupported_cols = setdiff(unlist(task$col_roles), union(cols, task$target_names)) if (length(unsupported_cols)) { - stopf("BLSMOTE cannot generate synthetic data for the following columns since they are neither features nor targets: '%s'", - paste(unsupported_cols, collapse = "', '")) + stopf("BLSMOTE cannot generate synthetic data for the following columns since they are neither features nor targets: %s.", + str_collapse(unsupported_cols, quote = '"')) } if (!length(cols)) { return(task) From 
ec7d9ff41fd22295f7bca0ab9fb6dceb9b040ad1 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 24 Sep 2024 10:24:18 +0200 Subject: [PATCH 14/15] remove dev comment --- R/PipeOpBLSmote.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index 4318d7943..977d2b451 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -116,7 +116,7 @@ PipeOpBLSmote = R6Class("PipeOpBLSmote", st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), .args = self$param_set$get_values(tags = "blsmote"), .opts = list(warnPartialMatchArgs = FALSE))$syn_data) # BLSMOTE uses partial arg matching internally - })) # using {} not elegant? + })) } else { st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), .args = self$param_set$get_values(tags = "blsmote"), From 8923781df4564bf70c4cc1bec792ae474aa19eb7 Mon Sep 17 00:00:00 2001 From: kenomersmannPC Date: Tue, 24 Sep 2024 11:32:47 +0200 Subject: [PATCH 15/15] code review --- R/PipeOpBLSmote.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/PipeOpBLSmote.R b/R/PipeOpBLSmote.R index 977d2b451..c0d7df039 100644 --- a/R/PipeOpBLSmote.R +++ b/R/PipeOpBLSmote.R @@ -86,7 +86,7 @@ PipeOpBLSmote = R6Class("PipeOpBLSmote", dupSize = p_int(lower = 1, default = 0, special_vals = list(0), tags = c("train", "blsmote")), # Default of `method` is derived from the source code of smotefamily::BLSMOTE(), not documented there. 
method = p_fct(levels = c("type1", "type2"), default = "type1", tags = c("train", "blsmote")), - quiet = p_lgl(tags = "train") + quiet = p_lgl(tags = c("train", "required")) ) ps$values = list(quiet = TRUE) super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, @@ -111,12 +111,12 @@ PipeOpBLSmote = R6Class("PipeOpBLSmote", # Calculate synthetic data dt = task$data(cols = cols) - if (self$param_set$values$quiet) { - base::invisible(utils::capture.output({ + if (self$param_set$get_values()$quiet) { + utils::capture.output({ st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), .args = self$param_set$get_values(tags = "blsmote"), .opts = list(warnPartialMatchArgs = FALSE))$syn_data) # BLSMOTE uses partial arg matching internally - })) + }) } else { st = setDT(invoke(smotefamily::BLSMOTE, X = dt, target = task$truth(), .args = self$param_set$get_values(tags = "blsmote"),