Skip to content

Commit

Permalink
fix + restructure comments
Browse files Browse the repository at this point in the history
  • Loading branch information
advieser committed Nov 2, 2024
1 parent 9096aca commit 8701d9d
Showing 1 changed file with 11 additions and 16 deletions.
27 changes: 11 additions & 16 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,19 @@ task_filter_ex = function(task, row_ids) {
# Rbind duplicated rows to task
if (length(dup_ids)) {

# First, get a data.table with all duplicated rows.
new_data = task$data(rows = dup_ids, cols = cols)

# For column with role "group", create new groups for duplicates by adding a suffix.
# Second, if task has a column with role "group", create new groups for duplicate rows by adding a suffix to the group entry.
if (!is.null(task$groups)) {
group = NULL # for binding
row_id = NULL # for binding

# We create a data.table with the corresponding group to each duplicated ID.
# We then change the group entry based on how often the ID occurs.
# Note that we make no assumptions on whether the whole group is sampled here.
# That has to be checked in the functions calling this.
#
# We assume that the rbinded rows are in the same positions as the original ids in dup_ids.
# This should generally be the case as long as the task does not have a col role group
# and task$data(..., ordered = FALSE) in task$rbind() above (default).

grps = unique(task$groups)
# We create a data.table "new_groups" with the corresponding group for each duplicated ID.
# We then change the group entry based on how often the ID occurs. E.g., if row_id = 1 occurs
# two times and has the group entry "g", we rename the group entries to "g_1" and "g_2".
# If a group with a suffix (e.g. "_1") already exists, we add another suffix to it (i.e. "_1_1").
grps = unique(task$groups$group)
new_groups = task$groups[J(dup_ids), on = "row_id"][, group := {
groups = character(0)
i = 1
Expand All @@ -70,17 +66,18 @@ task_filter_ex = function(task, row_ids) {
groups
}, by = row_id]

# Generate data.table with rows for all newly added rows and updated group names
# Use "new_groups" to update the group entries.
new_data[, (task$col_roles$group) := new_groups$group]
}

# Lastly, new data is rbinded to the original task.
task$rbind(new_data)

}

# Row ids can be anything, we just take what mlr3 happens to assign to filter the task.
# row_ids can be anything, we just take what mlr3 happens to assign to filter the task.
row_ids[duplicated(row_ids)] = task$row_ids[newrows]

# Update row_ids, effectively filtering the task
task$row_roles$use = row_ids
task
}
Expand All @@ -107,7 +104,6 @@ curry = function(fn, ..., varname = "x") {
}
}


# 'and' operator for checkmate check_*-functions
# example:
# check_numeric(x) %check&&% check_true(all(x < 0))
Expand All @@ -121,7 +117,6 @@ curry = function(fn, ..., varname = "x") {
TRUE
}


# perform gsub on names of list
# `...` are given to `gsub()`
rename_list = function(x, ...) {
Expand Down

0 comments on commit 8701d9d

Please sign in to comment.