From 1b8aaae5d5d32b3a4645d918548251fa97bdbd4d Mon Sep 17 00:00:00 2001 From: Philip Studener Date: Fri, 2 Aug 2024 17:37:55 +0200 Subject: [PATCH 01/82] add pipeop draft --- R/PipeOpTaskfSurvClassifIPCW.R | 71 +++++++++++++++++++ .../test_pipeop_trafotask_survclassif_IPCW.R | 11 +++ 2 files changed, 82 insertions(+) create mode 100644 R/PipeOpTaskfSurvClassifIPCW.R create mode 100644 tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R diff --git a/R/PipeOpTaskfSurvClassifIPCW.R b/R/PipeOpTaskfSurvClassifIPCW.R new file mode 100644 index 000000000..f1ef5239e --- /dev/null +++ b/R/PipeOpTaskfSurvClassifIPCW.R @@ -0,0 +1,71 @@ + +PipeOpTaskfSurvClassifIPCW = R6Class( + "PipeOpTaskfSurvClassifIPCW", + inherit = mlr3pipelines::PipeOp, + + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function(id = "trafotask_survclassif_IPCW") { + param_set = ps( + cutoff_time = p_dbl(0, default = NULL, special_vals = list(NULL)) + ) + super$initialize( + id = id, + param_set = param_set, + input = data.table( + name = "input", + train = "TaskSurv", + predict = "TaskSurv" + ), + output = data.table( + name = "output", + train = "TaskClassif", + predict = "TaskClassif" + ) + ) + } + ), + + private = list( + .predict = function(input) { + data = input[[1]]$data() + data$status = factor(data$status, levels = c("0", "1")) + task = TaskClassif$new(id = input[[1]]$id, backend = data, + target = "status", positive = "1") + list(task) + }, + + .train = function(input) { + data_trafo = input[[1]]$data() + cutoff_time = self$param_set$values$cutoff_time + cutoff_time = 5 + + # transform data and calculate weights + data_trafo$time[data_trafo$time > cutoff_time] = cutoff_time + data_trafo$status[data_trafo$time == cutoff_time] = 1 + data_trafo$status = (data_trafo$status != 1) * 1 + + task_new = TaskSurv$new(id = "ipcw", time = "time", event = "status", backend = data_trafo) + pred = lrn("surv.kaplan")$train(task_new)$predict(task_new) + weights = 1 / pred$data$distr[1,] + + # add weights to original data + data = input[[1]]$data() + data[["ipc_weights"]] = weights[as.character(data_trafo$time)] + data[status == 0 & time < cutoff_time, "ipc_weights" := 0] + data$status = factor(data$status, levels = c("0", "1")) + + # create new task + task = TaskClassif$new(id = paste0(input[[1]]$id, "_IPCW"), backend = data, + target = "status", positive = "1") + + task$set_col_roles("ipc_weights", roles = "weight") + + self$state = list() + list(task) + } + ) +) + +register_pipeop("trafotask_survclassif_IPCW", PipeOpTaskfSurvClassifIPCW) diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R b/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R new file mode 100644 index 000000000..170efc3ed --- /dev/null +++ b/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R @@ -0,0 +1,11 @@ +test_that("PipeOpTaskSurvClassifIPCW", { + + task = tsk("rats") + po = PipeOpTaskfSurvClassifIPCW$new() + l = lrn("classif.gam") + + pipe = po %>>% l + pipe$train(task) + pred1 = pipe$predict(task)$classif.gam.output + expect_prediction_classif(pred1) +}) From 720eb58171eef7a27ff82d735f67d1703483ec21 Mon Sep 17 00:00:00 2001 From: studener Date: Mon, 5 Aug 2024 11:50:05 +0200 Subject: [PATCH 02/82] update tests --- R/PipeOpTaskSurvRegr.R | 2 +- R/PipeOpTaskfSurvClassifIPCW.R | 1 - .../test_pipeop_trafotask_survclassif_IPCW.R | 12 +++++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/R/PipeOpTaskSurvRegr.R b/R/PipeOpTaskSurvRegr.R index 
1ae86b66f..15cd0ee98 100644 --- a/R/PipeOpTaskSurvRegr.R +++ b/R/PipeOpTaskSurvRegr.R @@ -21,7 +21,7 @@ #' #' * `method::character(1))`\cr #' Method to use for dealing with censoring. Options are `"ipcw"` (Vock et al., 2016): censoring -#' is column is removed and a `weights` column is added, weights are inverse estimated survival +#' column is removed and a `weights` column is added, weights are inverse estimated survival #' probability of the censoring distribution evaluated at survival time; #' `"mrl"` (Klein and Moeschberger, 2003): survival time of censored #' observations is transformed to the observed time plus the mean residual life-time at the moment diff --git a/R/PipeOpTaskfSurvClassifIPCW.R b/R/PipeOpTaskfSurvClassifIPCW.R index f1ef5239e..761cd0f03 100644 --- a/R/PipeOpTaskfSurvClassifIPCW.R +++ b/R/PipeOpTaskfSurvClassifIPCW.R @@ -39,7 +39,6 @@ PipeOpTaskfSurvClassifIPCW = R6Class( .train = function(input) { data_trafo = input[[1]]$data() cutoff_time = self$param_set$values$cutoff_time - cutoff_time = 5 # transform data and calculate weights data_trafo$time[data_trafo$time > cutoff_time] = cutoff_time diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R b/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R index 170efc3ed..ee39012bf 100644 --- a/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R +++ b/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R @@ -1,11 +1,21 @@ +skip_if_not_installed("mlr3extralearners") test_that("PipeOpTaskSurvClassifIPCW", { task = tsk("rats") - po = PipeOpTaskfSurvClassifIPCW$new() + po = po("trafotask_survclassif_IPCW", cutoff_time = 50) l = lrn("classif.gam") pipe = po %>>% l pipe$train(task) pred1 = pipe$predict(task)$classif.gam.output expect_prediction_classif(pred1) + + + po = po("trafotask_survclassif_IPCW", cutoff_time = 75) + + pipe2 = po %>>% l + pipe2$train(task) + pred2 = pipe2$predict(task)$classif.gam.output + + testthat::expect_true(all(pred1$prob != pred2$prob)) }) From a512695a09c38243bbd8fd1185aca257540888df Mon Sep 17 00:00:00 2001 From: studener Date: Mon, 5 Aug 2024 12:38:29 +0200 Subject: [PATCH 03/82] add basic docs --- DESCRIPTION | 1 + NAMESPACE | 1 + R/PipeOpTaskfSurvClassifIPCW.R | 30 ++++- man/PipeOpPredTransformer.Rd | 1 + man/PipeOpTaskTransformer.Rd | 1 + man/PipeOpTransformer.Rd | 1 + man/mlr_pipeops_survavg.Rd | 1 + ..._pipeops_trafopred_classifsurv_disctime.Rd | 2 + man/mlr_pipeops_trafopred_regrsurv.Rd | 2 + man/mlr_pipeops_trafopred_survregr.Rd | 2 + man/mlr_pipeops_trafotask_regrsurv.Rd | 2 + man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 115 ++++++++++++++++++ ..._pipeops_trafotask_survclassif_disctime.Rd | 2 + man/mlr_pipeops_trafotask_survregr.Rd | 4 +- .../test_pipeop_trafotask_survclassif_IPCW.R | 4 +- 15 files changed, 165 insertions(+), 4 deletions(-) create mode 100644 man/mlr_pipeops_trafotask_survclassif_IPCW.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 31f2302f2..1f7744490 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -150,6 +150,7 @@ Collate: 'PipeOpTaskSurvClassifDiscTime.R' 'PipeOpTaskSurvRegr.R' 'PipeOpTaskTransformer.R' + 'PipeOpTaskfSurvClassifIPCW.R' 'PredictionDataDens.R' 'PredictionDataSurv.R' 'PredictionDens.R' diff --git a/NAMESPACE b/NAMESPACE index 673c263ba..3998e288b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -82,6 +82,7 @@ export(PipeOpTaskRegrSurv) export(PipeOpTaskSurvClassifDiscTime) export(PipeOpTaskSurvRegr) export(PipeOpTaskTransformer) +export(PipeOpTaskfSurvClassifIPCW) export(PipeOpTransformer) export(PredictionDens) export(PredictionSurv) 
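A minimal usage sketch of the PipeOp exported above — not part of the patch itself, and assuming the draft behaves as exercised by the accompanying test; `classif.rpart` and `cutoff_time = 50` are stand-in choices, and any probabilistic classifier that supports observation weights should work the same way:

```r
library(mlr3)
library(mlr3proba)      # registers "trafotask_survclassif_IPCW" when loaded
library(mlr3pipelines)

task = tsk("rats")

# reduce the survival task to a weighted binary classification task,
# then fit a probabilistic classifier on the reduced task
po_ipcw = po("trafotask_survclassif_IPCW", cutoff_time = 50)
pipe = po_ipcw %>>% lrn("classif.rpart", predict_type = "prob")

pipe$train(task)
pred = pipe$predict(task)$classif.rpart.output  # PredictionClassif on the reduced task
pred$confusion
```

During training the IPC weights are stored in an `ipc_weights` column with column role `"weight"`, so downstream learners that honor observation weights pick them up automatically.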
diff --git a/R/PipeOpTaskfSurvClassifIPCW.R b/R/PipeOpTaskfSurvClassifIPCW.R index 761cd0f03..24d1e1abe 100644 --- a/R/PipeOpTaskfSurvClassifIPCW.R +++ b/R/PipeOpTaskfSurvClassifIPCW.R @@ -1,4 +1,32 @@ - +#' @title PipeOpTaskSurvClassifIPCW +#' @name mlr_pipeops_trafotask_survclassif_IPCW +#' @template param_pipelines +#' +#' @description +#' Transform [TaskSurv] to [TaskClassif][mlr3::TaskClassif] using IPCW (Vock et al., 2016). +#' +#' @section Dictionary: +#' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the +#' [dictionary][mlr3misc::Dictionary] [mlr3pipelines::mlr_pipeops] +#' or with the associated sugar function [mlr3pipelines::po()]: +#' ``` +#' PipeOpTaskSurvClassifIPCW$new() +#' mlr_pipeops$get("trafotask_survclassif_IPCW") +#' po("trafotask_survclassif_IPCW") +#' ``` +#' +#' @section Parameters: +#' The parameters are +#' +#' * `cutoff_time :: numeric()`\cr +#' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. +#' +#' @references +#' `r format_bib("vock_2016")` +#' +#' @family PipeOps +#' @family Transformation PipeOps +#' @export PipeOpTaskfSurvClassifIPCW = R6Class( "PipeOpTaskfSurvClassifIPCW", inherit = mlr3pipelines::PipeOp, diff --git a/man/PipeOpPredTransformer.Rd b/man/PipeOpPredTransformer.Rd index bf18bddfe..6d4de4d17 100644 --- a/man/PipeOpPredTransformer.Rd +++ b/man/PipeOpPredTransformer.Rd @@ -39,6 +39,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} diff --git a/man/PipeOpTaskTransformer.Rd b/man/PipeOpTaskTransformer.Rd index dca38c72f..91654b4a6 100644 --- a/man/PipeOpTaskTransformer.Rd +++ b/man/PipeOpTaskTransformer.Rd @@ -34,6 +34,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} diff --git a/man/PipeOpTransformer.Rd b/man/PipeOpTransformer.Rd index 9450bc16a..d3fe6ea15 100644 --- a/man/PipeOpTransformer.Rd +++ b/man/PipeOpTransformer.Rd @@ -33,6 +33,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} diff --git a/man/mlr_pipeops_survavg.Rd b/man/mlr_pipeops_survavg.Rd index 7c6a8f307..b0cd3fbe2 100644 --- a/man/mlr_pipeops_survavg.Rd +++ b/man/mlr_pipeops_survavg.Rd @@ -65,6 +65,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} } diff --git a/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd b/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd index 96f50868f..3572ef8c6 100644 --- a/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd +++ b/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd @@ -56,6 +56,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, 
\code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} @@ -63,6 +64,7 @@ Other Transformation PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} } diff --git a/man/mlr_pipeops_trafopred_regrsurv.Rd b/man/mlr_pipeops_trafopred_regrsurv.Rd index eeffdfe51..4c396e776 100644 --- a/man/mlr_pipeops_trafopred_regrsurv.Rd +++ b/man/mlr_pipeops_trafopred_regrsurv.Rd @@ -65,6 +65,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} @@ -72,6 +73,7 @@ Other Transformation PipeOps: \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} } diff --git a/man/mlr_pipeops_trafopred_survregr.Rd b/man/mlr_pipeops_trafopred_survregr.Rd index c47687fa1..9251b8011 100644 --- a/man/mlr_pipeops_trafopred_survregr.Rd +++ b/man/mlr_pipeops_trafopred_survregr.Rd @@ -45,6 +45,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} @@ -52,6 +53,7 @@ Other Transformation PipeOps: \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} } diff --git a/man/mlr_pipeops_trafotask_regrsurv.Rd b/man/mlr_pipeops_trafotask_regrsurv.Rd index dcbe6f004..0e148d006 100644 --- a/man/mlr_pipeops_trafotask_regrsurv.Rd +++ b/man/mlr_pipeops_trafotask_regrsurv.Rd @@ -61,6 +61,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} @@ -68,6 +69,7 @@ Other Transformation PipeOps: \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, \code{\link{mlr_pipeops_trafotask_survregr}} } diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd new file mode 100644 index 000000000..418ed6ad3 --- /dev/null +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd 
@@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpTaskfSurvClassifIPCW.R +\name{mlr_pipeops_trafotask_survclassif_IPCW} +\alias{mlr_pipeops_trafotask_survclassif_IPCW} +\alias{PipeOpTaskfSurvClassifIPCW} +\title{PipeOpTaskSurvClassifIPCW} +\description{ +Transform \link{TaskSurv} to \link[mlr3:TaskClassif]{TaskClassif} using IPCW (Vock et al., 2016). +} +\section{Dictionary}{ + +This \link[mlr3pipelines:PipeOp]{PipeOp} can be instantiated via the +\link[mlr3misc:Dictionary]{dictionary} \link[mlr3pipelines:mlr_pipeops]{mlr3pipelines::mlr_pipeops} +or with the associated sugar function \code{\link[mlr3pipelines:po]{mlr3pipelines::po()}}: + +\if{html}{\out{
}}\preformatted{PipeOpTaskSurvClassifIPCW$new() +mlr_pipeops$get("trafotask_survclassif_IPCW") +po("trafotask_survclassif_IPCW") +}\if{html}{\out{
}} +} + +\section{Parameters}{ + +The parameters are +\itemize{ +\item \code{cutoff_time :: numeric()}\cr +Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. +} +} + +\references{ +Vock, M D, Wolfson, Julian, Bandyopadhyay, Sunayan, Adomavicius, Gediminas, Johnson, E P, Vazquez-Benitez, Gabriela, O'Connor, J P (2016). +\dQuote{Adapting machine learning techniques to censored time-to-event health record data: A general-purpose approach using inverse probability of censoring weighting.} +\emph{Journal of Biomedical Informatics}, \bold{61}, 119--131. +\doi{https://doi.org/10.1016/j.jbi.2016.03.009}, \url{https://www.sciencedirect.com/science/article/pii/S1532046416000496}. +} +\seealso{ +Other PipeOps: +\code{\link{PipeOpPredTransformer}}, +\code{\link{PipeOpTaskTransformer}}, +\code{\link{PipeOpTransformer}}, +\code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, +\code{\link{mlr_pipeops_trafopred_regrsurv}}, +\code{\link{mlr_pipeops_trafopred_survregr}}, +\code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, +\code{\link{mlr_pipeops_trafotask_survregr}} + +Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, +\code{\link{mlr_pipeops_trafopred_regrsurv}}, +\code{\link{mlr_pipeops_trafopred_survregr}}, +\code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, +\code{\link{mlr_pipeops_trafotask_survregr}} +} +\concept{PipeOps} +\concept{Transformation PipeOps} +\section{Super class}{ +\code{\link[mlr3pipelines:PipeOp]{mlr3pipelines::PipeOp}} -> \code{PipeOpTaskfSurvClassifIPCW} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-PipeOpTaskfSurvClassifIPCW-new}{\code{PipeOpTaskfSurvClassifIPCW$new()}} +\item \href{#method-PipeOpTaskfSurvClassifIPCW-clone}{\code{PipeOpTaskfSurvClassifIPCW$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PipeOpTaskfSurvClassifIPCW-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PipeOpTaskfSurvClassifIPCW$new(id = "trafotask_survclassif_IPCW")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(\code{character(1)})\cr +Identifier of the resulting object.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PipeOpTaskfSurvClassifIPCW-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PipeOpTaskfSurvClassifIPCW$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd index 37396414f..c77d19a1f 100644 --- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd @@ -109,6 +109,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survregr}} Other Transformation PipeOps: @@ -116,6 +117,7 @@ Other Transformation PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survregr}} } \concept{PipeOps} diff --git a/man/mlr_pipeops_trafotask_survregr.Rd b/man/mlr_pipeops_trafotask_survregr.Rd index f597758ec..c130f974c 100644 --- a/man/mlr_pipeops_trafotask_survregr.Rd +++ b/man/mlr_pipeops_trafotask_survregr.Rd @@ -29,7 +29,7 @@ The parameters are \itemize{ \item \verb{method::character(1))}\cr Method to use for dealing with censoring. Options are \code{"ipcw"} (Vock et al., 2016): censoring -is column is removed and a \code{weights} column is added, weights are inverse estimated survival +column is removed and a \code{weights} column is added, weights are inverse estimated survival probability of the censoring distribution evaluated at survival time; \code{"mrl"} (Klein and Moeschberger, 2003): survival time of censored observations is transformed to the observed time plus the mean residual life-time at the moment @@ -140,6 +140,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}} Other Transformation PipeOps: @@ -147,6 +148,7 @@ Other Transformation PipeOps: \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, \code{\link{mlr_pipeops_trafotask_survclassif_disctime}} } \concept{PipeOps} diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R b/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R index ee39012bf..8c2d8a937 100644 --- a/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R +++ b/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R @@ -6,7 +6,7 @@ test_that("PipeOpTaskSurvClassifIPCW", { l = lrn("classif.gam") pipe = po %>>% l - pipe$train(task) + suppressWarnings(pipe$train(task)) pred1 = pipe$predict(task)$classif.gam.output expect_prediction_classif(pred1) @@ -14,7 +14,7 @@ test_that("PipeOpTaskSurvClassifIPCW", { po = po("trafotask_survclassif_IPCW", cutoff_time = 75) pipe2 = po %>>% l - pipe2$train(task) + suppressWarnings(pipe2$train(task)) pred2 = pipe2$predict(task)$classif.gam.output testthat::expect_true(all(pred1$prob != pred2$prob)) From 34e479837a3c84f924c74d401f1acf3a9a3332fc Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 12:07:33 +0200 Subject: [PATCH 04/82] add pipeline draft --- R/pipelines.R | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/R/pipelines.R b/R/pipelines.R index 307d90b4e..de99ab902 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -610,6 
+610,55 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, gr } +#' @name mlr_graphs_survtoclassif_IPCW +#' @title Survival to Classification Reduction Pipeline using IPCW +#' @description Wrapper around multiple [PipeOp][mlr3pipelines::PipeOp]s to help in creation +#' of complex survival reduction methods. +#' +#' @param learner [LearnerClassif][mlr3::LearnerClassif]\cr +#' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. +#' @param cutoff_time `numeric()`\cr +#' #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. +#' #' @param predict `numeric()`\cr +#' #' If not set to "classif" (default) then the prediction is transformed to a crank. +#' @param graph_learner `logical(1)`\cr +#' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a +#' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. +#' +#' @details +#' The pipeline consists of the following steps: +#' \enumerate{ +#' \item [PipeOpTaskSurvClassifIPCW] Converts [TaskSurv] to a [TaskClassif][mlr3::TaskClassif]. +#' \item A [LearnerClassif] is fit and predicted on the new `TaskClassif`. +#' \item Optionally: [PipeOpPredClassifSurvIPCW] transforms the resulting [PredictionClassif][mlr3::PredictionClassif] +#' to [PredictionSurv]. +#' } +#' +#' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] +#' @family pipelines +#' +#' @export +pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, predict = "classif", graph_learner = FALSE) { + assert_true("prob" %in% learner$predict_types) + + gr = mlr3pipelines::Graph$new() + gr$add_pipeop(mlr3pipelines::po("trafotask_survclassif_IPCW", cutoff_time = cutoff_time)) + gr$add_pipeop(mlr3pipelines::po("learner", learner, predict_type = "prob")) + + gr$add_edge(src_id = "trafotask_survclassif_IPCW", dst_id = learner$id, src_channel = "output", dst_channel = "input") + + if (predict != "classif") { + gr$add_pipeop(mlr3pipelines::po("trafopred_classifsurv_IPCW")) + gr$add_edge(src_id = learner$id, dst_id = "trafopred_classifsurv_ICPW", src_channel = "output", dst_channel = "input") + } + + if (graph_learner) { + gr = mlr3pipelines::GraphLearner$new(gr) + } + + gr +} + register_graph("survaverager", pipeline_survaverager) register_graph("survbagging", pipeline_survbagging) register_graph("crankcompositor", pipeline_crankcompositor) @@ -617,3 +666,4 @@ register_graph("distrcompositor", pipeline_distrcompositor) register_graph("probregr", pipeline_probregr) register_graph("survtoregr", pipeline_survtoregr) register_graph("survtoclassif_disctime", pipeline_survtoclassif_disctime) +register_graph("survtoclassif_IPCW", pipeline_survtoclassif_IPCW) From c658b66549c10f324455df66dc0caf61500b7a00 Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 13:49:45 +0200 Subject: [PATCH 05/82] draft PipeOpPredClassifSurvIPCW --- R/PipeOpPredClassifSurvIPCW.R | 64 +++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 R/PipeOpPredClassifSurvIPCW.R diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R new file mode 100644 index 000000000..1ee1f873e --- /dev/null +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -0,0 +1,64 @@ +#' @title PipeOpPredClassifSurvIPCW +#' @name mlr_pipeops_trafopred_classifsurv_IPCW +#' +#' @description +#' Transform [PredictionClassif] to [PredictionSurv]. 
+#' +#' @section Dictionary: +#' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the +#' [dictionary][mlr3misc::Dictionary] [mlr3pipelines::mlr_pipeops] +#' or with the associated sugar function [mlr3pipelines::po()]: +#' ``` +#' PipeOpPredClassifSurvIPCW$new() +#' mlr_pipeops$get("trafopred_classifsurv_IPCW") +#' po("trafopred_classifsurv_IPCW") +#' ``` +#' +#' @section Input and Output Channels: +#' The input is a [PredictionClassif] generated by [PipeOpTaskSurvClassifIPCW]. +#' The output is the input [PredictionClassif] transformed to a [PredictionSurv]. +#' Only works during prediction phase. +#' +#' @family PipeOps +#' @family Transformation PipeOps +#' @export +PipeOpPredClassifSurvIPCW = R6Class( + "PipeOpPredClassifSurvIPCW", + inherit = mlr3pipelines::PipeOp, + + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + #' @param id (character(1))\cr + #' Identifier of the resulting object. + initialize = function(id = "trafopred_classifsurv_IPCW") { + super$initialize( + id = id, + input = data.table( + name = "input", + train = "NULL", + predict = "PredictionClassif" + ), + output = data.table( + name = "output", + train = "NULL", + predict = "PredictionSurv" + ) + ) + } + ), + + private = list( + .predict = function(input) { + p = PredictionSurv$new(row_ids = 1, truth = Surv(1,0), crank = 0.5) + list(p) + }, + + .train = function(input) { + self$state = list() + list(input) + } + ) +) + +register_pipeop("trafopred_classifsurv_IPCW", PipeOpPredClassifSurvIPCW) From 14af33a6e9f3e3a189b9583a404304835888ef0f Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 13:49:59 +0200 Subject: [PATCH 06/82] update pipeline --- R/pipelines.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pipelines.R b/R/pipelines.R index de99ab902..9985afe4e 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -647,9 +647,9 @@ pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, predict = "c gr$add_edge(src_id = "trafotask_survclassif_IPCW", dst_id = learner$id, src_channel = "output", dst_channel = "input") - if (predict != "classif") { + if (predict == "classif") { gr$add_pipeop(mlr3pipelines::po("trafopred_classifsurv_IPCW")) - gr$add_edge(src_id = learner$id, dst_id = "trafopred_classifsurv_ICPW", src_channel = "output", dst_channel = "input") + gr$add_edge(src_id = learner$id, dst_id = "trafopred_classifsurv_IPCW", src_channel = "output", dst_channel = "input") } if (graph_learner) { From 15b8d16a6e37ea77a855f5ac5f9899d3c9e10c9b Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 13:51:50 +0200 Subject: [PATCH 07/82] updocs --- DESCRIPTION | 1 + NAMESPACE | 2 + man/PipeOpPredTransformer.Rd | 1 + man/PipeOpTaskTransformer.Rd | 1 + man/PipeOpTransformer.Rd | 1 + man/mlr_graphs_crankcompositor.Rd | 1 + man/mlr_graphs_distrcompositor.Rd | 1 + man/mlr_graphs_probregr.Rd | 1 + man/mlr_graphs_survaverager.Rd | 1 + man/mlr_graphs_survbagging.Rd | 1 + man/mlr_graphs_survtoclassif_IPCW.Rd | 54 +++++++++ man/mlr_graphs_survtoclassif_disctime.Rd | 1 + man/mlr_graphs_survtoregr.Rd | 1 + man/mlr_pipeops_survavg.Rd | 1 + man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd | 109 ++++++++++++++++++ ..._pipeops_trafopred_classifsurv_disctime.Rd | 2 + man/mlr_pipeops_trafopred_regrsurv.Rd | 2 + man/mlr_pipeops_trafopred_survregr.Rd | 2 + man/mlr_pipeops_trafotask_regrsurv.Rd | 2 + man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 2 + ..._pipeops_trafotask_survclassif_disctime.Rd | 2 + man/mlr_pipeops_trafotask_survregr.Rd | 
2 + 22 files changed, 191 insertions(+) create mode 100644 man/mlr_graphs_survtoclassif_IPCW.Rd create mode 100644 man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 1f7744490..da7e17346 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -140,6 +140,7 @@ Collate: 'PipeOpCrankCompositor.R' 'PipeOpDistrCompositor.R' 'PipeOpPredClassifSurvDiscTime.R' + 'PipeOpPredClassifSurvIPCW.R' 'PipeOpTransformer.R' 'PipeOpPredTransformer.R' 'PipeOpPredRegrSurv.R' diff --git a/NAMESPACE b/NAMESPACE index 3998e288b..06dd5542f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,6 +73,7 @@ export(PipeOpBreslow) export(PipeOpCrankCompositor) export(PipeOpDistrCompositor) export(PipeOpPredClassifSurvDiscTime) +export(PipeOpPredClassifSurvIPCW) export(PipeOpPredRegrSurv) export(PipeOpPredSurvRegr) export(PipeOpPredTransformer) @@ -98,6 +99,7 @@ export(as_task_surv) export(assert_surv) export(breslow) export(pecs) +export(pipeline_survtoclassif_IPCW) export(pipeline_survtoclassif_disctime) export(pipeline_survtoregr) export(plot_probregr) diff --git a/man/PipeOpPredTransformer.Rd b/man/PipeOpPredTransformer.Rd index 6d4de4d17..8dd916acd 100644 --- a/man/PipeOpPredTransformer.Rd +++ b/man/PipeOpPredTransformer.Rd @@ -35,6 +35,7 @@ Other PipeOps: \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, diff --git a/man/PipeOpTaskTransformer.Rd b/man/PipeOpTaskTransformer.Rd index 91654b4a6..bcf0230f1 100644 --- a/man/PipeOpTaskTransformer.Rd +++ b/man/PipeOpTaskTransformer.Rd @@ -30,6 +30,7 @@ Other PipeOps: \code{\link{PipeOpPredTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, diff --git a/man/PipeOpTransformer.Rd b/man/PipeOpTransformer.Rd index d3fe6ea15..9e5131023 100644 --- a/man/PipeOpTransformer.Rd +++ b/man/PipeOpTransformer.Rd @@ -29,6 +29,7 @@ Other PipeOps: \code{\link{PipeOpPredTransformer}}, \code{\link{PipeOpTaskTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, diff --git a/man/mlr_graphs_crankcompositor.Rd b/man/mlr_graphs_crankcompositor.Rd index 2aed5581c..7508e146d 100644 --- a/man/mlr_graphs_crankcompositor.Rd +++ b/man/mlr_graphs_crankcompositor.Rd @@ -68,6 +68,7 @@ Other pipelines: \code{\link{mlr_graphs_probregr}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, +\code{\link{mlr_graphs_survtoclassif_IPCW}}, \code{\link{mlr_graphs_survtoclassif_disctime}}, \code{\link{mlr_graphs_survtoregr}} } diff --git a/man/mlr_graphs_distrcompositor.Rd b/man/mlr_graphs_distrcompositor.Rd index e75fee5cf..b9f6b517e 100644 --- a/man/mlr_graphs_distrcompositor.Rd +++ b/man/mlr_graphs_distrcompositor.Rd @@ -69,6 +69,7 @@ Other pipelines: \code{\link{mlr_graphs_probregr}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, +\code{\link{mlr_graphs_survtoclassif_IPCW}}, \code{\link{mlr_graphs_survtoclassif_disctime}}, 
\code{\link{mlr_graphs_survtoregr}} } diff --git a/man/mlr_graphs_probregr.Rd b/man/mlr_graphs_probregr.Rd index 4a0573d2d..165667fec 100644 --- a/man/mlr_graphs_probregr.Rd +++ b/man/mlr_graphs_probregr.Rd @@ -72,6 +72,7 @@ Other pipelines: \code{\link{mlr_graphs_distrcompositor}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, +\code{\link{mlr_graphs_survtoclassif_IPCW}}, \code{\link{mlr_graphs_survtoclassif_disctime}}, \code{\link{mlr_graphs_survtoregr}} } diff --git a/man/mlr_graphs_survaverager.Rd b/man/mlr_graphs_survaverager.Rd index e85c9dae0..5297aeb22 100644 --- a/man/mlr_graphs_survaverager.Rd +++ b/man/mlr_graphs_survaverager.Rd @@ -48,6 +48,7 @@ Other pipelines: \code{\link{mlr_graphs_distrcompositor}}, \code{\link{mlr_graphs_probregr}}, \code{\link{mlr_graphs_survbagging}}, +\code{\link{mlr_graphs_survtoclassif_IPCW}}, \code{\link{mlr_graphs_survtoclassif_disctime}}, \code{\link{mlr_graphs_survtoregr}} } diff --git a/man/mlr_graphs_survbagging.Rd b/man/mlr_graphs_survbagging.Rd index 3b9eaba82..0880a33fc 100644 --- a/man/mlr_graphs_survbagging.Rd +++ b/man/mlr_graphs_survbagging.Rd @@ -79,6 +79,7 @@ Other pipelines: \code{\link{mlr_graphs_distrcompositor}}, \code{\link{mlr_graphs_probregr}}, \code{\link{mlr_graphs_survaverager}}, +\code{\link{mlr_graphs_survtoclassif_IPCW}}, \code{\link{mlr_graphs_survtoclassif_disctime}}, \code{\link{mlr_graphs_survtoregr}} } diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd new file mode 100644 index 000000000..40d2a7624 --- /dev/null +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pipelines.R +\name{mlr_graphs_survtoclassif_IPCW} +\alias{mlr_graphs_survtoclassif_IPCW} +\alias{pipeline_survtoclassif_IPCW} +\title{Survival to Classification Reduction Pipeline using IPCW} +\usage{ +pipeline_survtoclassif_IPCW( + learner, + cutoff_time = NULL, + predict = "classif", + graph_learner = FALSE +) +} +\arguments{ +\item{learner}{\link[mlr3:LearnerClassif]{LearnerClassif}\cr +Classification learner to fit the transformed \link[mlr3:TaskClassif]{TaskClassif}.} + +\item{cutoff_time}{\code{numeric()}\cr +#' Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. +#' @param predict \code{numeric()}\cr +#' If not set to "classif" (default) then the prediction is transformed to a crank.} + +\item{graph_learner}{\code{logical(1)}\cr +If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a +\link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} +} +\value{ +\link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or \link[mlr3pipelines:mlr_learners_graph]{mlr3pipelines::GraphLearner} +} +\description{ +Wrapper around multiple \link[mlr3pipelines:PipeOp]{PipeOp}s to help in creation +of complex survival reduction methods. +} +\details{ +The pipeline consists of the following steps: +\enumerate{ +\item \link{PipeOpTaskSurvClassifIPCW} Converts \link{TaskSurv} to a \link[mlr3:TaskClassif]{TaskClassif}. +\item A \link{LearnerClassif} is fit and predicted on the new \code{TaskClassif}. +\item Optionally: \link{PipeOpPredClassifSurvIPCW} transforms the resulting \link[mlr3:PredictionClassif]{PredictionClassif} +to \link{PredictionSurv}. 
+} +} +\seealso{ +Other pipelines: +\code{\link{mlr_graphs_crankcompositor}}, +\code{\link{mlr_graphs_distrcompositor}}, +\code{\link{mlr_graphs_probregr}}, +\code{\link{mlr_graphs_survaverager}}, +\code{\link{mlr_graphs_survbagging}}, +\code{\link{mlr_graphs_survtoclassif_disctime}}, +\code{\link{mlr_graphs_survtoregr}} +} +\concept{pipelines} diff --git a/man/mlr_graphs_survtoclassif_disctime.Rd b/man/mlr_graphs_survtoclassif_disctime.Rd index c3b3e64c3..ce7d8927c 100644 --- a/man/mlr_graphs_survtoclassif_disctime.Rd +++ b/man/mlr_graphs_survtoclassif_disctime.Rd @@ -86,6 +86,7 @@ Other pipelines: \code{\link{mlr_graphs_probregr}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, +\code{\link{mlr_graphs_survtoclassif_IPCW}}, \code{\link{mlr_graphs_survtoregr}} } \concept{pipelines} diff --git a/man/mlr_graphs_survtoregr.Rd b/man/mlr_graphs_survtoregr.Rd index ef9d99047..0811c595f 100644 --- a/man/mlr_graphs_survtoregr.Rd +++ b/man/mlr_graphs_survtoregr.Rd @@ -171,6 +171,7 @@ Other pipelines: \code{\link{mlr_graphs_probregr}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, +\code{\link{mlr_graphs_survtoclassif_IPCW}}, \code{\link{mlr_graphs_survtoclassif_disctime}} } \concept{pipelines} diff --git a/man/mlr_pipeops_survavg.Rd b/man/mlr_pipeops_survavg.Rd index b0cd3fbe2..12b070238 100644 --- a/man/mlr_pipeops_survavg.Rd +++ b/man/mlr_pipeops_survavg.Rd @@ -61,6 +61,7 @@ Other PipeOps: \code{\link{PipeOpPredTransformer}}, \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, diff --git a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd new file mode 100644 index 000000000..70781125f --- /dev/null +++ b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd @@ -0,0 +1,109 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpPredClassifSurvIPCW.R +\name{mlr_pipeops_trafopred_classifsurv_IPCW} +\alias{mlr_pipeops_trafopred_classifsurv_IPCW} +\alias{PipeOpPredClassifSurvIPCW} +\title{PipeOpPredClassifSurvIPCW} +\description{ +Transform \link{PredictionClassif} to \link{PredictionSurv}. +} +\section{Dictionary}{ + +This \link[mlr3pipelines:PipeOp]{PipeOp} can be instantiated via the +\link[mlr3misc:Dictionary]{dictionary} \link[mlr3pipelines:mlr_pipeops]{mlr3pipelines::mlr_pipeops} +or with the associated sugar function \code{\link[mlr3pipelines:po]{mlr3pipelines::po()}}: + +\if{html}{\out{
}}\preformatted{PipeOpPredClassifSurvIPCW$new() +mlr_pipeops$get("trafopred_classifsurv_IPCW") +po("trafopred_classifsurv_IPCW") +}\if{html}{\out{
}} +} + +\section{Input and Output Channels}{ + +The input is a \link{PredictionClassif} generated by \link{PipeOpTaskSurvClassifIPCW}. +The output is the input \link{PredictionClassif} transformed to a \link{PredictionSurv}. +Only works during prediction phase. +} + +\seealso{ +Other PipeOps: +\code{\link{PipeOpPredTransformer}}, +\code{\link{PipeOpTaskTransformer}}, +\code{\link{PipeOpTransformer}}, +\code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, +\code{\link{mlr_pipeops_trafopred_regrsurv}}, +\code{\link{mlr_pipeops_trafopred_survregr}}, +\code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, +\code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, +\code{\link{mlr_pipeops_trafotask_survregr}} + +Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, +\code{\link{mlr_pipeops_trafopred_regrsurv}}, +\code{\link{mlr_pipeops_trafopred_survregr}}, +\code{\link{mlr_pipeops_trafotask_regrsurv}}, +\code{\link{mlr_pipeops_trafotask_survclassif_IPCW}}, +\code{\link{mlr_pipeops_trafotask_survclassif_disctime}}, +\code{\link{mlr_pipeops_trafotask_survregr}} +} +\concept{PipeOps} +\concept{Transformation PipeOps} +\section{Super class}{ +\code{\link[mlr3pipelines:PipeOp]{mlr3pipelines::PipeOp}} -> \code{PipeOpPredClassifSurvIPCW} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-PipeOpPredClassifSurvIPCW-new}{\code{PipeOpPredClassifSurvIPCW$new()}} +\item \href{#method-PipeOpPredClassifSurvIPCW-clone}{\code{PipeOpPredClassifSurvIPCW$clone()}} +} +} +\if{html}{\out{ +
Inherited methods + +
+}} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PipeOpPredClassifSurvIPCW-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PipeOpPredClassifSurvIPCW$new(id = "trafopred_classifsurv_IPCW")}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{id}}{(character(1))\cr +Identifier of the resulting object.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PipeOpPredClassifSurvIPCW-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{PipeOpPredClassifSurvIPCW$clone(deep = FALSE)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
}} +} +} +} diff --git a/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd b/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd index 3572ef8c6..a30fc3d4f 100644 --- a/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd +++ b/man/mlr_pipeops_trafopred_classifsurv_disctime.Rd @@ -53,6 +53,7 @@ Other PipeOps: \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, @@ -61,6 +62,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafotask_survregr}} Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, diff --git a/man/mlr_pipeops_trafopred_regrsurv.Rd b/man/mlr_pipeops_trafopred_regrsurv.Rd index 4c396e776..f723fc18c 100644 --- a/man/mlr_pipeops_trafopred_regrsurv.Rd +++ b/man/mlr_pipeops_trafopred_regrsurv.Rd @@ -62,6 +62,7 @@ Other PipeOps: \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, @@ -70,6 +71,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafotask_survregr}} Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_survregr}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, diff --git a/man/mlr_pipeops_trafopred_survregr.Rd b/man/mlr_pipeops_trafopred_survregr.Rd index 9251b8011..405f02f9a 100644 --- a/man/mlr_pipeops_trafopred_survregr.Rd +++ b/man/mlr_pipeops_trafopred_survregr.Rd @@ -42,6 +42,7 @@ Other PipeOps: \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, @@ -50,6 +51,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafotask_survregr}} Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafotask_regrsurv}}, diff --git a/man/mlr_pipeops_trafotask_regrsurv.Rd b/man/mlr_pipeops_trafotask_regrsurv.Rd index 0e148d006..bf79af239 100644 --- a/man/mlr_pipeops_trafotask_regrsurv.Rd +++ b/man/mlr_pipeops_trafotask_regrsurv.Rd @@ -58,6 +58,7 @@ Other PipeOps: \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, @@ -66,6 +67,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafotask_survregr}} Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, diff --git 
a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 418ed6ad3..210aaa56f 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -40,6 +40,7 @@ Other PipeOps: \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, @@ -48,6 +49,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafotask_survregr}} Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, diff --git a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd index c77d19a1f..8f2b6691d 100644 --- a/man/mlr_pipeops_trafotask_survclassif_disctime.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_disctime.Rd @@ -105,6 +105,7 @@ Other PipeOps: \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, @@ -113,6 +114,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafotask_survregr}} Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, diff --git a/man/mlr_pipeops_trafotask_survregr.Rd b/man/mlr_pipeops_trafotask_survregr.Rd index c130f974c..bdc70313c 100644 --- a/man/mlr_pipeops_trafotask_survregr.Rd +++ b/man/mlr_pipeops_trafotask_survregr.Rd @@ -136,6 +136,7 @@ Other PipeOps: \code{\link{PipeOpTaskTransformer}}, \code{\link{PipeOpTransformer}}, \code{\link{mlr_pipeops_survavg}}, +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, @@ -144,6 +145,7 @@ Other PipeOps: \code{\link{mlr_pipeops_trafotask_survclassif_disctime}} Other Transformation PipeOps: +\code{\link{mlr_pipeops_trafopred_classifsurv_IPCW}}, \code{\link{mlr_pipeops_trafopred_classifsurv_disctime}}, \code{\link{mlr_pipeops_trafopred_regrsurv}}, \code{\link{mlr_pipeops_trafopred_survregr}}, From 29d7b9daa2b877faf8d45afc979268a6a1baa6ec Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 14:45:50 +0200 Subject: [PATCH 08/82] update tests --- R/pipelines.R | 4 +-- man/mlr_graphs_survtoclassif_IPCW.Rd | 2 +- tests/testthat/test_pipelines.R | 30 +++++++++++++++++++ .../test_pipeop_trafotask_survclassif_IPCW.R | 21 ------------- 4 files changed, 33 insertions(+), 24 deletions(-) delete mode 100644 tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R diff --git a/R/pipelines.R b/R/pipelines.R index 9985afe4e..ec8ad72f6 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -638,7 +638,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' @family pipelines #' #' @export -pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, predict = "classif", graph_learner = FALSE) { 
+pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, output = "classif", graph_learner = FALSE) { assert_true("prob" %in% learner$predict_types) gr = mlr3pipelines::Graph$new() @@ -647,7 +647,7 @@ pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, predict = "c gr$add_edge(src_id = "trafotask_survclassif_IPCW", dst_id = learner$id, src_channel = "output", dst_channel = "input") - if (predict == "classif") { + if (output != "classif") { gr$add_pipeop(mlr3pipelines::po("trafopred_classifsurv_IPCW")) gr$add_edge(src_id = learner$id, dst_id = "trafopred_classifsurv_IPCW", src_channel = "output", dst_channel = "input") } diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index 40d2a7624..2532e14fa 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -8,7 +8,7 @@ pipeline_survtoclassif_IPCW( learner, cutoff_time = NULL, - predict = "classif", + output = "classif", graph_learner = FALSE ) } diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index fae131dd9..0bbcb1a07 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -181,3 +181,33 @@ test_that("survtoclassif_disctime", { # model with more covariates should have better C-index expect_gt(pred2$score(), pred$score()) }) + +test_that("survtoclassif_IPCW", { + requireNamespace("mlr3extralearners") + + pipe = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam")) + expect_class(pipe, "Graph") + + ## This needs fixing + grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), + graph_learner = TRUE) + expect_class(grlrn, "GraphLearner") + grlrn$train(task) + p = grlrn$predict(task) + expect_prediction_surv(p) + + # Test with cutoff_time + grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), + cutoff_time = 50) + expect_class(pipe, "Graph") + suppressWarnings(grlrn$train(task)) + p1 = grlrn$predict(task) + expect_prediction_classif(p1$classif.gam.output) + + grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), + cutoff_time = 75) + suppressWarnings(grlrn$train(task)) + p2 = grlrn$predict(task) + + expect_false(any(p1$classif.gam.output$data$prob == p2$classif.gam.output$data$prob)) +}) diff --git a/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R b/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R deleted file mode 100644 index 8c2d8a937..000000000 --- a/tests/testthat/test_pipeop_trafotask_survclassif_IPCW.R +++ /dev/null @@ -1,21 +0,0 @@ -skip_if_not_installed("mlr3extralearners") -test_that("PipeOpTaskSurvClassifIPCW", { - - task = tsk("rats") - po = po("trafotask_survclassif_IPCW", cutoff_time = 50) - l = lrn("classif.gam") - - pipe = po %>>% l - suppressWarnings(pipe$train(task)) - pred1 = pipe$predict(task)$classif.gam.output - expect_prediction_classif(pred1) - - - po = po("trafotask_survclassif_IPCW", cutoff_time = 75) - - pipe2 = po %>>% l - suppressWarnings(pipe2$train(task)) - pred2 = pipe2$predict(task)$classif.gam.output - - testthat::expect_true(all(pred1$prob != pred2$prob)) -}) From c21b8dc36f64524ae64508b12f45d92c618d6f2f Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 14:51:45 +0200 Subject: [PATCH 09/82] update tests --- tests/testthat/test_pipelines.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index 0bbcb1a07..d5ac8b2eb 100644 --- a/tests/testthat/test_pipelines.R +++ 
b/tests/testthat/test_pipelines.R @@ -182,6 +182,8 @@ test_that("survtoclassif_disctime", { expect_gt(pred2$score(), pred$score()) }) +skip_if_not_installed("mlr3extralearners") + test_that("survtoclassif_IPCW", { requireNamespace("mlr3extralearners") From daec59c1e9b6c4f2f10c036183de74761673567e Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 14:58:36 +0200 Subject: [PATCH 10/82] fix binding --- R/PipeOpTaskfSurvClassifIPCW.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/PipeOpTaskfSurvClassifIPCW.R b/R/PipeOpTaskfSurvClassifIPCW.R index 24d1e1abe..29239aa41 100644 --- a/R/PipeOpTaskfSurvClassifIPCW.R +++ b/R/PipeOpTaskfSurvClassifIPCW.R @@ -78,6 +78,7 @@ PipeOpTaskfSurvClassifIPCW = R6Class( weights = 1 / pred$data$distr[1,] # add weights to original data + time = status = NULL data = input[[1]]$data() data[["ipc_weights"]] = weights[as.character(data_trafo$time)] data[status == 0 & time < cutoff_time, "ipc_weights" := 0] From 1d5d56bc0f3c94c0b108175b77c530944bc33f8f Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 15:04:10 +0200 Subject: [PATCH 11/82] updocs --- R/pipelines.R | 2 +- man/mlr_graphs_survtoclassif_IPCW.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pipelines.R b/R/pipelines.R index ec8ad72f6..76aa3027c 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -619,7 +619,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. #' @param cutoff_time `numeric()`\cr #' #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. -#' #' @param predict `numeric()`\cr +#' #' @param output `numeric()`\cr #' #' If not set to "classif" (default) then the prediction is transformed to a crank. #' @param graph_learner `logical(1)`\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index 2532e14fa..6bf377152 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -18,7 +18,7 @@ Classification learner to fit the transformed \link[mlr3:TaskClassif]{TaskClassi \item{cutoff_time}{\code{numeric()}\cr #' Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. 
-#' @param predict \code{numeric()}\cr +#' @param output \code{numeric()}\cr #' If not set to "classif" (default) then the prediction is transformed to a crank.} \item{graph_learner}{\code{logical(1)}\cr From 9dcfb4a2821a0c641b568de7fe6db8f822ffb5ad Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 6 Aug 2024 15:59:26 +0200 Subject: [PATCH 12/82] update PipeOpPredClassifSurv --- R/PipeOpPredClassifSurvIPCW.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index 1ee1f873e..dc171221a 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -50,7 +50,12 @@ PipeOpPredClassifSurvIPCW = R6Class( private = list( .predict = function(input) { - p = PredictionSurv$new(row_ids = 1, truth = Surv(1,0), crank = 0.5) + pred = input[[1]] + # TODO: fix timepoints + p = PredictionSurv$new(row_ids = pred$row_ids, + truth = Surv(time = rep(0, length(pred$row_ids)), + event = as.integer(pred$truth)), + crank = pred$prob[, 2]) list(p) }, From 43e3222f4ce494260ff064e1d6d5d0025b443761 Mon Sep 17 00:00:00 2001 From: studener Date: Thu, 8 Aug 2024 12:46:32 +0200 Subject: [PATCH 13/82] fix typo --- DESCRIPTION | 2 +- NAMESPACE | 2 +- ...ssifIPCW.R => PipeOpTaskSurvClassifIPCW.R} | 6 ++--- R/pipelines.R | 4 ++-- man/mlr_graphs_survtoclassif_IPCW.Rd | 7 +++--- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 22 +++++++++---------- 6 files changed, 22 insertions(+), 21 deletions(-) rename R/{PipeOpTaskfSurvClassifIPCW.R => PipeOpTaskSurvClassifIPCW.R} (95%) diff --git a/DESCRIPTION b/DESCRIPTION index da7e17346..e60df8227 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -149,9 +149,9 @@ Collate: 'PipeOpSurvAvg.R' 'PipeOpTaskRegrSurv.R' 'PipeOpTaskSurvClassifDiscTime.R' + 'PipeOpTaskSurvClassifIPCW.R' 'PipeOpTaskSurvRegr.R' 'PipeOpTaskTransformer.R' - 'PipeOpTaskfSurvClassifIPCW.R' 'PredictionDataDens.R' 'PredictionDataSurv.R' 'PredictionDens.R' diff --git a/NAMESPACE b/NAMESPACE index 06dd5542f..d1962c880 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -81,9 +81,9 @@ export(PipeOpProbregr) export(PipeOpSurvAvg) export(PipeOpTaskRegrSurv) export(PipeOpTaskSurvClassifDiscTime) +export(PipeOpTaskSurvClassifIPCW) export(PipeOpTaskSurvRegr) export(PipeOpTaskTransformer) -export(PipeOpTaskfSurvClassifIPCW) export(PipeOpTransformer) export(PredictionDens) export(PredictionSurv) diff --git a/R/PipeOpTaskfSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R similarity index 95% rename from R/PipeOpTaskfSurvClassifIPCW.R rename to R/PipeOpTaskSurvClassifIPCW.R index 29239aa41..a71102d2b 100644 --- a/R/PipeOpTaskfSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -27,8 +27,8 @@ #' @family PipeOps #' @family Transformation PipeOps #' @export -PipeOpTaskfSurvClassifIPCW = R6Class( - "PipeOpTaskfSurvClassifIPCW", +PipeOpTaskSurvClassifIPCW = R6Class( + "PipeOpTaskSurvClassifIPCW", inherit = mlr3pipelines::PipeOp, public = list( @@ -96,4 +96,4 @@ PipeOpTaskfSurvClassifIPCW = R6Class( ) ) -register_pipeop("trafotask_survclassif_IPCW", PipeOpTaskfSurvClassifIPCW) +register_pipeop("trafotask_survclassif_IPCW", PipeOpTaskSurvClassifIPCW) diff --git a/R/pipelines.R b/R/pipelines.R index 76aa3027c..3c9707bcb 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -619,8 +619,8 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. #' @param cutoff_time `numeric()`\cr #' #' Cutoff time for IPCW. 
Observations with time larger than `cutoff_time` are censored. -#' #' @param output `numeric()`\cr -#' #' If not set to "classif" (default) then the prediction is transformed to a crank. +#' @param output `numeric()`\cr +#' If not set to "classif" (default) then the prediction is transformed to a crank. #' @param graph_learner `logical(1)`\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index 6bf377152..e1753dab2 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -17,9 +17,10 @@ pipeline_survtoclassif_IPCW( Classification learner to fit the transformed \link[mlr3:TaskClassif]{TaskClassif}.} \item{cutoff_time}{\code{numeric()}\cr -#' Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. -#' @param output \code{numeric()}\cr -#' If not set to "classif" (default) then the prediction is transformed to a crank.} +#' Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored.} + +\item{output}{\code{numeric()}\cr +If not set to "classif" (default) then the prediction is transformed to a crank.} \item{graph_learner}{\code{logical(1)}\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 210aaa56f..52834eb1a 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/PipeOpTaskfSurvClassifIPCW.R +% Please edit documentation in R/PipeOpTaskSurvClassifIPCW.R \name{mlr_pipeops_trafotask_survclassif_IPCW} \alias{mlr_pipeops_trafotask_survclassif_IPCW} -\alias{PipeOpTaskfSurvClassifIPCW} +\alias{PipeOpTaskSurvClassifIPCW} \title{PipeOpTaskSurvClassifIPCW} \description{ Transform \link{TaskSurv} to \link[mlr3:TaskClassif]{TaskClassif} using IPCW (Vock et al., 2016). @@ -60,13 +60,13 @@ Other Transformation PipeOps: \concept{PipeOps} \concept{Transformation PipeOps} \section{Super class}{ -\code{\link[mlr3pipelines:PipeOp]{mlr3pipelines::PipeOp}} -> \code{PipeOpTaskfSurvClassifIPCW} +\code{\link[mlr3pipelines:PipeOp]{mlr3pipelines::PipeOp}} -> \code{PipeOpTaskSurvClassifIPCW} } \section{Methods}{ \subsection{Public methods}{ \itemize{ -\item \href{#method-PipeOpTaskfSurvClassifIPCW-new}{\code{PipeOpTaskfSurvClassifIPCW$new()}} -\item \href{#method-PipeOpTaskfSurvClassifIPCW-clone}{\code{PipeOpTaskfSurvClassifIPCW$clone()}} +\item \href{#method-PipeOpTaskSurvClassifIPCW-new}{\code{PipeOpTaskSurvClassifIPCW$new()}} +\item \href{#method-PipeOpTaskSurvClassifIPCW-clone}{\code{PipeOpTaskSurvClassifIPCW$clone()}} } } \if{html}{\out{ @@ -80,12 +80,12 @@ Other Transformation PipeOps: }} \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PipeOpTaskfSurvClassifIPCW-new}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PipeOpTaskSurvClassifIPCW-new}{}}} \subsection{Method \code{new()}}{ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PipeOpTaskfSurvClassifIPCW$new(id = "trafotask_survclassif_IPCW")}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{PipeOpTaskSurvClassifIPCW$new(id = "trafotask_survclassif_IPCW")}\if{html}{\out{
}} } \subsection{Arguments}{ @@ -98,12 +98,12 @@ Identifier of the resulting object.} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-PipeOpTaskfSurvClassifIPCW-clone}{}}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-PipeOpTaskSurvClassifIPCW-clone}{}}} \subsection{Method \code{clone()}}{ The objects of this class are cloneable with this method. \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{PipeOpTaskfSurvClassifIPCW$clone(deep = FALSE)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{PipeOpTaskSurvClassifIPCW$clone(deep = FALSE)}\if{html}{\out{
}} } \subsection{Arguments}{ From dde3f72898280558553844816c6a34b07f78b4bb Mon Sep 17 00:00:00 2001 From: studener Date: Sat, 10 Aug 2024 13:15:33 +0200 Subject: [PATCH 14/82] refactor / add eps param to IPCW pipeop --- R/PipeOpTaskSurvClassifIPCW.R | 32 ++++++++++++++++++++++---------- tests/testthat/test_pipelines.R | 5 +++-- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index a71102d2b..632a76081 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -36,7 +36,8 @@ PipeOpTaskSurvClassifIPCW = R6Class( #' Creates a new instance of this [R6][R6::R6Class] class. initialize = function(id = "trafotask_survclassif_IPCW") { param_set = ps( - cutoff_time = p_dbl(0, default = NULL, special_vals = list(NULL)) + cutoff_time = p_dbl(lower = 0, special_vals = list()), + eps = p_dbl(lower = 0, default = 1e-6) ) super$initialize( id = id, @@ -65,22 +66,33 @@ PipeOpTaskSurvClassifIPCW = R6Class( }, .train = function(input) { - data_trafo = input[[1]]$data() + data = input[[1]]$data() + time_var = input[[1]]$target_names[1] + status_var = input[[1]]$target_names[2] + cutoff_time = self$param_set$values$cutoff_time + eps = self$param_set$values$eps + + if (cutoff_time >= max(data[[time_var]])) { + stop("Cutoff time must be smaller than the maximum event time.") + } # transform data and calculate weights - data_trafo$time[data_trafo$time > cutoff_time] = cutoff_time - data_trafo$status[data_trafo$time == cutoff_time] = 1 - data_trafo$status = (data_trafo$status != 1) * 1 + times = data[[time_var]] + times[times > cutoff_time] = cutoff_time - task_new = TaskSurv$new(id = "ipcw", time = "time", event = "status", backend = data_trafo) - pred = lrn("surv.kaplan")$train(task_new)$predict(task_new) - weights = 1 / pred$data$distr[1,] + status = data[[status_var]] + status[times == cutoff_time] = 0 + + cens = survival::survfit(Surv(times, 1 - status) ~ 1) + cens$surv[length(cens$surv)] = cens$surv[length(cens$surv)-1] + cens$surv[cens$surv == 0] = eps + + weights = rep(1/cens$surv, table(times)) # add weights to original data time = status = NULL - data = input[[1]]$data() - data[["ipc_weights"]] = weights[as.character(data_trafo$time)] + data[["ipc_weights"]] = weights data[status == 0 & time < cutoff_time, "ipc_weights" := 0] data$status = factor(data$status, levels = c("0", "1")) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index d5ac8b2eb..e02ff0a2e 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -187,12 +187,13 @@ skip_if_not_installed("mlr3extralearners") test_that("survtoclassif_IPCW", { requireNamespace("mlr3extralearners") - pipe = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam")) + pipe = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), + cutoff_time = 50) expect_class(pipe, "Graph") ## This needs fixing grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), - graph_learner = TRUE) + cutoff_time = 50, graph_learner = TRUE) expect_class(grlrn, "GraphLearner") grlrn$train(task) p = grlrn$predict(task) From 8680143385bd297ae2d7f5afc16e7cc865934ae2 Mon Sep 17 00:00:00 2001 From: studener Date: Mon, 12 Aug 2024 13:25:40 +0200 Subject: [PATCH 15/82] remove time_var from features --- R/PipeOpTaskSurvClassifIPCW.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 
632a76081..285a52273 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -91,14 +91,14 @@ PipeOpTaskSurvClassifIPCW = R6Class( weights = rep(1/cens$surv, table(times)) # add weights to original data - time = status = NULL data[["ipc_weights"]] = weights - data[status == 0 & time < cutoff_time, "ipc_weights" := 0] - data$status = factor(data$status, levels = c("0", "1")) + data[status_var == 0 & time_var < cutoff_time, "ipc_weights" := 0] + data[[status_var]] = factor(data[[status_var]], levels = c("0", "1")) + data[[time_var]] = NULL # create new task task = TaskClassif$new(id = paste0(input[[1]]$id, "_IPCW"), backend = data, - target = "status", positive = "1") + target = status_var, positive = "1") task$set_col_roles("ipc_weights", roles = "weight") From c6d3a84d9f1e02886f99e433bf057d00b06c629e Mon Sep 17 00:00:00 2001 From: studener Date: Mon, 12 Aug 2024 16:08:51 +0200 Subject: [PATCH 16/82] add correct time to surv prediction --- R/PipeOpPredClassifSurvIPCW.R | 11 ++++++----- R/PipeOpTaskSurvClassifIPCW.R | 12 +++++++----- R/pipelines.R | 4 ++++ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index dc171221a..c8b4468df 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -35,9 +35,9 @@ PipeOpPredClassifSurvIPCW = R6Class( super$initialize( id = id, input = data.table( - name = "input", - train = "NULL", - predict = "PredictionClassif" + name = c("input", "data"), + train = c("NULL", "*"), + predict = c("PredictionClassif", "*") ), output = data.table( name = "output", @@ -51,9 +51,10 @@ PipeOpPredClassifSurvIPCW = R6Class( private = list( .predict = function(input) { pred = input[[1]] - # TODO: fix timepoints + times = input[[2]] + p = PredictionSurv$new(row_ids = pred$row_ids, - truth = Surv(time = rep(0, length(pred$row_ids)), + truth = Surv(time = times, event = as.integer(pred$truth)), crank = pred$prob[, 2]) list(p) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 285a52273..759651675 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -48,9 +48,9 @@ PipeOpTaskSurvClassifIPCW = R6Class( predict = "TaskSurv" ), output = data.table( - name = "output", - train = "TaskClassif", - predict = "TaskClassif" + name = c("output", "data"), + train = c("TaskClassif", "NULL"), + predict = c("TaskClassif", "*") ) ) } @@ -62,7 +62,9 @@ PipeOpTaskSurvClassifIPCW = R6Class( data$status = factor(data$status, levels = c("0", "1")) task = TaskClassif$new(id = input[[1]]$id, backend = data, target = "status", positive = "1") - list(task) + + time = data[[input[[1]]$target_names[1]]] + list(task, time) }, .train = function(input) { @@ -103,7 +105,7 @@ PipeOpTaskSurvClassifIPCW = R6Class( task$set_col_roles("ipc_weights", roles = "weight") self$state = list() - list(task) + list(task, NULL) } ) ) diff --git a/R/pipelines.R b/R/pipelines.R index 3c9707bcb..aca232b4b 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -649,7 +649,11 @@ pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, output = "cl if (output != "classif") { gr$add_pipeop(mlr3pipelines::po("trafopred_classifsurv_IPCW")) + gr$add_pipeop(mlr3pipelines::po("nop")) + gr$add_edge(src_id = learner$id, dst_id = "trafopred_classifsurv_IPCW", src_channel = "output", dst_channel = "input") + gr$add_edge(src_id = "trafotask_survclassif_IPCW", dst_id = "nop", src_channel = "data", dst_channel = "input") + gr$add_edge(src_id = "nop", dst_id 
= "trafopred_classifsurv_IPCW", src_channel = "output", dst_channel = "data") } if (graph_learner) { From c485290ba1bf8d43f00be96ef0ce46a20e078736 Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 13 Aug 2024 14:08:54 +0200 Subject: [PATCH 17/82] updocs --- R/PipeOpPredClassifSurvIPCW.R | 3 ++- R/PipeOpTaskSurvClassifIPCW.R | 18 +++++++++++++++++ R/pipelines.R | 6 ++++-- man/mlr_graphs_survtoclassif_IPCW.Rd | 6 +++++- man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd | 3 ++- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 20 +++++++++++++++++++ 6 files changed, 51 insertions(+), 5 deletions(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index c8b4468df..4012d7514 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -15,7 +15,8 @@ #' ``` #' #' @section Input and Output Channels: -#' The input is a [PredictionClassif] generated by [PipeOpTaskSurvClassifIPCW]. +#' The input is a [PredictionClassif] and a vector containing observed times +#' both generated by [PipeOpTaskSurvClassifIPCW]. #' The output is the input [PredictionClassif] transformed to a [PredictionSurv]. #' Only works during prediction phase. #' diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 759651675..7b99c13ba 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -15,11 +15,29 @@ #' po("trafotask_survclassif_IPCW") #' ``` #' +#' @section Input and Output Channels: +#' [PipeOpTaskSurvIPCW] has one input channel named "input", and two +#' output channels, one named "output" and the other "data". +#' +#' During training, the "output" is the "input" [TaskSurv] transformed to a +#' [TaskClassif][mlr3::TaskClassif]. +#' The target column is named `"status"` and indicates whether an event occurred +#' in each time interval. +#' The transformed task now has the property "weights". +#' The "data" is NULL. +#' +#' During prediction, the "input" [TaskSurv] is transformed to the "output" +#' [TaskClassif][mlr3::TaskClassif] with `"status"` as target. +#' The "data" is a vector containing the time of each observation. +#' This "data" is only meant to be used with the [PipeOpPredClassifSurvIPCW]. +#' #' @section Parameters: #' The parameters are #' #' * `cutoff_time :: numeric()`\cr #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. +#' * `eps :: numeric()`\cr +#' Small value to replace `0` survival probabilities with to prevent infinite weights. #' #' @references #' `r format_bib("vock_2016")` diff --git a/R/pipelines.R b/R/pipelines.R index aca232b4b..1f07a6830 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -618,7 +618,9 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' @param learner [LearnerClassif][mlr3::LearnerClassif]\cr #' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. #' @param cutoff_time `numeric()`\cr -#' #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. +#' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. +#' @param eps `numeric()`\cr +#' Small value to replace `0` survival probabilities with to prevent infinite weights. #' @param output `numeric()`\cr #' If not set to "classif" (default) then the prediction is transformed to a crank. 
#' @param graph_learner `logical(1)`\cr @@ -638,7 +640,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' @family pipelines #' #' @export -pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, output = "classif", graph_learner = FALSE) { +pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-6, output = "classif", graph_learner = FALSE) { assert_true("prob" %in% learner$predict_types) gr = mlr3pipelines::Graph$new() diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index e1753dab2..6fa8bb8ba 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -8,6 +8,7 @@ pipeline_survtoclassif_IPCW( learner, cutoff_time = NULL, + eps = 1e-06, output = "classif", graph_learner = FALSE ) @@ -17,7 +18,10 @@ pipeline_survtoclassif_IPCW( Classification learner to fit the transformed \link[mlr3:TaskClassif]{TaskClassif}.} \item{cutoff_time}{\code{numeric()}\cr -#' Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored.} +Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored.} + +\item{eps}{\code{numeric()}\cr +Small value to replace \code{0} survival probabilities with to prevent infinite weights.} \item{output}{\code{numeric()}\cr If not set to "classif" (default) then the prediction is transformed to a crank.} diff --git a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd index 70781125f..fd336f954 100644 --- a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd +++ b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd @@ -21,7 +21,8 @@ po("trafopred_classifsurv_IPCW") \section{Input and Output Channels}{ -The input is a \link{PredictionClassif} generated by \link{PipeOpTaskSurvClassifIPCW}. +The input is a \link{PredictionClassif} and a vector containing observed times +both generated by \link{PipeOpTaskSurvClassifIPCW}. The output is the input \link{PredictionClassif} transformed to a \link{PredictionSurv}. Only works during prediction phase. } diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 52834eb1a..7b819a1cd 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -19,12 +19,32 @@ po("trafotask_survclassif_IPCW") }\if{html}{\out{}} } +\section{Input and Output Channels}{ + +\link{PipeOpTaskSurvIPCW} has one input channel named "input", and two +output channels, one named "output" and the other "data". + +During training, the "output" is the "input" \link{TaskSurv} transformed to a +\link[mlr3:TaskClassif]{TaskClassif}. +The target column is named \code{"status"} and indicates whether an event occurred +in each time interval. +The transformed task now has the property "weights". +The "data" is NULL. + +During prediction, the "input" \link{TaskSurv} is transformed to the "output" +\link[mlr3:TaskClassif]{TaskClassif} with \code{"status"} as target. +The "data" is a vector containing the time of each observation. +This "data" is only meant to be used with the \link{PipeOpPredClassifSurvIPCW}. +} + \section{Parameters}{ The parameters are \itemize{ \item \code{cutoff_time :: numeric()}\cr Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. +\item \code{eps :: numeric()}\cr +Small value to replace \code{0} survival probabilities with to prevent infinite weights. 
} } From bbb382c786e20f12f1954e55893dad75ce4afe16 Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 13 Aug 2024 14:16:49 +0200 Subject: [PATCH 18/82] fix typo --- R/PipeOpTaskSurvClassifIPCW.R | 2 +- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 7b99c13ba..2755ca9da 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -16,7 +16,7 @@ #' ``` #' #' @section Input and Output Channels: -#' [PipeOpTaskSurvIPCW] has one input channel named "input", and two +#' [PipeOpTaskSurvClassifIPCW] has one input channel named "input", and two #' output channels, one named "output" and the other "data". #' #' During training, the "output" is the "input" [TaskSurv] transformed to a diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 7b819a1cd..d0447d1cd 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -21,7 +21,7 @@ po("trafotask_survclassif_IPCW") \section{Input and Output Channels}{ -\link{PipeOpTaskSurvIPCW} has one input channel named "input", and two +\link{PipeOpTaskSurvClassifIPCW} has one input channel named "input", and two output channels, one named "output" and the other "data". During training, the "output" is the "input" \link{TaskSurv} transformed to a From dd3f74eb92350bc60b5ef355763f11f92c85b710 Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 13 Aug 2024 17:00:03 +0200 Subject: [PATCH 19/82] update tests --- tests/testthat/test_pipelines.R | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index e02ff0a2e..4fbfab4cd 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -187,30 +187,44 @@ skip_if_not_installed("mlr3extralearners") test_that("survtoclassif_IPCW", { requireNamespace("mlr3extralearners") + task = tsk("rats") + split = partition(task) + task_train = task$clone()$filter(split$train) + task_test = task$clone()$filter(split$test) + pipe = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), cutoff_time = 50) expect_class(pipe, "Graph") - ## This needs fixing + # This needs fixing grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), cutoff_time = 50, graph_learner = TRUE) expect_class(grlrn, "GraphLearner") - grlrn$train(task) - p = grlrn$predict(task) + grlrn$train(task_train) + p = grlrn$predict(task_test) expect_prediction_surv(p) + # Test with output = "surv" + grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), + cutoff_time = 50, output = "surv") + expect_class(pipe, "Graph") + suppressWarnings(grlrn$train(task_train)) + p = grlrn$predict(task_test) + expect_prediction_surv(p$trafopred_classifsurv_IPCW.output) + + # Test with cutoff_time grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), cutoff_time = 50) expect_class(pipe, "Graph") - suppressWarnings(grlrn$train(task)) - p1 = grlrn$predict(task) + suppressWarnings(grlrn$train(task_train)) + p1 = grlrn$predict(task_test) expect_prediction_classif(p1$classif.gam.output) grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), cutoff_time = 75) - suppressWarnings(grlrn$train(task)) - p2 = grlrn$predict(task) + suppressWarnings(grlrn$train(task_train)) + p2 = grlrn$predict(task_test) 
expect_false(any(p1$classif.gam.output$data$prob == p2$classif.gam.output$data$prob)) }) From 902b7f01c9a13e8c07a7acd01ccaf4d7ebd12847 Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 13 Aug 2024 17:00:27 +0200 Subject: [PATCH 20/82] correct row ids for surv prediction --- R/PipeOpPredClassifSurvIPCW.R | 14 +++++++------- R/PipeOpTaskSurvClassifIPCW.R | 8 +++++--- man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd | 4 ++-- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 3 ++- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index 4012d7514..4fcbeb941 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -15,8 +15,8 @@ #' ``` #' #' @section Input and Output Channels: -#' The input is a [PredictionClassif] and a vector containing observed times -#' both generated by [PipeOpTaskSurvClassifIPCW]. +#' The input is a [PredictionClassif] and a [data.table] containing observed times +#' and row ids both generated by [PipeOpTaskSurvClassifIPCW]. #' The output is the input [PredictionClassif] transformed to a [PredictionSurv]. #' Only works during prediction phase. #' @@ -37,8 +37,8 @@ PipeOpPredClassifSurvIPCW = R6Class( id = id, input = data.table( name = c("input", "data"), - train = c("NULL", "*"), - predict = c("PredictionClassif", "*") + train = c("NULL", "NULL"), + predict = c("PredictionClassif", "data.table") ), output = data.table( name = "output", @@ -52,10 +52,10 @@ PipeOpPredClassifSurvIPCW = R6Class( private = list( .predict = function(input) { pred = input[[1]] - times = input[[2]] + data = input[[2]] - p = PredictionSurv$new(row_ids = pred$row_ids, - truth = Surv(time = times, + p = PredictionSurv$new(row_ids = data$ids, + truth = Surv(time = data$times, event = as.integer(pred$truth)), crank = pred$prob[, 2]) list(p) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 2755ca9da..a8729a514 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -28,7 +28,8 @@ #' #' During prediction, the "input" [TaskSurv] is transformed to the "output" #' [TaskClassif][mlr3::TaskClassif] with `"status"` as target. -#' The "data" is a vector containing the time of each observation. +#' The "data" is a [data.table] containing the "time" of each subject as well +#' as corresponding "row_ids". #' This "data" is only meant to be used with the [PipeOpPredClassifSurvIPCW]. #' #' @section Parameters: @@ -68,7 +69,7 @@ PipeOpTaskSurvClassifIPCW = R6Class( output = data.table( name = c("output", "data"), train = c("TaskClassif", "NULL"), - predict = c("TaskClassif", "*") + predict = c("TaskClassif", "data.table") ) ) } @@ -82,7 +83,8 @@ PipeOpTaskSurvClassifIPCW = R6Class( target = "status", positive = "1") time = data[[input[[1]]$target_names[1]]] - list(task, time) + data = data.table(ids = input[[1]]$row_ids, times = time) + list(task, data) }, .train = function(input) { diff --git a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd index fd336f954..dbd02553c 100644 --- a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd +++ b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd @@ -21,8 +21,8 @@ po("trafopred_classifsurv_IPCW") \section{Input and Output Channels}{ -The input is a \link{PredictionClassif} and a vector containing observed times -both generated by \link{PipeOpTaskSurvClassifIPCW}. 
+The input is a \link{PredictionClassif} and a \link{data.table} containing observed times +and row ids both generated by \link{PipeOpTaskSurvClassifIPCW}. The output is the input \link{PredictionClassif} transformed to a \link{PredictionSurv}. Only works during prediction phase. } diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index d0447d1cd..f563a16c3 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -33,7 +33,8 @@ The "data" is NULL. During prediction, the "input" \link{TaskSurv} is transformed to the "output" \link[mlr3:TaskClassif]{TaskClassif} with \code{"status"} as target. -The "data" is a vector containing the time of each observation. +The "data" is a \link{data.table} containing the "time" of each subject as well +as corresponding "row_ids". This "data" is only meant to be used with the \link{PipeOpPredClassifSurvIPCW}. } From 908aed48ebc08ef5fac7bb2273f651c3b6ba95df Mon Sep 17 00:00:00 2001 From: Philip Studener Date: Mon, 26 Aug 2024 15:18:25 +0200 Subject: [PATCH 21/82] remove classif output option / updocs --- R/PipeOpTaskSurvClassifIPCW.R | 9 ++++++--- R/pipelines.R | 20 +++++++------------ man/mlr_graphs_survtoclassif_IPCW.Rd | 6 +----- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 4 ++-- tests/testthat/test_pipelines.R | 20 +++++-------------- 5 files changed, 21 insertions(+), 38 deletions(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index a8729a514..ef6b514cd 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -19,8 +19,8 @@ #' [PipeOpTaskSurvClassifIPCW] has one input channel named "input", and two #' output channels, one named "output" and the other "data". #' -#' During training, the "output" is the "input" [TaskSurv] transformed to a -#' [TaskClassif][mlr3::TaskClassif]. +#' Training transforms the "input" [TaskSurv] to a [TaskClassif][mlr3::TaskClassif], +#' which is the "output". #' The target column is named `"status"` and indicates whether an event occurred #' in each time interval. #' The transformed task now has the property "weights". @@ -95,9 +95,12 @@ PipeOpTaskSurvClassifIPCW = R6Class( cutoff_time = self$param_set$values$cutoff_time eps = self$param_set$values$eps - if (cutoff_time >= max(data[[time_var]])) { + if (cutoff_time >= max(data[get(status_var) == 1, get(time_var)])) { stop("Cutoff time must be smaller than the maximum event time.") } + if (!all(data[[status_var]] %in% c(0,1))) { + stop("Event column of data must only contain 0 and 1.") + } # transform data and calculate weights times = data[[time_var]] diff --git a/R/pipelines.R b/R/pipelines.R index 1f07a6830..24c4291c2 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -621,8 +621,6 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. #' @param eps `numeric()`\cr #' Small value to replace `0` survival probabilities with to prevent infinite weights. -#' @param output `numeric()`\cr -#' If not set to "classif" (default) then the prediction is transformed to a crank. #' @param graph_learner `logical(1)`\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. 
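# Usage sketch for the `survtoclassif_IPCW` pipeline documented in the roxygen
# block above. This is only an illustration: it assumes mlr3, mlr3proba,
# mlr3pipelines and rpart are installed, and `cutoff_time = 500` is an
# arbitrary value borrowed from the tests, not a recommendation.
library(mlr3)
library(mlr3proba)
library(mlr3pipelines)

task = tsk("lung")
part = partition(task)

grlrn = ppl("survtoclassif_IPCW",
  learner = lrn("classif.rpart"),
  cutoff_time = 500,
  graph_learner = TRUE
)
grlrn$train(task, row_ids = part$train)
p = grlrn$predict(task, row_ids = part$test)
p$score() # Harrell's C-index by default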
@@ -632,7 +630,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' \enumerate{ #' \item [PipeOpTaskSurvClassifIPCW] Converts [TaskSurv] to a [TaskClassif][mlr3::TaskClassif]. #' \item A [LearnerClassif] is fit and predicted on the new `TaskClassif`. -#' \item Optionally: [PipeOpPredClassifSurvIPCW] transforms the resulting [PredictionClassif][mlr3::PredictionClassif] +#' \item [PipeOpPredClassifSurvIPCW] transforms the resulting [PredictionClassif][mlr3::PredictionClassif] #' to [PredictionSurv]. #' } #' @@ -640,23 +638,19 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' @family pipelines #' #' @export -pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-6, output = "classif", graph_learner = FALSE) { +pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-6, graph_learner = FALSE) { assert_true("prob" %in% learner$predict_types) gr = mlr3pipelines::Graph$new() gr$add_pipeop(mlr3pipelines::po("trafotask_survclassif_IPCW", cutoff_time = cutoff_time)) gr$add_pipeop(mlr3pipelines::po("learner", learner, predict_type = "prob")) + gr$add_pipeop(mlr3pipelines::po("trafopred_classifsurv_IPCW")) + gr$add_pipeop(mlr3pipelines::po("nop")) gr$add_edge(src_id = "trafotask_survclassif_IPCW", dst_id = learner$id, src_channel = "output", dst_channel = "input") - - if (output != "classif") { - gr$add_pipeop(mlr3pipelines::po("trafopred_classifsurv_IPCW")) - gr$add_pipeop(mlr3pipelines::po("nop")) - - gr$add_edge(src_id = learner$id, dst_id = "trafopred_classifsurv_IPCW", src_channel = "output", dst_channel = "input") - gr$add_edge(src_id = "trafotask_survclassif_IPCW", dst_id = "nop", src_channel = "data", dst_channel = "input") - gr$add_edge(src_id = "nop", dst_id = "trafopred_classifsurv_IPCW", src_channel = "output", dst_channel = "data") - } + gr$add_edge(src_id = learner$id, dst_id = "trafopred_classifsurv_IPCW", src_channel = "output", dst_channel = "input") + gr$add_edge(src_id = "trafotask_survclassif_IPCW", dst_id = "nop", src_channel = "data", dst_channel = "input") + gr$add_edge(src_id = "nop", dst_id = "trafopred_classifsurv_IPCW", src_channel = "output", dst_channel = "data") if (graph_learner) { gr = mlr3pipelines::GraphLearner$new(gr) diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index 6fa8bb8ba..083a5618e 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -9,7 +9,6 @@ pipeline_survtoclassif_IPCW( learner, cutoff_time = NULL, eps = 1e-06, - output = "classif", graph_learner = FALSE ) } @@ -23,9 +22,6 @@ Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are \item{eps}{\code{numeric()}\cr Small value to replace \code{0} survival probabilities with to prevent infinite weights.} -\item{output}{\code{numeric()}\cr -If not set to "classif" (default) then the prediction is transformed to a crank.} - \item{graph_learner}{\code{logical(1)}\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} @@ -42,7 +38,7 @@ The pipeline consists of the following steps: \enumerate{ \item \link{PipeOpTaskSurvClassifIPCW} Converts \link{TaskSurv} to a \link[mlr3:TaskClassif]{TaskClassif}. \item A \link{LearnerClassif} is fit and predicted on the new \code{TaskClassif}. 
-\item Optionally: \link{PipeOpPredClassifSurvIPCW} transforms the resulting \link[mlr3:PredictionClassif]{PredictionClassif} +\item \link{PipeOpPredClassifSurvIPCW} transforms the resulting \link[mlr3:PredictionClassif]{PredictionClassif} to \link{PredictionSurv}. } } diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index f563a16c3..150c6796e 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -24,8 +24,8 @@ po("trafotask_survclassif_IPCW") \link{PipeOpTaskSurvClassifIPCW} has one input channel named "input", and two output channels, one named "output" and the other "data". -During training, the "output" is the "input" \link{TaskSurv} transformed to a -\link[mlr3:TaskClassif]{TaskClassif}. +Training transforms the "input" \link{TaskSurv} to a \link[mlr3:TaskClassif]{TaskClassif}, +which is the "output". The target column is named \code{"status"} and indicates whether an event occurred in each time interval. The transformed task now has the property "weights". diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index 4fbfab4cd..d8132d0d8 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -196,35 +196,25 @@ test_that("survtoclassif_IPCW", { cutoff_time = 50) expect_class(pipe, "Graph") - # This needs fixing grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), cutoff_time = 50, graph_learner = TRUE) expect_class(grlrn, "GraphLearner") - grlrn$train(task_train) - p = grlrn$predict(task_test) - expect_prediction_surv(p) - - # Test with output = "surv" - grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), - cutoff_time = 50, output = "surv") - expect_class(pipe, "Graph") suppressWarnings(grlrn$train(task_train)) p = grlrn$predict(task_test) - expect_prediction_surv(p$trafopred_classifsurv_IPCW.output) - + expect_prediction_surv(p) # Test with cutoff_time grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), - cutoff_time = 50) + cutoff_time = 50, graph_learner = TRUE) expect_class(pipe, "Graph") suppressWarnings(grlrn$train(task_train)) p1 = grlrn$predict(task_test) - expect_prediction_classif(p1$classif.gam.output) + expect_prediction_surv(p1) grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), - cutoff_time = 75) + cutoff_time = 75, graph_learner = TRUE) suppressWarnings(grlrn$train(task_train)) p2 = grlrn$predict(task_test) - expect_false(any(p1$classif.gam.output$data$prob == p2$classif.gam.output$data$prob)) + expect_false(any(p1$crank == p2$crank)) }) From 88b206583bd2989319cf8bd672abda31a5c7edfb Mon Sep 17 00:00:00 2001 From: studener Date: Tue, 3 Sep 2024 14:20:44 +0200 Subject: [PATCH 22/82] updocs --- R/PipeOpTaskSurvClassifIPCW.R | 1 + R/pipelines.R | 1 + man/mlr_graphs_responsecompositor.Rd | 1 + man/mlr_graphs_survtoclassif_IPCW.Rd | 4 +++- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 1 + 5 files changed, 7 insertions(+), 1 deletion(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index ef6b514cd..35a8235a7 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -37,6 +37,7 @@ #' #' * `cutoff_time :: numeric()`\cr #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. +#' Should be reasonably smaller than the maximum event time to avoid enormous weights. 
#' * `eps :: numeric()`\cr #' Small value to replace `0` survival probabilities with to prevent infinite weights. #' diff --git a/R/pipelines.R b/R/pipelines.R index ba64fc046..5802182c3 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -668,6 +668,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. #' @param cutoff_time `numeric()`\cr #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. +#' Should be reasonably smaller than the maximum event time to avoid enormous weights. #' @param eps `numeric()`\cr #' Small value to replace `0` survival probabilities with to prevent infinite weights. #' @param graph_learner `logical(1)`\cr diff --git a/man/mlr_graphs_responsecompositor.Rd b/man/mlr_graphs_responsecompositor.Rd index 0bc4b237b..bf4e05459 100644 --- a/man/mlr_graphs_responsecompositor.Rd +++ b/man/mlr_graphs_responsecompositor.Rd @@ -75,6 +75,7 @@ Other pipelines: \code{\link{mlr_graphs_probregr}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, +\code{\link{mlr_graphs_survtoclassif_IPCW}}, \code{\link{mlr_graphs_survtoclassif_disctime}}, \code{\link{mlr_graphs_survtoregr}} } diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index 083a5618e..238d6bf67 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -17,7 +17,8 @@ pipeline_survtoclassif_IPCW( Classification learner to fit the transformed \link[mlr3:TaskClassif]{TaskClassif}.} \item{cutoff_time}{\code{numeric()}\cr -Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored.} +Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. +Should be reasonably smaller than the maximum event time to avoid enormous weights.} \item{eps}{\code{numeric()}\cr Small value to replace \code{0} survival probabilities with to prevent infinite weights.} @@ -47,6 +48,7 @@ Other pipelines: \code{\link{mlr_graphs_crankcompositor}}, \code{\link{mlr_graphs_distrcompositor}}, \code{\link{mlr_graphs_probregr}}, +\code{\link{mlr_graphs_responsecompositor}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, \code{\link{mlr_graphs_survtoclassif_disctime}}, diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 150c6796e..7649bc6ee 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -44,6 +44,7 @@ The parameters are \itemize{ \item \code{cutoff_time :: numeric()}\cr Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. +Should be reasonably smaller than the maximum event time to avoid enormous weights. \item \code{eps :: numeric()}\cr Small value to replace \code{0} survival probabilities with to prevent infinite weights. 
} From 2c980fb0cb0de34d140b6d8a7107d78627a5a616 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 4 Sep 2024 12:49:03 +0200 Subject: [PATCH 23/82] update unloading test --- tests/testthat/test_ipcw.R | 73 ++++++++++++++++++++++++++++++++++++ tests/testthat/test_unload.R | 6 ++- 2 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/test_ipcw.R diff --git a/tests/testthat/test_ipcw.R b/tests/testthat/test_ipcw.R new file mode 100644 index 000000000..3b4affb92 --- /dev/null +++ b/tests/testthat/test_ipcw.R @@ -0,0 +1,73 @@ +test_that("PipeOpTaskSurvClassifIPCW", { + task = tsk("lung") + + # imitate train/test split manually + test_ids = c(2, 10, 107) + train_ids = setdiff(task$row_ids, test_ids) + test_task = task$clone()$filter(rows = test_ids) + train_task = task$clone()$filter(rows = train_ids) + expect_equal(test_task$row_ids, test_ids) + expect_equal(train_task$row_ids, train_ids) + + po_disc = mlr3pipelines::po("trafotask_survclassif_disctime", cut = 4) + expect_class(po_disc, c("PipeOp", "PipeOpTaskSurvClassifDiscTime")) + + res = po_disc$train(list(train_task)) + + # 0 is added + time_cuts = po_disc$state$cut + expect_numeric(time_cuts, len = 5, lower = 0) + # no transformed data during training + expect_data_table(res[["transformed_data"]], nrows = 0, ncols = 0) + # classification task + output_task = res[[1L]] + expect_task_classif(output_task) + expect_equal(output_task$col_roles$original_ids, "id") + expect_equal(output_task$positive, "1") + expect_equal(output_task$target_names, "disc_status") + # new column added to the task + expect_equal("tend", setdiff(output_task$feature_names, task$feature_names)) + # not all observations have events on the last (4th) interval + expect_lt(output_task$nrow, task$nrow * 4) + + res = po_disc$predict(list(test_task)) + pred_task = res[[1L]] + + expect_task_classif(pred_task) + # every test observation will have one row per interval for prediction + expect_equal(pred_task$nrow, test_task$nrow * 4) + # `tend` matches the cut time points (excluding 0 time point) + tends = pred_task$data(cols = "tend")[[1L]] + expect_setequal(unique(tends), time_cuts[2:5]) + # original row ids are correct + expect_equal(pred_task$col_roles$original_ids, "id") + original_ids = pred_task$data(cols = "id")[[1L]] + correct_ids = rep(test_ids, each = 4) + expect_equal(original_ids, correct_ids) + + transformed_data = res[["transformed_data"]] + # check columns in the transformed data.table + expect_setequal(colnames(transformed_data), + c("id", "disc_status", "obs_times", "tend")) + # `id`s are correct + expect_equal(transformed_data$id, correct_ids) + # `disc_status` is the same + expect_equal(as.character(transformed_data$disc_status), + as.character(pred_task$truth())) + # `obs_times` are correct + times = test_task$times() # observed times + expect_setequal(unique(transformed_data$obs_times), times) + # `tends` are correct + expect_setequal(unique(transformed_data$tend), time_cuts[2:5]) + + # `disc_status` per interval and per observation is correct + # before observed time ("obs_times"), "disc_status" = 0 + expect_equal(as.character(unique(transformed_data[tend < obs_times, disc_status])), "0") + + # after observed time, "disc_status" must be the same as "status" + status = as.character(test_task$status()) + td = transformed_data[tend > obs_times] + expect_equal(as.character(unique(td[id == test_ids[1], disc_status])), status[1]) + expect_equal(as.character(unique(td[id == test_ids[2], disc_status])), status[2]) + 
expect_equal(as.character(unique(td[id == test_ids[3], disc_status])), status[3]) +}) diff --git a/tests/testthat/test_unload.R b/tests/testthat/test_unload.R index db25bceaf..90b3edbed 100644 --- a/tests/testthat/test_unload.R +++ b/tests/testthat/test_unload.R @@ -20,8 +20,10 @@ test_that("unloading leaves no trace", { "crankcompose", "distrcompose", "responsecompose", "breslowcompose", # transform prediction type "trafopred_classifsurv_disctime", "trafopred_survregr", "trafopred_regrsurv", + "trafopred_classifsurv_IPCW", # transform task type - "trafotask_regrsurv", "trafotask_survregr", "trafotask_survclassif_disctime" + "trafotask_regrsurv", "trafotask_survregr", "trafotask_survclassif_disctime", + "trafotask_survclassif_IPCW" ) expect_in(proba_pipeops, mlr_pipeops$keys()) @@ -32,7 +34,7 @@ test_that("unloading leaves no trace", { # compose prediction types "crankcompositor", "distrcompositor", "responsecompositor", # transform surv to other tasks - "survtoregr", "survtoclassif_disctime" + "survtoregr", "survtoclassif_disctime", "survtoclassif_IPCW" ) expect_in(proba_graphs, mlr_graphs$keys()) From 3e7f2b6dfa93805c8ea210f64b83bff33ecd2406 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 6 Sep 2024 10:14:36 +0200 Subject: [PATCH 24/82] refactor + fixes --- R/PipeOpPredClassifSurvIPCW.R | 44 ++++++++--- R/PipeOpTaskSurvClassifIPCW.R | 141 ++++++++++++++++++++++------------ R/pipelines.R | 4 +- 3 files changed, 126 insertions(+), 63 deletions(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index 4fcbeb941..be76d3499 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -2,7 +2,8 @@ #' @name mlr_pipeops_trafopred_classifsurv_IPCW #' #' @description -#' Transform [PredictionClassif] to [PredictionSurv]. +#' Transform [PredictionClassif] to [PredictionSurv] using the **I**nverse +#' **P**robability of **C**ensoring **W**eights (IPCW) method by Vock et al. (2016). #' #' @section Dictionary: #' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the @@ -15,10 +16,23 @@ #' ``` #' #' @section Input and Output Channels: -#' The input is a [PredictionClassif] and a [data.table] containing observed times -#' and row ids both generated by [PipeOpTaskSurvClassifIPCW]. +#' The input is a [PredictionClassif] and a [data.table] containing observed times, +#' censoring indicators and row ids, all generated by [PipeOpTaskSurvClassifIPCW] +#' during the prediction phase. +#' #' The output is the input [PredictionClassif] transformed to a [PredictionSurv]. -#' Only works during prediction phase. +#' Each input classification probability prediction corresponds to the +#' probability of having the event up to the specified cutoff time +#' \eqn{\hat{\pi}(\bold{X}_i) = P(T_i < \tau|\bold{X}_i)}, +#' see Vock et al. (2016) and [PipeOpTaskSurvClassifIPCW]. +#' Therefore, these predictions serve as **continuous risk scores** that can be +#' directly interpreted as `crank` predictions in the right-censored survival +#' setting. We also map them to the survival distribution prediction `distr`, +#' at the specified cutoff time point, i.e. as +#' \eqn{S_i(\tau) = 1 - \hat{\pi}(\bold{X}_i)}. 
+#' +#' @references +#' `r format_bib("vock_2016")` #' #' @family PipeOps #' @family Transformation PipeOps @@ -51,13 +65,23 @@ PipeOpPredClassifSurvIPCW = R6Class( private = list( .predict = function(input) { - pred = input[[1]] - data = input[[2]] + pred = input[[1]] # classification predictions + data = input[[2]] # row_ids, times, status + + # risk => prob of having the event up until the cutoff time + risk = pred$prob[, "1"] + surv = matrix(data = 1 - risk, ncol = 1) + colnames(surv) = 500 # need the cutoff time here, add new input[[3]] to pass it on? + + p = PredictionSurv$new( + # the original row ids + row_ids = data$row_ids, + # the original truth (times, status) + truth = Surv(time = data$times, event = data$status), + crank = risk, + distr = surv + ) - p = PredictionSurv$new(row_ids = data$ids, - truth = Surv(time = data$times, - event = as.integer(pred$truth)), - crank = pred$prob[, 2]) list(p) }, diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 35a8235a7..4b84ec650 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -3,7 +3,20 @@ #' @template param_pipelines #' #' @description -#' Transform [TaskSurv] to [TaskClassif][mlr3::TaskClassif] using IPCW (Vock et al., 2016). +#' Transform [TaskSurv] to [TaskClassif][mlr3::TaskClassif] using the **I**nverse +#' **P**robability of **C**ensoring **W**eights (IPCW) method by Vock et al. (2016). +#' +#' Let \eqn{T_i} be the observed times (event or censoring) and \eqn{\delta_i} +#' the censoring indicators for each observation \eqn{i} in the training set. +#' The IPCW technique consists of two steps: first we estimate the censoring +#' distribution \eqn{\hat{G}(t)} using the Kaplan-Meier estimator from the +#' training data. Then we calculate the observation weights given a cutoff time +#' \eqn{\tau} as: +#' +#' \deqn{\omega_i = 1/\hat{G}_{min(T_i,\tau)}} +#' +#' Observations that are censored prior to \eqn{\tau} get zero weights, i.e. +#' \eqn{\omega_i = 0}. #' #' @section Dictionary: #' This [PipeOp][mlr3pipelines::PipeOp] can be instantiated via the @@ -22,14 +35,17 @@ #' Training transforms the "input" [TaskSurv] to a [TaskClassif][mlr3::TaskClassif], #' which is the "output". #' The target column is named `"status"` and indicates whether an event occurred -#' in each time interval. -#' The transformed task now has the property "weights". -#' The "data" is NULL. +#' before the cutoff time \eqn{\tau}. +#' The observed times column is removed from the "output" task. +#' The transformed task has the property `"weights"` (the \eqn{\omega_i}). +#' The "data" is `NULL`. #' #' During prediction, the "input" [TaskSurv] is transformed to the "output" -#' [TaskClassif][mlr3::TaskClassif] with `"status"` as target. -#' The "data" is a [data.table] containing the "time" of each subject as well -#' as corresponding "row_ids". +#' [TaskClassif][mlr3::TaskClassif] with `"status"` as target (again indicating +#' if the event occurred before the cutoff time). +#' The "data" is a [data.table] containing the observed `times` \eqn{T_i} and +#' censoring indicators/`status` \eqn{\delta_i} of each subject as well as the corresponding +#' `row_ids`. #' This "data" is only meant to be used with the [PipeOpPredClassifSurvIPCW]. #' #' @section Parameters: @@ -39,7 +55,7 @@ #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. #' Should be reasonably smaller than the maximum event time to avoid enormous weights. 
#' * `eps :: numeric()`\cr -#' Small value to replace `0` survival probabilities with to prevent infinite weights. +#' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights. #' #' @references #' `r format_bib("vock_2016")` @@ -56,8 +72,8 @@ PipeOpTaskSurvClassifIPCW = R6Class( #' Creates a new instance of this [R6][R6::R6Class] class. initialize = function(id = "trafotask_survclassif_IPCW") { param_set = ps( - cutoff_time = p_dbl(lower = 0, special_vals = list()), - eps = p_dbl(lower = 0, default = 1e-6) + cutoff_time = p_dbl(0), + eps = p_dbl(0, default = 1e-3) ) super$initialize( id = id, @@ -77,59 +93,82 @@ PipeOpTaskSurvClassifIPCW = R6Class( ), private = list( - .predict = function(input) { - data = input[[1]]$data() - data$status = factor(data$status, levels = c("0", "1")) - task = TaskClassif$new(id = input[[1]]$id, backend = data, - target = "status", positive = "1") - - time = data[[input[[1]]$target_names[1]]] - data = data.table(ids = input[[1]]$row_ids, times = time) - list(task, data) - }, - .train = function(input) { - data = input[[1]]$data() - time_var = input[[1]]$target_names[1] - status_var = input[[1]]$target_names[2] + task = input[[1]] - cutoff_time = self$param_set$values$cutoff_time - eps = self$param_set$values$eps + # checks + assert_true(task$censtype == "right") + cutoff_time = assert_numeric(self$param_set$values$cutoff_time, null.ok = FALSE) + max_event_time = max(task$unique_event_times()) + stopifnot(cutoff_time < max_event_time) - if (cutoff_time >= max(data[get(status_var) == 1, get(time_var)])) { - stop("Cutoff time must be smaller than the maximum event time.") - } - if (!all(data[[status_var]] %in% c(0,1))) { - stop("Event column of data must only contain 0 and 1.") + # G(t): KM estimate of the censoring distribution + times = task$times() + status = task$status() + cens_fit = survival::survfit(Surv(times, 1 - status) ~ 1) + # make a G(t) one-column matrix => to use in `distr6` function later + cens_surv = matrix(cens_fit$surv, ncol = 1) # rows => times + + # apply the cutoff to `times` + cut_times = times + cut_times[cut_times > cutoff_time] = cutoff_time + # get G(t) at the observed cutoff'ed times efficiently + extend_times = getFromNamespace("C_Vec_WeightedDiscreteCdf", ns = "distr6") + cens_probs = extend_times(cut_times, cens_fit$time, cdf = 1 - cens_surv, FALSE, FALSE)[,1] + # substitute `eps` for observations: G(t) = 0 + if (any(cens_probs == 0)) { + warning("At least one t: G(t) = 0, will substitute with eps to avoid very large weights") + cens_probs[cens_probs == 0] = self$param_set$values$eps } - # transform data and calculate weights - times = data[[time_var]] - times[times > cutoff_time] = cutoff_time + # calculate the IPC weights + ipc_weights = 1 / cens_probs - status = data[[status_var]] - status[times == cutoff_time] = 0 + # add weights to original data + data = task$data() + time_var = task$target_names[1] + status_var = task$target_names[2] - cens = survival::survfit(Surv(times, 1 - status) ~ 1) - cens$surv[length(cens$surv)] = cens$surv[length(cens$surv)-1] - cens$surv[cens$surv == 0] = eps + # browser() + data[["ipc_weights"]] = ipc_weights + # zero weights for censored observations before the cutoff time + ids = status == 0 & times <= cutoff_time + data[ids, "ipc_weights" := 0] + # update target: status = 0 after cutoff (remains the same before cutoff) + status[times > cutoff_time] = 0 + data[[status_var]] = factor(status, levels = c("0", "1")) + # remove target time variable + data[[time_var]] = 
NULL - weights = rep(1/cens$surv, table(times)) + # create classification task + task_ipcw = TaskClassif$new(id = paste0(task$id, "_IPCW"), backend = data, + target = status_var, positive = "1") + task_ipcw$set_col_roles("ipc_weights", roles = "weight") - # add weights to original data - data[["ipc_weights"]] = weights - data[status_var == 0 & time_var < cutoff_time, "ipc_weights" := 0] - data[[status_var]] = factor(data[[status_var]], levels = c("0", "1")) - data[[time_var]] = NULL + list(task_ipcw, NULL) + }, - # create new task - task = TaskClassif$new(id = paste0(input[[1]]$id, "_IPCW"), backend = data, - target = status_var, positive = "1") + .predict = function(input) { + task = input[[1]] + times = task$times() + status = task$status() + data = task$data() + time_var = task$target_names[1] + status_var = task$target_names[2] + cutoff_time = assert_numeric(self$param_set$values$cutoff_time, null.ok = FALSE) - task$set_col_roles("ipc_weights", roles = "weight") + # update target: status = 0 after cutoff (remains the same before cutoff) + status[times > cutoff_time] = 0 + data[[status_var]] = factor(status, levels = c("0", "1")) + # remove target time variable + data[[time_var]] = NULL + # create classification task + task_classif = TaskClassif$new(id = task$id, backend = data, + target = "status", positive = "1") - self$state = list() - list(task, NULL) + # keep original row_ids, times and status + data = data.table(row_ids = task$row_ids, times = task$times(), status = task$status()) + list(task_classif, data) } ) ) diff --git a/R/pipelines.R b/R/pipelines.R index 5802182c3..8a42afcfd 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -670,7 +670,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. #' Should be reasonably smaller than the maximum event time to avoid enormous weights. #' @param eps `numeric()`\cr -#' Small value to replace `0` survival probabilities with to prevent infinite weights. +#' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights. #' @param graph_learner `logical(1)`\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. 
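# Minimal sketch of the IPCW weighting computed in `.train()` above, on
# made-up data (illustration only: `times`, `status` and `tau` are invented,
# and the base-R step-function lookup is a simplified stand-in for the
# distr6-based evaluation used in the PipeOp).
library(survival)

times  = c(2, 3, 5, 7, 9) # observed times
status = c(1, 0, 1, 0, 1) # 1 = event, 0 = censored
tau    = 6                # cutoff time

# step 1: Kaplan-Meier estimate of the censoring distribution G(t)
cens_fit = survfit(Surv(times, 1 - status) ~ 1)
G = stepfun(cens_fit$time, c(1, cens_fit$surv))

# step 2: weights 1 / G(min(T_i, tau)); observations censored before tau get 0
w = 1 / G(pmin(times, tau))
w[status == 0 & times <= tau] = 0
w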
@@ -688,7 +688,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' @family pipelines #' #' @export -pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-6, graph_learner = FALSE) { +pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-3, graph_learner = FALSE) { assert_true("prob" %in% learner$predict_types) gr = mlr3pipelines::Graph$new() From 860aef73c2e2d6dfcb5a848630ed130be47b4a54 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 6 Sep 2024 10:16:29 +0200 Subject: [PATCH 25/82] rename test file --- tests/testthat/{test_discetetime.R => test_discretetime.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/testthat/{test_discetetime.R => test_discretetime.R} (100%) diff --git a/tests/testthat/test_discetetime.R b/tests/testthat/test_discretetime.R similarity index 100% rename from tests/testthat/test_discetetime.R rename to tests/testthat/test_discretetime.R From 3b37b29914dc961ace84514bd180b63f0eb88f05 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 6 Sep 2024 10:16:41 +0200 Subject: [PATCH 26/82] add IPCW pipeop test + update pipeline test --- tests/testthat/test_ipcw.R | 79 ++++++++++++--------------------- tests/testthat/test_pipelines.R | 51 +++++++++++---------- 2 files changed, 56 insertions(+), 74 deletions(-) diff --git a/tests/testthat/test_ipcw.R b/tests/testthat/test_ipcw.R index 3b4affb92..d48580701 100644 --- a/tests/testthat/test_ipcw.R +++ b/tests/testthat/test_ipcw.R @@ -9,65 +9,42 @@ test_that("PipeOpTaskSurvClassifIPCW", { expect_equal(test_task$row_ids, test_ids) expect_equal(train_task$row_ids, train_ids) - po_disc = mlr3pipelines::po("trafotask_survclassif_disctime", cut = 4) - expect_class(po_disc, c("PipeOp", "PipeOpTaskSurvClassifDiscTime")) - - res = po_disc$train(list(train_task)) - - # 0 is added - time_cuts = po_disc$state$cut - expect_numeric(time_cuts, len = 5, lower = 0) - # no transformed data during training - expect_data_table(res[["transformed_data"]], nrows = 0, ncols = 0) + po_ipcw = mlr3pipelines::po("trafotask_survclassif_IPCW") + expect_class(po_ipcw, c("PipeOp", "PipeOpTaskSurvClassifIPCW")) + # don't allow NULL `cutoff_time` + expect_error(po_ipcw$train(list(train_task)), "not 'NULL'") + # `cutoff_time` should be less than the max event time + po_ipcw$param_set$set_values(cutoff_time = 3000) + expect_error(po_ipcw$train(list(train_task)), "cutoff_time < max_event_time is not TRUE") + po_ipcw$param_set$set_values(cutoff_time = 300) + + res = po_ipcw$train(list(train_task)) + + # no output data during training + expect_null(res[["data"]]) # classification task output_task = res[[1L]] expect_task_classif(output_task) - expect_equal(output_task$col_roles$original_ids, "id") + expect_equal(output_task$col_roles$weight, "ipc_weights") expect_equal(output_task$positive, "1") - expect_equal(output_task$target_names, "disc_status") - # new column added to the task - expect_equal("tend", setdiff(output_task$feature_names, task$feature_names)) - # not all observations have events on the last (4th) interval - expect_lt(output_task$nrow, task$nrow * 4) + expect_equal(output_task$target_names, "status") + expect_equal(output_task$nrow, train_task$nrow) # same observations - res = po_disc$predict(list(test_task)) + # check: is status target correct? (0 before cutoff) + # check: do `output_task$weights` make sense? are 0 the ones that should be 0? 
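# One possible way to flesh out the weight checks sketched in the comments
# above (assumes the `train_task`, `output_task` and cutoff of 300 defined in
# this test; the weight lookup via the backend column `ipc_weights` and the
# row-order alignment with `train_task` are assumptions of this sketch).
w = output_task$backend$data(rows = output_task$row_ids, cols = "ipc_weights")[[1L]]
is_cens_before_cutoff = train_task$status() == 0 & train_task$times() <= 300
expect_true(all(w[is_cens_before_cutoff] == 0)) # censored before cutoff => zero weight
expect_true(all(w[!is_cens_before_cutoff] > 0)) # everyone else gets a positive weight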
+ + res = po_ipcw$predict(list(test_task)) pred_task = res[[1L]] expect_task_classif(pred_task) - # every test observation will have one row per interval for prediction - expect_equal(pred_task$nrow, test_task$nrow * 4) - # `tend` matches the cut time points (excluding 0 time point) - tends = pred_task$data(cols = "tend")[[1L]] - expect_setequal(unique(tends), time_cuts[2:5]) - # original row ids are correct - expect_equal(pred_task$col_roles$original_ids, "id") - original_ids = pred_task$data(cols = "id")[[1L]] - correct_ids = rep(test_ids, each = 4) - expect_equal(original_ids, correct_ids) - - transformed_data = res[["transformed_data"]] - # check columns in the transformed data.table - expect_setequal(colnames(transformed_data), - c("id", "disc_status", "obs_times", "tend")) - # `id`s are correct - expect_equal(transformed_data$id, correct_ids) - # `disc_status` is the same - expect_equal(as.character(transformed_data$disc_status), - as.character(pred_task$truth())) - # `obs_times` are correct - times = test_task$times() # observed times - expect_setequal(unique(transformed_data$obs_times), times) - # `tends` are correct - expect_setequal(unique(transformed_data$tend), time_cuts[2:5]) + # check status? - # `disc_status` per interval and per observation is correct - # before observed time ("obs_times"), "disc_status" = 0 - expect_equal(as.character(unique(transformed_data[tend < obs_times, disc_status])), "0") + # (row_ids, times, status) are correct? + data = res[[2L]] + expect_data_table(data, nrows = length(test_ids), ncols = 3) + expect_equal(names(data), c("row_ids", "times", "status")) - # after observed time, "disc_status" must be the same as "status" - status = as.character(test_task$status()) - td = transformed_data[tend > obs_times] - expect_equal(as.character(unique(td[id == test_ids[1], disc_status])), status[1]) - expect_equal(as.character(unique(td[id == test_ids[2], disc_status])), status[2]) - expect_equal(as.character(unique(td[id == test_ids[3], disc_status])), status[3]) + # create dataset (filter rats) with one event for training the pipeop and set + # the cutoff time 1 time unit just before that. + # Can we trigger the warning for the eps? is it even possible in general now to trigger that code? 
}) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index 3cf1446f7..333c11c0f 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -155,36 +155,41 @@ test_that("survtoclassif_disctime", { skip_if_not_installed("mlr3extralearners") test_that("survtoclassif_IPCW", { - requireNamespace("mlr3extralearners") + task = tsk("lung") + part = partition(task) + task_train = task$clone()$filter(part$train) + task_test = task$clone()$filter(part$test) - task = tsk("rats") - split = partition(task) - task_train = task$clone()$filter(split$train) - task_test = task$clone()$filter(split$test) - - pipe = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), - cutoff_time = 50) + pipe = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), + cutoff_time = 500) expect_class(pipe, "Graph") - grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), - cutoff_time = 50, graph_learner = TRUE) + grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), + cutoff_time = 500, graph_learner = TRUE) expect_class(grlrn, "GraphLearner") - suppressWarnings(grlrn$train(task_train)) + grlrn$train(task_train) + # check: weights were used + expect_vector(grlrn$model$classif.rpart$model$call$weights, ptype = numeric(), + size = task_train$nrow) p = grlrn$predict(task_test) expect_prediction_surv(p) + # check crank and distr exist + # p$data$distr => 1 column, cutoff time as columname - # Test with cutoff_time - grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), - cutoff_time = 50, graph_learner = TRUE) - expect_class(pipe, "Graph") - suppressWarnings(grlrn$train(task_train)) - p1 = grlrn$predict(task_test) - expect_prediction_surv(p1) - - grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.gam"), - cutoff_time = 75, graph_learner = TRUE) - suppressWarnings(grlrn$train(task_train)) + # Test with different cutoff_time (fix cutoff code before during predict phase) + grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), + cutoff_time = 600, graph_learner = TRUE) + grlrn$train(task_train) p2 = grlrn$predict(task_test) - expect_false(any(p1$crank == p2$crank)) + # different cutoff time, different crank predictions + # expect_false(all(p$crank == p2$crank)) + + # C-indexes the same? + expect_equal(p$score(), p2$score()) + # survival tree is worse? + p1 = lrn("surv.rpart")$train(task_train)$predict(task_test) + expect_lte(p1$score(), p$score()) + + # check msr("surv.brier") with only one time point? Eg prob at cutoff time? }) From 4bd50fcfad9faacffdc3f8eb4bdaa10626203d26 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 6 Sep 2024 10:17:06 +0200 Subject: [PATCH 27/82] update docs --- man/mlr_graphs_survtoclassif_IPCW.Rd | 4 +-- man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd | 25 ++++++++++++--- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 32 ++++++++++++++----- 3 files changed, 47 insertions(+), 14 deletions(-) diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index 238d6bf67..d2b1fe91f 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -8,7 +8,7 @@ pipeline_survtoclassif_IPCW( learner, cutoff_time = NULL, - eps = 1e-06, + eps = 0.001, graph_learner = FALSE ) } @@ -21,7 +21,7 @@ Cutoff time for IPCW. 
Observations with time larger than \code{cutoff_time} are Should be reasonably smaller than the maximum event time to avoid enormous weights.} \item{eps}{\code{numeric()}\cr -Small value to replace \code{0} survival probabilities with to prevent infinite weights.} +Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights.} \item{graph_learner}{\code{logical(1)}\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a diff --git a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd index dbd02553c..a1caf0b9d 100644 --- a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd +++ b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd @@ -5,7 +5,8 @@ \alias{PipeOpPredClassifSurvIPCW} \title{PipeOpPredClassifSurvIPCW} \description{ -Transform \link{PredictionClassif} to \link{PredictionSurv}. +Transform \link{PredictionClassif} to \link{PredictionSurv} using the \strong{I}nverse +\strong{P}robability of \strong{C}ensoring \strong{W}eights (IPCW) method by Vock et al. (2016). } \section{Dictionary}{ @@ -21,12 +22,28 @@ po("trafopred_classifsurv_IPCW") \section{Input and Output Channels}{ -The input is a \link{PredictionClassif} and a \link{data.table} containing observed times -and row ids both generated by \link{PipeOpTaskSurvClassifIPCW}. +The input is a \link{PredictionClassif} and a \link{data.table} containing observed times, +censoring indicators and row ids, all generated by \link{PipeOpTaskSurvClassifIPCW} +during the prediction phase. + The output is the input \link{PredictionClassif} transformed to a \link{PredictionSurv}. -Only works during prediction phase. +Each input classification probability prediction corresponds to the +probability of having the event up to the specified cutoff time +\eqn{\hat{\pi}(\bold{X}_i) = P(T_i < \tau|\bold{X}_i)}, +see Vock et al. (2016) and \link{PipeOpTaskSurvClassifIPCW}. +Therefore, these predictions serve as \strong{continuous risk scores} that can be +directly interpreted as \code{crank} predictions in the right-censored survival +setting. We also map them to the survival distribution prediction \code{distr}, +at the specified cutoff time point, i.e. as +\eqn{S_i(\tau) = 1 - \hat{\pi}(\bold{X}_i)}. } +\references{ +Vock, M D, Wolfson, Julian, Bandyopadhyay, Sunayan, Adomavicius, Gediminas, Johnson, E P, Vazquez-Benitez, Gabriela, O'Connor, J P (2016). +\dQuote{Adapting machine learning techniques to censored time-to-event health record data: A general-purpose approach using inverse probability of censoring weighting.} +\emph{Journal of Biomedical Informatics}, \bold{61}, 119--131. +\doi{https://doi.org/10.1016/j.jbi.2016.03.009}, \url{https://www.sciencedirect.com/science/article/pii/S1532046416000496}. +} \seealso{ Other PipeOps: \code{\link{PipeOpPredTransformer}}, diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 7649bc6ee..6dc34065f 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -5,7 +5,20 @@ \alias{PipeOpTaskSurvClassifIPCW} \title{PipeOpTaskSurvClassifIPCW} \description{ -Transform \link{TaskSurv} to \link[mlr3:TaskClassif]{TaskClassif} using IPCW (Vock et al., 2016). +Transform \link{TaskSurv} to \link[mlr3:TaskClassif]{TaskClassif} using the \strong{I}nverse +\strong{P}robability of \strong{C}ensoring \strong{W}eights (IPCW) method by Vock et al. (2016). 
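The probability-to-survival mapping documented for `trafopred_classifsurv_IPCW` above takes only a few lines to write out; the probabilities and the cutoff of 500 below are illustrative values, not package defaults.

# classifier output: P(T_i < tau | X_i) for three test observations (illustrative)
prob_event = c(0.10, 0.45, 0.80)
tau = 500

crank = prob_event                       # higher event probability means higher risk
surv = matrix(1 - prob_event, ncol = 1)  # S_i(tau) = 1 - pi_hat(X_i)
colnames(surv) = tau                     # the single evaluation time is the cutoff
surv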
+ +Let \eqn{T_i} be the observed times (event or censoring) and \eqn{\delta_i} +the censoring indicators for each observation \eqn{i} in the training set. +The IPCW technique consists of two steps: first we estimate the censoring +distribution \eqn{\hat{G}(t)} using the Kaplan-Meier estimator from the +training data. Then we calculate the observation weights given a cutoff time +\eqn{\tau} as: + +\deqn{\omega_i = 1/\hat{G}_{min(T_i,\tau)}} + +Observations that are censored prior to \eqn{\tau} get zero weights, i.e. +\eqn{\omega_i = 0}. } \section{Dictionary}{ @@ -27,14 +40,17 @@ output channels, one named "output" and the other "data". Training transforms the "input" \link{TaskSurv} to a \link[mlr3:TaskClassif]{TaskClassif}, which is the "output". The target column is named \code{"status"} and indicates whether an event occurred -in each time interval. -The transformed task now has the property "weights". -The "data" is NULL. +before the cutoff time \eqn{\tau}. +The observed times column is removed from the "output" task. +The transformed task has the property \code{"weights"} (the \eqn{\omega_i}). +The "data" is \code{NULL}. During prediction, the "input" \link{TaskSurv} is transformed to the "output" -\link[mlr3:TaskClassif]{TaskClassif} with \code{"status"} as target. -The "data" is a \link{data.table} containing the "time" of each subject as well -as corresponding "row_ids". +\link[mlr3:TaskClassif]{TaskClassif} with \code{"status"} as target (again indicating +if the event occurred before the cutoff time). +The "data" is a \link{data.table} containing the observed \code{times} \eqn{T_i} and +censoring indicators/\code{status} \eqn{\delta_i} of each subject as well as the corresponding +\code{row_ids}. This "data" is only meant to be used with the \link{PipeOpPredClassifSurvIPCW}. } @@ -46,7 +62,7 @@ The parameters are Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. Should be reasonably smaller than the maximum event time to avoid enormous weights. \item \code{eps :: numeric()}\cr -Small value to replace \code{0} survival probabilities with to prevent infinite weights. +Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights. 
} } From e29329ec2963c5ceb3c8cfde7940e1750467943d Mon Sep 17 00:00:00 2001 From: studener Date: Mon, 9 Sep 2024 11:23:46 +0200 Subject: [PATCH 28/82] add examples --- R/PipeOpTaskSurvClassifIPCW.R | 16 +++++++++++++ R/pipelines.R | 22 ++++++++++++++++++ man/LearnerDens.Rd | 5 ---- man/mlr_graphs_survtoclassif_IPCW.Rd | 23 +++++++++++++++++++ man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 17 ++++++++++++++ 5 files changed, 78 insertions(+), 5 deletions(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 4b84ec650..cc1c6f05d 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -62,6 +62,22 @@ #' #' @family PipeOps #' @family Transformation PipeOps +#' @examples +#' \dontrun{ +#' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { +#' +#' library(mlr3) +#' library(mlr3pipelines) +#' +#' task = tsk("lung") +#' part = partition(task) +#' task_train = task$clone()$filter(part$train) +#' task_test = task$clone()$filter(part$test) +#' pipe_op = po("trafotask_survclassif_IPCW", cutoff_time = 500) +#' pipe_op$train(list(task_train)) +#' pipe_op$predict(list(task_test)) +#' } +#' } #' @export PipeOpTaskSurvClassifIPCW = R6Class( "PipeOpTaskSurvClassifIPCW", diff --git a/R/pipelines.R b/R/pipelines.R index 8a42afcfd..68170690f 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -687,6 +687,28 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] #' @family pipelines #' +#' @examples +#' \dontrun{ +#' if (requireNamespace("mlr3pipelines", quietly = TRUE) && +#' requireNamespace("mlr3learners", quietly = TRUE)) { +#' +#' library(mlr3) +#' library(mlr3learners) +#' library(mlr3pipelines) +#' +#' task = tsk("lung") +#' part = partition(task) +#' +#' grlrn = ppl( +#' "survtoclassif_IPCW", +#' learner = lrn("classif.rpart"), +#' cutoff_time = 500, # Observations after 500 days are censored +#' graph_learner = TRUE +#' ) +#' grlrn$train(task, row_ids = part$train) +#' grlrn$predict(task, row_ids = part$test) +#' } +#' } #' @export pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-3, graph_learner = FALSE) { assert_true("prob" %in% learner$predict_types) diff --git a/man/LearnerDens.Rd b/man/LearnerDens.Rd index a85816150..84baf4fc6 100644 --- a/man/LearnerDens.Rd +++ b/man/LearnerDens.Rd @@ -66,7 +66,6 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. predict_types = "cdf", feature_types = character(), properties = character(), - data_formats = "data.table", packages = character(), label = NA_character_, man = NA_character_ @@ -100,10 +99,6 @@ The following properties are currently standardized and understood by learners i \item \code{"oob_error"}: The learner supports extraction of estimated out of bag error, i.e. comes with a \code{oob_error()} extractor function (see section on optional extractors in \link{Learner}). }} -\item{\code{data_formats}}{(\code{character()})\cr -Set of supported data formats which can be processed during \verb{$train()} and \verb{$predict()}, -e.g. \code{"data.table"}.} - \item{\code{packages}}{(\code{character()})\cr Set of required packages. 
A warning is signaled by the constructor if at least one of the packages is not installed, diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index d2b1fe91f..64c8b0824 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -43,6 +43,29 @@ The pipeline consists of the following steps: to \link{PredictionSurv}. } } +\examples{ +\dontrun{ +if (requireNamespace("mlr3pipelines", quietly = TRUE) && + requireNamespace("mlr3learners", quietly = TRUE)) { + + library(mlr3) + library(mlr3learners) + library(mlr3pipelines) + + task = tsk("lung") + part = partition(task) + + grlrn = ppl( + "survtoclassif_IPCW", + learner = lrn("classif.rpart"), + cutoff_time = 500, # Observations after 500 days are censored + graph_learner = TRUE + ) + grlrn$train(task, row_ids = part$train) + grlrn$predict(task, row_ids = part$test) +} +} +} \seealso{ Other pipelines: \code{\link{mlr_graphs_crankcompositor}}, diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 6dc34065f..5e0aea892 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -66,6 +66,23 @@ Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinit } } +\examples{ +\dontrun{ +if (requireNamespace("mlr3pipelines", quietly = TRUE)) { + + library(mlr3) + library(mlr3pipelines) + +task = tsk("lung") +part = partition(task) +task_train = task$clone()$filter(part$train) +task_test = task$clone()$filter(part$test) +pipe_op = po("trafotask_survclassif_IPCW", cutoff_time = 500) +pipe_op$train(list(task_train)) +pipe_op$predict(list(task_test)) +} +} +} \references{ Vock, M D, Wolfson, Julian, Bandyopadhyay, Sunayan, Adomavicius, Gediminas, Johnson, E P, Vazquez-Benitez, Gabriela, O'Connor, J P (2016). \dQuote{Adapting machine learning techniques to censored time-to-event health record data: A general-purpose approach using inverse probability of censoring weighting.} From 2ea204c7291c7031e4e547c82e442e8ec37761e2 Mon Sep 17 00:00:00 2001 From: studener Date: Mon, 9 Sep 2024 12:38:27 +0200 Subject: [PATCH 29/82] update pipeops --- R/PipeOpPredClassifSurvIPCW.R | 4 ++-- R/PipeOpTaskSurvClassifIPCW.R | 10 +++++----- tests/testthat/test_ipcw.R | 11 ++++------- tests/testthat/test_pipelines.R | 13 +++++-------- 4 files changed, 16 insertions(+), 22 deletions(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index be76d3499..9f9ea75e1 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -52,7 +52,7 @@ PipeOpPredClassifSurvIPCW = R6Class( input = data.table( name = c("input", "data"), train = c("NULL", "NULL"), - predict = c("PredictionClassif", "data.table") + predict = c("PredictionClassif", "list") ), output = data.table( name = "output", @@ -71,7 +71,7 @@ PipeOpPredClassifSurvIPCW = R6Class( # risk => prob of having the event up until the cutoff time risk = pred$prob[, "1"] surv = matrix(data = 1 - risk, ncol = 1) - colnames(surv) = 500 # need the cutoff time here, add new input[[3]] to pass it on? 
+ colnames(surv) = data$cutoff_time p = PredictionSurv$new( # the original row ids diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index cc1c6f05d..3da232a7c 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -102,7 +102,7 @@ PipeOpTaskSurvClassifIPCW = R6Class( output = data.table( name = c("output", "data"), train = c("TaskClassif", "NULL"), - predict = c("TaskClassif", "data.table") + predict = c("TaskClassif", "list") ) ) } @@ -140,12 +140,11 @@ PipeOpTaskSurvClassifIPCW = R6Class( # calculate the IPC weights ipc_weights = 1 / cens_probs - # add weights to original data data = task$data() time_var = task$target_names[1] status_var = task$target_names[2] - # browser() + # add weights to original data data[["ipc_weights"]] = ipc_weights # zero weights for censored observations before the cutoff time ids = status == 0 & times <= cutoff_time @@ -179,11 +178,12 @@ PipeOpTaskSurvClassifIPCW = R6Class( # remove target time variable data[[time_var]] = NULL # create classification task - task_classif = TaskClassif$new(id = task$id, backend = data, + task_classif = TaskClassif$new(id = paste0(task$id, "_IPCW"), backend = data, target = "status", positive = "1") # keep original row_ids, times and status - data = data.table(row_ids = task$row_ids, times = task$times(), status = task$status()) + data = list(row_ids = task$row_ids, times = task$times(), status = task$status(), + cutoff_time = cutoff_time) list(task_classif, data) } ) diff --git a/tests/testthat/test_ipcw.R b/tests/testthat/test_ipcw.R index d48580701..deb47cffe 100644 --- a/tests/testthat/test_ipcw.R +++ b/tests/testthat/test_ipcw.R @@ -39,12 +39,9 @@ test_that("PipeOpTaskSurvClassifIPCW", { expect_task_classif(pred_task) # check status? - # (row_ids, times, status) are correct? + # (row_ids, times, status, cutoff_time) are correct? data = res[[2L]] - expect_data_table(data, nrows = length(test_ids), ncols = 3) - expect_equal(names(data), c("row_ids", "times", "status")) - - # create dataset (filter rats) with one event for training the pipeop and set - # the cutoff time 1 time unit just before that. - # Can we trigger the warning for the eps? is it even possible in general now to trigger that code? 
+ expect_list(data, len = 4) + expect_true(length(data$row_ids) == length(test_ids)) + expect_equal(names(data), c("row_ids", "times", "status", "cutoff_time")) }) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index 1f8e1d5c4..210924eee 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -45,7 +45,7 @@ test_that("survtoclassif_disctime", { expect_equal(p$row_ids, p2$row_ids) expect_equal(p$truth, p2$truth) - expect_equal(p$score(), p2$score(), tolerance = 0.01) + expect_equal(p$score(), p2$score(), tolerance = 0.015) # Test with cut grlrn = mlr3pipelines::ppl("survtoclassif_disctime", learner = lrn("classif.log_reg"), @@ -120,7 +120,10 @@ test_that("survtoclassif_IPCW", { p = grlrn$predict(task_test) expect_prediction_surv(p) # check crank and distr exist + expect_true("crank" %in% names(p$data)) # p$data$distr => 1 column, cutoff time as columname + expect_matrix(p$data$dist, nrows = nrow(task_test$nrow), ncols = 1) + expect_true(colnames(p$data$dist) == "500") # Test with different cutoff_time (fix cutoff code before during predict phase) grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), @@ -129,13 +132,7 @@ test_that("survtoclassif_IPCW", { p2 = grlrn$predict(task_test) # different cutoff time, different crank predictions - # expect_false(all(p$crank == p2$crank)) - - # C-indexes the same? - expect_equal(p$score(), p2$score()) - # survival tree is worse? - p1 = lrn("surv.rpart")$train(task_train)$predict(task_test) - expect_lte(p1$score(), p$score()) + expect_false(all(p$crank == p2$crank)) # check msr("surv.brier") with only one time point? Eg prob at cutoff time? }) From 89f831f3ec50814dca0651e2d04f6ba341f46483 Mon Sep 17 00:00:00 2001 From: studener Date: Thu, 12 Sep 2024 15:10:05 +0200 Subject: [PATCH 30/82] add tests --- tests/testthat/test_ipcw.R | 7 +++++-- tests/testthat/test_pipelines.R | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_ipcw.R b/tests/testthat/test_ipcw.R index deb47cffe..9a3906461 100644 --- a/tests/testthat/test_ipcw.R +++ b/tests/testthat/test_ipcw.R @@ -26,18 +26,21 @@ test_that("PipeOpTaskSurvClassifIPCW", { output_task = res[[1L]] expect_task_classif(output_task) expect_equal(output_task$col_roles$weight, "ipc_weights") + expect_equal(sum(output_task$weights$weight), output_task$nrow) expect_equal(output_task$positive, "1") expect_equal(output_task$target_names, "status") expect_equal(output_task$nrow, train_task$nrow) # same observations - # check: is status target correct? (0 before cutoff) # check: do `output_task$weights` make sense? are 0 the ones that should be 0? + zero_weight_rows = train_task$status() == 0 & train_task$times() < 300 + expect_true(all(output_task$weights$weight[zero_weight_rows] == 0)) res = po_ipcw$predict(list(test_task)) pred_task = res[[1L]] expect_task_classif(pred_task) - # check status? + # check status == 0 for time > cutoff time + expect_true(all(pred_task$data(,"status")[res$data$times > 300] == 0)) # (row_ids, times, status, cutoff_time) are correct? 
data = res[[2L]] diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index 210924eee..b68366573 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -125,7 +125,7 @@ test_that("survtoclassif_IPCW", { expect_matrix(p$data$dist, nrows = nrow(task_test$nrow), ncols = 1) expect_true(colnames(p$data$dist) == "500") - # Test with different cutoff_time (fix cutoff code before during predict phase) + # Test with different cutoff_time grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), cutoff_time = 600, graph_learner = TRUE) grlrn$train(task_train) @@ -135,4 +135,5 @@ test_that("survtoclassif_IPCW", { expect_false(all(p$crank == p2$crank)) # check msr("surv.brier") with only one time point? Eg prob at cutoff time? + expect_numeric(p2$score(msr("surv.graf", times = 600)), any.missing = FALSE) }) From ce1b347055988cef52d305cea2102aff4004a93e Mon Sep 17 00:00:00 2001 From: john Date: Tue, 17 Sep 2024 15:39:53 +0200 Subject: [PATCH 31/82] update doc (encapsulate method from mlr3 dev) --- man/LearnerDens.Rd | 1 + man/LearnerSurv.Rd | 1 + man/PredictionDens.Rd | 3 ++- man/PredictionSurv.Rd | 3 ++- man/mlr_learners_dens.hist.Rd | 1 + man/mlr_learners_dens.kde.Rd | 1 + man/mlr_learners_surv.coxph.Rd | 1 + man/mlr_learners_surv.kaplan.Rd | 1 + man/mlr_learners_surv.rpart.Rd | 1 + 9 files changed, 11 insertions(+), 2 deletions(-) diff --git a/man/LearnerDens.Rd b/man/LearnerDens.Rd index 84baf4fc6..a963d90c5 100644 --- a/man/LearnerDens.Rd +++ b/man/LearnerDens.Rd @@ -44,6 +44,7 @@ Other Learner:
Inherited methods
  • mlr3::Learner$base_learner()
+ • mlr3::Learner$encapsulate()
  • mlr3::Learner$format()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • diff --git a/man/LearnerSurv.Rd b/man/LearnerSurv.Rd index 54c3cd59e..257d63220 100644 --- a/man/LearnerSurv.Rd +++ b/man/LearnerSurv.Rd @@ -47,6 +47,7 @@ Other Learner:
    Inherited methods
    • mlr3::Learner$base_learner()
+ • mlr3::Learner$encapsulate()
    • mlr3::Learner$format()
    • mlr3::Learner$help()
    • mlr3::Learner$predict()
    • diff --git a/man/PredictionDens.Rd b/man/PredictionDens.Rd index c63a03c48..1a242c3ee 100644 --- a/man/PredictionDens.Rd +++ b/man/PredictionDens.Rd @@ -45,11 +45,12 @@ Access the stored estimated distribution.} } } \if{html}{\out{ -
      Inherited methods +
      Inherited methods diff --git a/man/PredictionSurv.Rd b/man/PredictionSurv.Rd index c0d22e10c..652f800d3 100644 --- a/man/PredictionSurv.Rd +++ b/man/PredictionSurv.Rd @@ -60,11 +60,12 @@ Access the stored predicted survival time.} } } \if{html}{\out{ -
      Inherited methods +
      Inherited methods diff --git a/man/mlr_learners_dens.hist.Rd b/man/mlr_learners_dens.hist.Rd index 54f29367c..58341647a 100644 --- a/man/mlr_learners_dens.hist.Rd +++ b/man/mlr_learners_dens.hist.Rd @@ -48,6 +48,7 @@ Other density estimators:
      Inherited methods
      • mlr3::Learner$base_learner()
+ • mlr3::Learner$encapsulate()
      • mlr3::Learner$format()
      • mlr3::Learner$help()
      • mlr3::Learner$predict()
      • diff --git a/man/mlr_learners_dens.kde.Rd b/man/mlr_learners_dens.kde.Rd index bdff9c318..970ff3f94 100644 --- a/man/mlr_learners_dens.kde.Rd +++ b/man/mlr_learners_dens.kde.Rd @@ -59,6 +59,7 @@ Other density estimators:
        Inherited methods
        • mlr3::Learner$base_learner()
+ • mlr3::Learner$encapsulate()
        • mlr3::Learner$format()
        • mlr3::Learner$help()
        • mlr3::Learner$predict()
        • diff --git a/man/mlr_learners_surv.coxph.Rd b/man/mlr_learners_surv.coxph.Rd index 5979b9f2f..43ac2ee80 100644 --- a/man/mlr_learners_surv.coxph.Rd +++ b/man/mlr_learners_surv.coxph.Rd @@ -69,6 +69,7 @@ Other survival learners:
          Inherited methods
          • mlr3::Learner$base_learner()
+ • mlr3::Learner$encapsulate()
          • mlr3::Learner$format()
          • mlr3::Learner$help()
          • mlr3::Learner$predict()
          • diff --git a/man/mlr_learners_surv.kaplan.Rd b/man/mlr_learners_surv.kaplan.Rd index e3cd8992f..5829c99f5 100644 --- a/man/mlr_learners_surv.kaplan.Rd +++ b/man/mlr_learners_surv.kaplan.Rd @@ -65,6 +65,7 @@ Other survival learners:
            Inherited methods
            • mlr3::Learner$base_learner()
+ • mlr3::Learner$encapsulate()
            • mlr3::Learner$format()
            • mlr3::Learner$help()
            • mlr3::Learner$predict()
            • diff --git a/man/mlr_learners_surv.rpart.Rd b/man/mlr_learners_surv.rpart.Rd index e243a1bf5..fa2ada657 100644 --- a/man/mlr_learners_surv.rpart.Rd +++ b/man/mlr_learners_surv.rpart.Rd @@ -85,6 +85,7 @@ Other survival learners:
              Inherited methods
              • mlr3::Learner$base_learner()
+ • mlr3::Learner$encapsulate()
              • mlr3::Learner$format()
              • mlr3::Learner$help()
              • mlr3::Learner$predict()
              • From 24f38fe6018914ab0a236b5cbd926fbc14edaaaa Mon Sep 17 00:00:00 2001 From: john Date: Tue, 17 Sep 2024 15:44:18 +0200 Subject: [PATCH 32/82] improve doc and example --- R/PipeOpTaskSurvClassifIPCW.R | 35 ++++++++++++------- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 33 ++++++++++------- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 3da232a7c..90c8b2e3b 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -13,9 +13,9 @@ #' training data. Then we calculate the observation weights given a cutoff time #' \eqn{\tau} as: #' -#' \deqn{\omega_i = 1/\hat{G}_{min(T_i,\tau)}} +#' \deqn{\omega_i = 1/\hat{G}{(min(T_i,\tau))}} #' -#' Observations that are censored prior to \eqn{\tau} get zero weights, i.e. +#' Observations that are censored prior to \eqn{\tau} are assigned zero weights, i.e. #' \eqn{\omega_i = 0}. #' #' @section Dictionary: @@ -34,8 +34,8 @@ #' #' Training transforms the "input" [TaskSurv] to a [TaskClassif][mlr3::TaskClassif], #' which is the "output". -#' The target column is named `"status"` and indicates whether an event occurred -#' before the cutoff time \eqn{\tau}. +#' The target column is named `"status"` and indicates whether **an event occurred** +#' **before the cutoff time** \eqn{\tau} (`1` = yes, `0` = no). #' The observed times column is removed from the "output" task. #' The transformed task has the property `"weights"` (the \eqn{\omega_i}). #' The "data" is `NULL`. @@ -65,17 +65,26 @@ #' @examples #' \dontrun{ #' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { -#' #' library(mlr3) #' library(mlr3pipelines) #' -#' task = tsk("lung") -#' part = partition(task) -#' task_train = task$clone()$filter(part$train) -#' task_test = task$clone()$filter(part$test) -#' pipe_op = po("trafotask_survclassif_IPCW", cutoff_time = 500) -#' pipe_op$train(list(task_train)) -#' pipe_op$predict(list(task_test)) +#' task = tsk("lung") +#' +#' # split task to train and test subtasks +#' part = partition(task) +#' task_train = task$clone()$filter(part$train) +#' task_test = task$clone()$filter(part$test) +#' +#' # define IPCW pipeop +#' po_ipcw = po("trafotask_survclassif_IPCW", cutoff_time = 500) +#' +#' # during training, output is a classification task with weights +#' task_classif_train = po_ipcw$train(list(task_train))[[1]] +#' task_classif_train +#' +#' # during prediction, output is a classification task (no weights) +#' task_classif_test = po_ipcw$predict(list(task_test))[[1]] +#' task_classif_test #' } #' } #' @export @@ -181,7 +190,7 @@ PipeOpTaskSurvClassifIPCW = R6Class( task_classif = TaskClassif$new(id = paste0(task$id, "_IPCW"), backend = data, target = "status", positive = "1") - # keep original row_ids, times and status + # keep original row_ids, times and status as well the cutoff time data = list(row_ids = task$row_ids, times = task$times(), status = task$status(), cutoff_time = cutoff_time) list(task_classif, data) diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 5e0aea892..d07d533bc 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -15,9 +15,9 @@ distribution \eqn{\hat{G}(t)} using the Kaplan-Meier estimator from the training data. 
Then we calculate the observation weights given a cutoff time \eqn{\tau} as: -\deqn{\omega_i = 1/\hat{G}_{min(T_i,\tau)}} +\deqn{\omega_i = 1/\hat{G}{(min(T_i,\tau))}} -Observations that are censored prior to \eqn{\tau} get zero weights, i.e. +Observations that are censored prior to \eqn{\tau} are assigned zero weights, i.e. \eqn{\omega_i = 0}. } \section{Dictionary}{ @@ -39,8 +39,8 @@ output channels, one named "output" and the other "data". Training transforms the "input" \link{TaskSurv} to a \link[mlr3:TaskClassif]{TaskClassif}, which is the "output". -The target column is named \code{"status"} and indicates whether an event occurred -before the cutoff time \eqn{\tau}. +The target column is named \code{"status"} and indicates whether \strong{an event occurred} +\strong{before the cutoff time} \eqn{\tau} (\code{1} = yes, \code{0} = no). The observed times column is removed from the "output" task. The transformed task has the property \code{"weights"} (the \eqn{\omega_i}). The "data" is \code{NULL}. @@ -69,17 +69,26 @@ Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinit \examples{ \dontrun{ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { - library(mlr3) library(mlr3pipelines) -task = tsk("lung") -part = partition(task) -task_train = task$clone()$filter(part$train) -task_test = task$clone()$filter(part$test) -pipe_op = po("trafotask_survclassif_IPCW", cutoff_time = 500) -pipe_op$train(list(task_train)) -pipe_op$predict(list(task_test)) + task = tsk("lung") + + # split task to train and test subtasks + part = partition(task) + task_train = task$clone()$filter(part$train) + task_test = task$clone()$filter(part$test) + + # define IPCW pipeop + po_ipcw = po("trafotask_survclassif_IPCW", cutoff_time = 500) + + # during training, output is a classification task with weights + task_classif_train = po_ipcw$train(list(task_train))[[1]] + task_classif_train + + # during prediction, output is a classification task (no weights) + task_classif_test = po_ipcw$predict(list(task_test))[[1]] + task_classif_test } } } From 792bd6e1b8514d3a62ce6ddba5dc7aef0acb9160 Mon Sep 17 00:00:00 2001 From: john Date: Tue, 17 Sep 2024 16:44:40 +0200 Subject: [PATCH 33/82] refinements (doc and eps param) --- R/PipeOpTaskSurvClassifIPCW.R | 10 +++++++--- R/pipelines.R | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 90c8b2e3b..be43511f5 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -55,7 +55,8 @@ #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. #' Should be reasonably smaller than the maximum event time to avoid enormous weights. #' * `eps :: numeric()`\cr -#' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights. +#' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent +#' infinite weights (a warning is triggered if this happens). 
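In isolation, the `eps` guard documented just above amounts to the following; the censoring probabilities are made-up numbers, while the warning text mirrors the one added in this patch.

eps = 1e-3
cens_probs = c(0.90, 0.50, 0)  # G(min(T_i, tau)) for three training observations
if (any(cens_probs == 0)) {
  warning("At least one t: G(t) = 0, will substitute with eps to avoid very large weights")
  cens_probs[cens_probs == 0] = eps
}
ipc_weights = 1 / cens_probs  # 1.11, 2, 1000 instead of 1.11, 2, Inf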
#' #' @references #' `r format_bib("vock_2016")` @@ -100,6 +101,8 @@ PipeOpTaskSurvClassifIPCW = R6Class( cutoff_time = p_dbl(0), eps = p_dbl(0, default = 1e-3) ) + param_set$set_values(eps = 1e-3) + super$initialize( id = id, param_set = param_set, @@ -125,7 +128,7 @@ PipeOpTaskSurvClassifIPCW = R6Class( assert_true(task$censtype == "right") cutoff_time = assert_numeric(self$param_set$values$cutoff_time, null.ok = FALSE) max_event_time = max(task$unique_event_times()) - stopifnot(cutoff_time < max_event_time) + stopifnot(cutoff_time <= max_event_time) # G(t): KM estimate of the censoring distribution times = task$times() @@ -140,7 +143,8 @@ PipeOpTaskSurvClassifIPCW = R6Class( # get G(t) at the observed cutoff'ed times efficiently extend_times = getFromNamespace("C_Vec_WeightedDiscreteCdf", ns = "distr6") cens_probs = extend_times(cut_times, cens_fit$time, cdf = 1 - cens_surv, FALSE, FALSE)[,1] - # substitute `eps` for observations: G(t) = 0 + + # substitute `eps` for observations: G(t) = 0 (this should never happen though!) if (any(cens_probs == 0)) { warning("At least one t: G(t) = 0, will substitute with eps to avoid very large weights") cens_probs[cens_probs == 0] = self$param_set$values$eps diff --git a/R/pipelines.R b/R/pipelines.R index 68170690f..11d608667 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -670,7 +670,8 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. #' Should be reasonably smaller than the maximum event time to avoid enormous weights. #' @param eps `numeric()`\cr -#' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights. +#' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite +#' weights (a warning is triggered if this happens). #' @param graph_learner `logical(1)`\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. 
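Since both `cutoff_time` and `eps` are plain pipeline arguments, a quick hedged check that they reach the task-transformation step is to build the graph and inspect that pipeop's parameter values; the pipeop id follows the registration earlier in this patch series, and the expected output is stated rather than captured.

library(mlr3)
library(mlr3proba)
library(mlr3pipelines)

gr = ppl("survtoclassif_IPCW",
  learner = lrn("classif.rpart"),
  cutoff_time = 500,
  eps = 1e-3
)
gr$pipeops$trafotask_survclassif_IPCW$param_set$values
# expected: cutoff_time = 500, eps = 0.001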
@@ -714,7 +715,7 @@ pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-3, assert_true("prob" %in% learner$predict_types) gr = mlr3pipelines::Graph$new() - gr$add_pipeop(mlr3pipelines::po("trafotask_survclassif_IPCW", cutoff_time = cutoff_time)) + gr$add_pipeop(mlr3pipelines::po("trafotask_survclassif_IPCW", cutoff_time = cutoff_time, eps = eps)) gr$add_pipeop(mlr3pipelines::po("learner", learner, predict_type = "prob")) gr$add_pipeop(mlr3pipelines::po("trafopred_classifsurv_IPCW")) gr$add_pipeop(mlr3pipelines::po("nop")) From c476aed01d38f5bac60a138759538c17319b1b9d Mon Sep 17 00:00:00 2001 From: john Date: Tue, 17 Sep 2024 16:48:55 +0200 Subject: [PATCH 34/82] refine IPCW test --- tests/testthat/test_ipcw.R | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/testthat/test_ipcw.R b/tests/testthat/test_ipcw.R index 9a3906461..3ad34b9cb 100644 --- a/tests/testthat/test_ipcw.R +++ b/tests/testthat/test_ipcw.R @@ -15,8 +15,9 @@ test_that("PipeOpTaskSurvClassifIPCW", { expect_error(po_ipcw$train(list(train_task)), "not 'NULL'") # `cutoff_time` should be less than the max event time po_ipcw$param_set$set_values(cutoff_time = 3000) - expect_error(po_ipcw$train(list(train_task)), "cutoff_time < max_event_time is not TRUE") - po_ipcw$param_set$set_values(cutoff_time = 300) + expect_error(po_ipcw$train(list(train_task)), "cutoff_time <= max_event_time is not TRUE") + cutoff = 300 + po_ipcw$param_set$set_values(cutoff_time = cutoff) res = po_ipcw$train(list(train_task)) @@ -29,22 +30,26 @@ test_that("PipeOpTaskSurvClassifIPCW", { expect_equal(sum(output_task$weights$weight), output_task$nrow) expect_equal(output_task$positive, "1") expect_equal(output_task$target_names, "status") - expect_equal(output_task$nrow, train_task$nrow) # same observations + expect_equal(output_task$nrow, train_task$nrow) # same #observations - # check: do `output_task$weights` make sense? are 0 the ones that should be 0? - zero_weight_rows = train_task$status() == 0 & train_task$times() < 300 - expect_true(all(output_task$weights$weight[zero_weight_rows] == 0)) + # check: are weights = 0 the ones that should be so? + true_zero_weight_ids = which(train_task$status() == 0 & train_task$times() <= cutoff) + res_zero_weight_ids = which(output_task$weights$weight == 0) + expect_equal(res_zero_weight_ids, true_zero_weight_ids) res = po_ipcw$predict(list(test_task)) pred_task = res[[1L]] expect_task_classif(pred_task) # check status == 0 for time > cutoff time - expect_true(all(pred_task$data(,"status")[res$data$times > 300] == 0)) + expect_true(all(pred_task$truth()[res$data$times > cutoff] == 0)) # (row_ids, times, status, cutoff_time) are correct? 
data = res[[2L]] expect_list(data, len = 4) - expect_true(length(data$row_ids) == length(test_ids)) expect_equal(names(data), c("row_ids", "times", "status", "cutoff_time")) + expect_equal(data$row_ids, test_ids) + expect_equal(data$times, test_task$times()) + expect_equal(data$status, test_task$status()) + expect_equal(data$cutoff_time, cutoff) }) From c9bc8286ff80c50ce07038e26ec2abacbab8b9e9 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 18 Sep 2024 10:53:49 +0200 Subject: [PATCH 35/82] add comment --- R/PipeOpPredClassifSurvIPCW.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index 9f9ea75e1..387f5d7af 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -66,7 +66,7 @@ PipeOpPredClassifSurvIPCW = R6Class( private = list( .predict = function(input) { pred = input[[1]] # classification predictions - data = input[[2]] # row_ids, times, status + data = input[[2]] # row_ids, times, status, cutoff_time # risk => prob of having the event up until the cutoff time risk = pred$prob[, "1"] From d9726230becf48e08ea72c0cf32094b31be124e8 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 18 Sep 2024 11:17:15 +0200 Subject: [PATCH 36/82] fix rare bug in graf score when evulating a survival matrix with only 1 time point! --- R/integrated_scores.R | 2 +- tests/testthat/test_mlr_measures.R | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/R/integrated_scores.R b/R/integrated_scores.R index 085d5c08c..acc2536d7 100644 --- a/R/integrated_scores.R +++ b/R/integrated_scores.R @@ -57,7 +57,7 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, } else { # survival 2d array surv_mat = distribution } - surv_mat = surv_mat[, as.numeric(colnames(surv_mat)) <= t_max] + surv_mat = surv_mat[, as.numeric(colnames(surv_mat)) <= t_max, drop = FALSE] mtc = findInterval(unique_times, as.numeric(colnames(surv_mat))) cdf = 1 - surv_mat[, mtc, drop = FALSE] if (any(mtc == 0)) { diff --git a/tests/testthat/test_mlr_measures.R b/tests/testthat/test_mlr_measures.R index e5c1f7420..feeb14255 100644 --- a/tests/testthat/test_mlr_measures.R +++ b/tests/testthat/test_mlr_measures.R @@ -130,6 +130,13 @@ test_that("graf proper option", { expect_gt(s2, s1) }) +test_that("graf with 1 time point", { + data = data.frame(time = c(1,1), status = c(1,0), f1 = c(5,3)) + task = as_task_surv(x = data, event = "status", time = "time") + res = suppressWarnings(lrn("surv.coxph")$train(task)$predict(task)) + expect_number(res$score(msr("surv.graf", times = 1))) +}) + test_that("t_max, p_max", { set.seed(1L) t = tsk("rats")$filter(sample(1:300, 50)) From 5ebda76caff8b84768b4718a6dabaace7e604a1d Mon Sep 17 00:00:00 2001 From: john Date: Wed, 18 Sep 2024 11:27:29 +0200 Subject: [PATCH 37/82] empty state => empty list, not NULL --- R/PipeOpTaskSurvClassifIPCW.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index be43511f5..aa78c838a 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -173,6 +173,9 @@ PipeOpTaskSurvClassifIPCW = R6Class( target = status_var, positive = "1") task_ipcw$set_col_roles("ipc_weights", roles = "weight") + # keep this in the state just in case + self$state = list() + # pass on classif task list(task_ipcw, NULL) }, From e23a1860280ceb033986563fefd4eb90bfd14135 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 18 Sep 2024 11:29:17 +0200 Subject: [PATCH 38/82] update example --- 
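The `drop = FALSE` change to the integrated-score code a few hunks above guards against base R dropping dimensions when the survival matrix has a single time column; the behaviour it avoids is easy to reproduce on a toy matrix (illustrative values).

m = matrix(c(0.9, 0.8, 0.7), ncol = 1, dimnames = list(NULL, "500"))
m[, as.numeric(colnames(m)) <= 600]                # silently collapses to a plain vector
m[, as.numeric(colnames(m)) <= 600, drop = FALSE]  # stays a 1-column matrix and keeps its column name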
R/pipelines.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/pipelines.R b/R/pipelines.R index 11d608667..3349b2dfc 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -688,11 +688,8 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] #' @family pipelines #' -#' @examples +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE) && -#' requireNamespace("mlr3learners", quietly = TRUE)) { -#' #' library(mlr3) #' library(mlr3learners) #' library(mlr3pipelines) @@ -707,8 +704,12 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' graph_learner = TRUE #' ) #' grlrn$train(task, row_ids = part$train) -#' grlrn$predict(task, row_ids = part$test) -#' } +#' pred = grlrn$predict(task, row_ids = part$test) +#' pred # crank and distr at the cutoff time point included +#' +#' # score predictions +#' pred$score() # C-index +#' pred$score(msr("surv.brier", times = 500)) # Brier #' } #' @export pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-3, graph_learner = FALSE) { From 854e4ebdcfa875d4d4caee76acd9ddb43ceea6a6 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 18 Sep 2024 11:31:06 +0200 Subject: [PATCH 39/82] update IPCW pipeline test --- tests/testthat/test_pipelines.R | 42 +++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index b68366573..3259ceb1c 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -103,8 +103,6 @@ skip_if_not_installed("mlr3extralearners") test_that("survtoclassif_IPCW", { task = tsk("lung") part = partition(task) - task_train = task$clone()$filter(part$train) - task_test = task$clone()$filter(part$test) pipe = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), cutoff_time = 500) @@ -113,27 +111,35 @@ test_that("survtoclassif_IPCW", { grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), cutoff_time = 500, graph_learner = TRUE) expect_class(grlrn, "GraphLearner") - grlrn$train(task_train) - # check: weights were used - expect_vector(grlrn$model$classif.rpart$model$call$weights, ptype = numeric(), - size = task_train$nrow) - p = grlrn$predict(task_test) + grlrn$train(task, row_ids = part$train) + # check that the weights were used for classif learner + expect_numeric(grlrn$model$classif.rpart$model$call$weights, any.missing = FALSE, + len = length(part$train)) + p = grlrn$predict(task, row_ids = part$test) expect_prediction_surv(p) - # check crank and distr exist - expect_true("crank" %in% names(p$data)) - # p$data$distr => 1 column, cutoff time as columname - expect_matrix(p$data$dist, nrows = nrow(task_test$nrow), ncols = 1) - expect_true(colnames(p$data$dist) == "500") + # crank is like risk => prob of having the event up to cutoff time + expect_numeric(p$crank, len = length(part$test), lower = 0, upper = 1) + # p$data$distr => 1 column, cutoff time as column name + expect_matrix(p$data$distr, nrows = length(part$test), ncols = 1) + expect_true(colnames(p$data$distr) == "500") + # crank = risk = 1 - surv at cutoff time + expect_equal(p$crank, 1 - p$data$distr[,"500"]) + # brier score at the cutoff time works + expect_number(p$score(msr("surv.brier", times = 500)), finite = TRUE) + # also in 
other points + expect_number(p$score(msr("surv.brier", times = 100)), finite = TRUE) + expect_number(p$score(msr("surv.brier", times = 600)), finite = TRUE) # Test with different cutoff_time grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), cutoff_time = 600, graph_learner = TRUE) - grlrn$train(task_train) - p2 = grlrn$predict(task_test) + grlrn$train(task, part$train) + p2 = grlrn$predict(task, part$test) - # different cutoff time, different crank predictions - expect_false(all(p$crank == p2$crank)) + # check predictions + expect_numeric(p2$crank, len = length(part$test), lower = 0, upper = 1) + expect_number(p2$score(msr("surv.brier", times = 600)), finite = TRUE) - # check msr("surv.brier") with only one time point? Eg prob at cutoff time? - expect_numeric(p2$score(msr("surv.graf", times = 600)), any.missing = FALSE) + # different cutoff time, different (crank) predictions + expect_false(all(p$crank == p2$crank)) }) From 919378ee73d2dc3e5907aa35f9dde54c842e6129 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 18 Sep 2024 11:31:18 +0200 Subject: [PATCH 40/82] update docs --- man/mlr_graphs_survtoclassif_IPCW.Rd | 16 ++++++++++------ man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 3 ++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index 64c8b0824..8162c3a3b 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -21,7 +21,8 @@ Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are Should be reasonably smaller than the maximum event time to avoid enormous weights.} \item{eps}{\code{numeric()}\cr -Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights.} +Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite +weights (a warning is triggered if this happens).} \item{graph_learner}{\code{logical(1)}\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a @@ -44,10 +45,8 @@ to \link{PredictionSurv}. } } \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE) && - requireNamespace("mlr3learners", quietly = TRUE)) { - library(mlr3) library(mlr3learners) library(mlr3pipelines) @@ -62,9 +61,14 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE) && graph_learner = TRUE ) grlrn$train(task, row_ids = part$train) - grlrn$predict(task, row_ids = part$test) -} + pred = grlrn$predict(task, row_ids = part$test) + pred # crank and distr at the cutoff time point included + + # score predictions + pred$score() # C-index + pred$score(msr("surv.brier", times = 500)) # Brier } +\dontshow{\}) # examplesIf} } \seealso{ Other pipelines: diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index d07d533bc..63888322b 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -62,7 +62,8 @@ The parameters are Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. Should be reasonably smaller than the maximum event time to avoid enormous weights. \item \code{eps :: numeric()}\cr -Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights. 
+Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent +infinite weights (a warning is triggered if this happens). } } From 0108d2d7865a7d304e081de285fa56d16566cc5c Mon Sep 17 00:00:00 2001 From: john Date: Wed, 18 Sep 2024 11:47:00 +0200 Subject: [PATCH 41/82] update to v0.6.9 --- DESCRIPTION | 2 +- NEWS.md | 26 ++++++++++++++++---------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0f1dc6675..687e0277c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: mlr3proba Title: Probabilistic Supervised Learning for 'mlr3' -Version: 0.6.8 +Version: 0.6.9 Authors@R: c(person(given = "Raphael", family = "Sonabend", diff --git a/NEWS.md b/NEWS.md index 293fcf698..4c333d5c9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,26 +1,32 @@ +# mlr3proba 0.6.9 + +* New `PipeOp`s: `PipeOpTaskSurvClassifIPCW`, `PipeOpPredClassifSurvIPCW` +* New pipeline (**reduction method**): `pipeline_survtoclassif_IPCW` +* Fixed a rare bug in Brier score when the survival matrix had one time point/column only. + # mlr3proba 0.6.8 -- `Rcpp` code optimizations -- Fixed ERV scoring to comply with `mlr3` dev version (no bugs before) -- Skipping `survtoregr` pipelines due to bugs (to be refactored in the future) +* `Rcpp` code optimizations +* Fixed ERV scoring to comply with `mlr3` dev version (no bugs before) +* Skipping `survtoregr` pipelines due to bugs (to be refactored in the future) # mlr3proba 0.6.7 -- Deprecate `crank` to `distr` composition in `distrcompose` pipeop (only from `lp` => `distr` works now) -- Add `get_mortality()` function (from `survivalmodels::surv_to_risk()` -- Add Rcpp function `assert_surv_matrix()` -- Update and simplify `crankcompose` pipeop and respective pipeline (no `response` is created anymore) -- Add `responsecompositor` pipeline with `rmst` and `median` +* Deprecate `crank` to `distr` composition in `distrcompose` pipeop (only from `lp` => `distr` works now) +* Add `get_mortality()` function (from `survivalmodels::surv_to_risk()` +* Add Rcpp function `assert_surv_matrix()` +* Update and simplify `crankcompose` pipeop and respective pipeline (no `response` is created anymore) +* Add `responsecompositor` pipeline with `rmst` and `median` # mlr3proba 0.6.6 -- Small fixes and refactoring to the discrete-time pipeops +* Small fixes and refactoring to the discrete-time pipeops # mlr3proba 0.6.5 * Add support for discrete-time survival analysis * New `PipeOp`s: `PipeOpTaskSurvClassifDiscTime`, `PipeOpPredClassifSurvDiscTime` -* New pipeline: `pipeline_survtoclassif` +* New pipeline (**reduction method**): `pipeline_survtoclassif_disctime` # mlr3proba 0.6.4 From 28790e20a52dc75dd4043c6a65cbfa24d649f532 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 10:31:30 +0200 Subject: [PATCH 42/82] ignore docs/ --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a87488d9b..dce3cf7f0 100644 --- a/.gitignore +++ b/.gitignore @@ -104,4 +104,5 @@ src/*.so src/*.dll CRAN-RELEASE .vscode -check/* \ No newline at end of file +check/* +docs From bac562d92f2cd51aff4c613b93457c53cdff271f Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 10:32:00 +0200 Subject: [PATCH 43/82] add fancy icon --- .github/workflows/pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pkgdown.yml b/.github/workflows/pkgdown.yml index ae1dab80c..2b71404e7 100644 --- a/.github/workflows/pkgdown.yml +++ b/.github/workflows/pkgdown.yml @@ -42,7 +42,7 
@@ jobs: run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) shell: Rscript {0} - - name: Deploy + - name: Deploy to GitHub pages 🚀 if: github.event_name != 'pull_request' uses: JamesIves/github-pages-deploy-action@v4.6.4 with: From 2816a625c60dde24368f4c1941927ba60188424e Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 11:20:53 +0200 Subject: [PATCH 44/82] temp fix of math rending issue --- .Rbuildignore | 1 + NEWS.md | 1 + pkgdown/_pkgdown.yml | 26 +++++++++++++++++++------- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 8f4b0cfce..f10061b19 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -23,3 +23,4 @@ README.html ^\.vscode$ ^\.lintr$ ^\.pre-commit-config\.yaml$ +^pkgdown/_pkgdown\.yml$ diff --git a/NEWS.md b/NEWS.md index 4c333d5c9..9fa547abe 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ * New `PipeOp`s: `PipeOpTaskSurvClassifIPCW`, `PipeOpPredClassifSurvIPCW` * New pipeline (**reduction method**): `pipeline_survtoclassif_IPCW` * Fixed a rare bug in Brier score when the survival matrix had one time point/column only. +* Temp fix of math-rendering issue in package website # mlr3proba 0.6.8 diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 741098bfd..6d9765f7a 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -2,6 +2,13 @@ url: https://mlr3proba.mlr-org.com template: bootstrap: 5 + # solve temporarily math rendering issue: https://github.com/r-lib/pkgdown/issues/2704 + # TODO: Check and remove when above issue is resolved + includes: + in_header: | + + + light-switch: true math-rendering: mathjax package: mlr3pkgdowntemplate @@ -17,8 +24,9 @@ toc: navbar: structure: left: [articles, reference, news, book] - right: [search, github, mattermost, stackoverflow, rss, lightswitch] + right: [search, github, mattermost, stackoverflow, website, lightswitch] components: + # Left home: ~ articles: text: Vignettes @@ -33,19 +41,23 @@ navbar: href: https://mlr-org.com/gallery/technical/2023-10-25-bart-survival/ - text: Neural Networks for Survival Analysis href: https://towardsdatascience.com/neural-networks-for-survival-analysis-in-r-1e0421584ab - mattermost: - icon: fa fa-comments - href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/ book: text: mlr3book - icon: fa fa-link + icon: fa fa-book href: https://mlr3book.mlr-org.com + # Right + mattermost: + icon: fa fa-comments + href: https://lmmisld-lmu-stats-slds.srv.mwn.de/mlr_invite/ + aria-label: Mattermost stackoverflow: icon: fab fa-stack-overflow href: https://stackoverflow.com/questions/tagged/mlr3 - rss: - icon: fa-rss + aria-label: Stack Overflow + website: + icon: fa-link href: https://mlr-org.com/ + aria-label: mlr3 website reference: - title: Package From fe7df89cd392f862bcbe17a97e77b01ae9ecebf9 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 11:28:07 +0200 Subject: [PATCH 45/82] update docs back to CRAN mlr3 version --- man/LearnerDens.Rd | 1 - man/LearnerSurv.Rd | 1 - man/PredictionDens.Rd | 3 +-- man/PredictionSurv.Rd | 3 +-- man/mlr_learners_dens.hist.Rd | 1 - man/mlr_learners_dens.kde.Rd | 1 - man/mlr_learners_surv.coxph.Rd | 1 - man/mlr_learners_surv.kaplan.Rd | 1 - man/mlr_learners_surv.rpart.Rd | 1 - 9 files changed, 2 insertions(+), 11 deletions(-) diff --git a/man/LearnerDens.Rd b/man/LearnerDens.Rd index a963d90c5..84baf4fc6 100644 --- a/man/LearnerDens.Rd +++ b/man/LearnerDens.Rd @@ -44,7 +44,6 @@ Other Learner:
                Inherited methods
                • mlr3::Learner$base_learner()
-• mlr3::Learner$encapsulate()
                • mlr3::Learner$format()
                • mlr3::Learner$help()
                • mlr3::Learner$predict()
                • diff --git a/man/LearnerSurv.Rd b/man/LearnerSurv.Rd index 257d63220..54c3cd59e 100644 --- a/man/LearnerSurv.Rd +++ b/man/LearnerSurv.Rd @@ -47,7 +47,6 @@ Other Learner:
                  Inherited methods
                  • mlr3::Learner$base_learner()
-• mlr3::Learner$encapsulate()
                  • mlr3::Learner$format()
                  • mlr3::Learner$help()
                  • mlr3::Learner$predict()
                  • diff --git a/man/PredictionDens.Rd b/man/PredictionDens.Rd index 1a242c3ee..c63a03c48 100644 --- a/man/PredictionDens.Rd +++ b/man/PredictionDens.Rd @@ -45,12 +45,11 @@ Access the stored estimated distribution.} } } \if{html}{\out{ -
                    Inherited methods +
                    Inherited methods diff --git a/man/PredictionSurv.Rd b/man/PredictionSurv.Rd index 652f800d3..c0d22e10c 100644 --- a/man/PredictionSurv.Rd +++ b/man/PredictionSurv.Rd @@ -60,12 +60,11 @@ Access the stored predicted survival time.} } } \if{html}{\out{ -
                    Inherited methods +
                    Inherited methods diff --git a/man/mlr_learners_dens.hist.Rd b/man/mlr_learners_dens.hist.Rd index 58341647a..54f29367c 100644 --- a/man/mlr_learners_dens.hist.Rd +++ b/man/mlr_learners_dens.hist.Rd @@ -48,7 +48,6 @@ Other density estimators:
                    Inherited methods
                    • mlr3::Learner$base_learner()
-• mlr3::Learner$encapsulate()
                    • mlr3::Learner$format()
                    • mlr3::Learner$help()
                    • mlr3::Learner$predict()
                    • diff --git a/man/mlr_learners_dens.kde.Rd b/man/mlr_learners_dens.kde.Rd index 970ff3f94..bdff9c318 100644 --- a/man/mlr_learners_dens.kde.Rd +++ b/man/mlr_learners_dens.kde.Rd @@ -59,7 +59,6 @@ Other density estimators:
                      Inherited methods
                      • mlr3::Learner$base_learner()
-• mlr3::Learner$encapsulate()
                      • mlr3::Learner$format()
                      • mlr3::Learner$help()
                      • mlr3::Learner$predict()
                      • diff --git a/man/mlr_learners_surv.coxph.Rd b/man/mlr_learners_surv.coxph.Rd index 43ac2ee80..5979b9f2f 100644 --- a/man/mlr_learners_surv.coxph.Rd +++ b/man/mlr_learners_surv.coxph.Rd @@ -69,7 +69,6 @@ Other survival learners:
                        Inherited methods
                        • mlr3::Learner$base_learner()
-• mlr3::Learner$encapsulate()
                        • mlr3::Learner$format()
                        • mlr3::Learner$help()
                        • mlr3::Learner$predict()
                        • diff --git a/man/mlr_learners_surv.kaplan.Rd b/man/mlr_learners_surv.kaplan.Rd index 5829c99f5..e3cd8992f 100644 --- a/man/mlr_learners_surv.kaplan.Rd +++ b/man/mlr_learners_surv.kaplan.Rd @@ -65,7 +65,6 @@ Other survival learners:
                          Inherited methods
                          • mlr3::Learner$base_learner()
-• mlr3::Learner$encapsulate()
                          • mlr3::Learner$format()
                          • mlr3::Learner$help()
                          • mlr3::Learner$predict()
                          • diff --git a/man/mlr_learners_surv.rpart.Rd b/man/mlr_learners_surv.rpart.Rd index fa2ada657..e243a1bf5 100644 --- a/man/mlr_learners_surv.rpart.Rd +++ b/man/mlr_learners_surv.rpart.Rd @@ -85,7 +85,6 @@ Other survival learners:
                            Inherited methods
                            • mlr3::Learner$base_learner()
-• mlr3::Learner$encapsulate()
                            • mlr3::Learner$format()
                            • mlr3::Learner$help()
                            • mlr3::Learner$predict()
                            • From e9856a3fbd13bbe319afbf2a7a0a174cabe3e96f Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 11:52:38 +0200 Subject: [PATCH 46/82] update example --- R/PipeOpTaskSurvClassifIPCW.R | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index aa78c838a..21d5975fb 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -63,10 +63,10 @@ #' #' @family PipeOps #' @family Transformation PipeOps -#' @examples +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { #' library(mlr3) +#' library(mlr3learners) #' library(mlr3pipelines) #' #' task = tsk("lung") @@ -86,7 +86,18 @@ #' # during prediction, output is a classification task (no weights) #' task_classif_test = po_ipcw$predict(list(task_test))[[1]] #' task_classif_test -#' } +#' +#' # train classif learner on the train task with weights +#' learner = lrn("classif.rpart", predict_type = "prob") +#' learner$train(task_classif_train) +#' +#' # predict using the test output task +#' p = learner$predict(task_classif_test) +#' +#' # use classif measures for evaluation +#' p$confusion +#' p$score() +#' p$score(msr("classif.auc")) #' } #' @export PipeOpTaskSurvClassifIPCW = R6Class( From 2ad288f054fc226f0b3d866276b3c048f6cddf8e Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 11:53:25 +0200 Subject: [PATCH 47/82] refactor: cutoff_time => tau --- R/PipeOpPredClassifSurvIPCW.R | 4 ++-- R/PipeOpResponseCompositor.R | 16 +++++++-------- R/PipeOpTaskSurvClassifIPCW.R | 34 ++++++++++++++++++-------------- R/pipelines.R | 35 ++++++++++++++++----------------- tests/testthat/test_ipcw.R | 16 +++++++-------- tests/testthat/test_pipelines.R | 8 ++++---- 6 files changed, 57 insertions(+), 56 deletions(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index 387f5d7af..41e94c3c8 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -66,12 +66,12 @@ PipeOpPredClassifSurvIPCW = R6Class( private = list( .predict = function(input) { pred = input[[1]] # classification predictions - data = input[[2]] # row_ids, times, status, cutoff_time + data = input[[2]] # row_ids, times, status, tau # risk => prob of having the event up until the cutoff time risk = pred$prob[, "1"] surv = matrix(data = 1 - risk, ncol = 1) - colnames(surv) = data$cutoff_time + colnames(surv) = data$tau p = PredictionSurv$new( # the original row ids diff --git a/R/PipeOpResponseCompositor.R b/R/PipeOpResponseCompositor.R index d00fbce97..4f08acc29 100644 --- a/R/PipeOpResponseCompositor.R +++ b/R/PipeOpResponseCompositor.R @@ -33,7 +33,7 @@ #' - `method` :: `character(1)` \cr #' Determines what method should be used to produce a survival time (response) from the survival distribution. #' Available methods are `"rmst"` and `"median"`, corresponding to the *restricted mean survival time* and the *median survival time* respectively. -#' - `cutoff_time` :: `numeric(1)` \cr +#' - `tau` :: `numeric(1)` \cr #' Determines the time point up to which we calculate the restricted mean survival time (works only for the `"rmst"` method). #' If `NULL` (default), all the available time points in the predicted survival distribution will be used. 
#' * `add_crank` :: `logical(1)` \cr @@ -47,7 +47,7 @@ #' The restricted mean survival time is the default/preferred method and is calculated as follows: #' \deqn{T_{i,rmst} \approx \sum_{t_j \in [0,\tau]} (t_j - t_{j-1}) S_i(t_j)} #' -#' where \eqn{T} is the expected survival time, \eqn{\tau} is the time cutoff and \eqn{S_i(t_j)} are the predicted survival probabilities of observation \eqn{i} for all the \eqn{t_j} time points. +#' where \eqn{T} is the expected survival time, \eqn{\tau} is the time cutoff/horizon and \eqn{S_i(t_j)} are the predicted survival probabilities of observation \eqn{i} for all the \eqn{t_j} time points. #' #' The \eqn{T_{i,median}} survival time is just the first time point for which the survival probability is less than \eqn{0.5}. #' If no such time point exists (e.g. when the survival distribution is not proper due to high censoring) we return the last time point. @@ -86,7 +86,7 @@ PipeOpResponseCompositor = R6Class("PipeOpResponseCompositor", initialize = function(id = "responsecompose", param_vals = list()) { param_set = ps( method = p_fct(default = "rmst", levels = c("rmst", "median"), tags = "predict"), - cutoff_time = p_dbl(0, default = NULL, special_vals = list(NULL), tags = "predict"), + tau = p_dbl(0, default = NULL, special_vals = list(NULL), tags = "predict"), add_crank = p_lgl(default = FALSE, tags = "predict"), overwrite = p_lgl(default = FALSE, tags = "predict") ) @@ -139,14 +139,12 @@ PipeOpResponseCompositor = R6Class("PipeOpResponseCompositor", method = self$param_set$values$method if (method == "rmst") { - cutoff_time = self$param_set$values$cutoff_time - within_range = !is.null(cutoff_time) && - cutoff_time <= max(times) && - cutoff_time >= min(times) + tau = self$param_set$values$tau + within_range = !is.null(tau) && tau <= max(times) && tau >= min(times) if (within_range) { # subset survival matrix and times - surv = surv[, times <= cutoff_time, drop = FALSE] - times = times[times <= cutoff_time] + surv = surv[, times <= tau, drop = FALSE] + times = times[times <= tau] } # calculate the restricted mean survival time diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 21d5975fb..76a45dedd 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -51,9 +51,9 @@ #' @section Parameters: #' The parameters are #' -#' * `cutoff_time :: numeric()`\cr -#' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. -#' Should be reasonably smaller than the maximum event time to avoid enormous weights. +#' * `tau :: numeric()`\cr +#' Predefined time point for IPCW. Observations with time larger than \eqn{\tau} are censored. +#' Must be less or equal to the maximum event time. #' * `eps :: numeric()`\cr #' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent #' infinite weights (a warning is triggered if this happens). @@ -77,7 +77,7 @@ #' task_test = task$clone()$filter(part$test) #' #' # define IPCW pipeop -#' po_ipcw = po("trafotask_survclassif_IPCW", cutoff_time = 500) +#' po_ipcw = po("trafotask_survclassif_IPCW", tau = 365) #' #' # during training, output is a classification task with weights #' task_classif_train = po_ipcw$train(list(task_train))[[1]] @@ -109,7 +109,7 @@ PipeOpTaskSurvClassifIPCW = R6Class( #' Creates a new instance of this [R6][R6::R6Class] class. 
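# A minimal standalone sketch (not from this patch) of the RMST sum documented
# above: it assumes a survival matrix `surv` with one row per observation, one
# column per time point in `times`, and t_0 = 0 as the left end of the first
# interval. The toy numbers are illustrative only.
times = c(10, 20, 35, 50)
surv = rbind(c(0.95, 0.80, 0.60, 0.40),
             c(0.90, 0.70, 0.45, 0.20))
tau = 35
keep = times <= tau                          # same subsetting idea as in the pipeop
dt = diff(c(0, times[keep]))                 # (t_j - t_{j-1})
rmst = as.numeric(surv[, keep, drop = FALSE] %*% dt)
rmst                                         # one restricted mean survival time per row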
initialize = function(id = "trafotask_survclassif_IPCW") { param_set = ps( - cutoff_time = p_dbl(0), + tau = p_dbl(0), eps = p_dbl(0, default = 1e-3) ) param_set$set_values(eps = 1e-3) @@ -137,9 +137,9 @@ PipeOpTaskSurvClassifIPCW = R6Class( # checks assert_true(task$censtype == "right") - cutoff_time = assert_numeric(self$param_set$values$cutoff_time, null.ok = FALSE) + tau = assert_numeric(self$param_set$values$tau, null.ok = FALSE) max_event_time = max(task$unique_event_times()) - stopifnot(cutoff_time <= max_event_time) + stopifnot(tau <= max_event_time) # G(t): KM estimate of the censoring distribution times = task$times() @@ -150,7 +150,7 @@ PipeOpTaskSurvClassifIPCW = R6Class( # apply the cutoff to `times` cut_times = times - cut_times[cut_times > cutoff_time] = cutoff_time + cut_times[cut_times > tau] = tau # get G(t) at the observed cutoff'ed times efficiently extend_times = getFromNamespace("C_Vec_WeightedDiscreteCdf", ns = "distr6") cens_probs = extend_times(cut_times, cens_fit$time, cdf = 1 - cens_surv, FALSE, FALSE)[,1] @@ -171,10 +171,10 @@ PipeOpTaskSurvClassifIPCW = R6Class( # add weights to original data data[["ipc_weights"]] = ipc_weights # zero weights for censored observations before the cutoff time - ids = status == 0 & times <= cutoff_time + ids = status == 0 & times <= tau data[ids, "ipc_weights" := 0] # update target: status = 0 after cutoff (remains the same before cutoff) - status[times > cutoff_time] = 0 + status[times > tau] = 0 data[[status_var]] = factor(status, levels = c("0", "1")) # remove target time variable data[[time_var]] = NULL @@ -197,10 +197,10 @@ PipeOpTaskSurvClassifIPCW = R6Class( data = task$data() time_var = task$target_names[1] status_var = task$target_names[2] - cutoff_time = assert_numeric(self$param_set$values$cutoff_time, null.ok = FALSE) + tau = assert_numeric(self$param_set$values$tau, null.ok = FALSE) # update target: status = 0 after cutoff (remains the same before cutoff) - status[times > cutoff_time] = 0 + status[times > tau] = 0 data[[status_var]] = factor(status, levels = c("0", "1")) # remove target time variable data[[time_var]] = NULL @@ -208,9 +208,13 @@ PipeOpTaskSurvClassifIPCW = R6Class( task_classif = TaskClassif$new(id = paste0(task$id, "_IPCW"), backend = data, target = "status", positive = "1") - # keep original row_ids, times and status as well the cutoff time - data = list(row_ids = task$row_ids, times = task$times(), status = task$status(), - cutoff_time = cutoff_time) + # keep original row_ids, times and status as well the tau time point + data = list( + row_ids = task$row_ids, + times = task$times(), + status = task$status(), + tau = tau + ) list(task_classif, data) } ) diff --git a/R/pipelines.R b/R/pipelines.R index 3349b2dfc..1fc390044 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -168,7 +168,7 @@ pipeline_crankcompositor = function(learner, method = c("mort"), #' @param method (`character(1)`)\cr #' Determines what method should be used to produce a survival time (response) from the survival distribution. #' Available methods are `"rmst"` and `"median"`, corresponding to the *restricted mean survival time* and the *median survival time* respectively. -#' @param cutoff_time (`numeric(1)`)\cr +#' @param tau (`numeric(1)`)\cr #' Determines the time point up to which we calculate the restricted mean survival time (works only for the `"rmst"` method). #' If `NULL` (default), all the available time points in the predicted survival distribution will be used. 
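# A rough, self-contained sketch (not the pipeop itself) of the IPCW weighting
# idea implemented in `.train()` above: G(t) is the Kaplan-Meier estimate of the
# censoring distribution, weights are 1/G at min(time, tau), censored
# observations before tau get zero weight, and the binary target is "event by
# tau". Using survival::survfit and stepfun here is an assumption made for
# illustration; the pipeop evaluates G(t) differently.
library(survival)
time   = c(5, 8, 12, 20, 25, 40)
status = c(1, 0, 1,  0,  1,  0)                  # 1 = event, 0 = censored
tau    = 20

cens_fit = survfit(Surv(time, 1 - status) ~ 1)   # KM of the censoring distribution
G = stepfun(cens_fit$time, c(1, cens_fit$surv))  # step function G(t)

cut_times = pmin(time, tau)                      # apply the cutoff tau
w = 1 / G(cut_times)                             # inverse probability of censoring weights
w[status == 0 & time <= tau] = 0                 # censored before tau: zero weight
y = as.integer(status == 1 & time <= tau)        # classification target: event by tau
data.frame(time, status, y, weight = round(w, 2))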
#' @param add_crank (`logical(1)`)\cr @@ -199,19 +199,19 @@ pipeline_crankcompositor = function(learner, method = c("mort"), #' grlrn$predict(task, part$test) #' } #' } -pipeline_responsecompositor = function(learner, method = "rmst", cutoff_time = NULL, +pipeline_responsecompositor = function(learner, method = "rmst", tau = NULL, add_crank = FALSE, overwrite = FALSE, graph_learner = FALSE) { assert_learner(learner, task_type = "surv") assert_choice(method, choices = c("rmst", "median")) - assert_number(cutoff_time, null.ok = TRUE, lower = 0) + assert_number(tau, null.ok = TRUE, lower = 0) assert_logical(add_crank) assert_logical(overwrite) assert_logical(graph_learner) pred = mlr3pipelines::as_graph(learner) - pv = list(method = method, cutoff_time = cutoff_time, add_crank = add_crank, + pv = list(method = method, tau = tau, add_crank = add_crank, overwrite = overwrite) compositor = mlr3pipelines::po("responsecompose", param_vals = pv) @@ -666,24 +666,23 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' #' @param learner [LearnerClassif][mlr3::LearnerClassif]\cr #' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. -#' @param cutoff_time `numeric()`\cr -#' Cutoff time for IPCW. Observations with time larger than `cutoff_time` are censored. -#' Should be reasonably smaller than the maximum event time to avoid enormous weights. -#' @param eps `numeric()`\cr +#' @param tau (`numeric()`)\cr +#' Predefined time point for IPCW. Observations with time larger than \eqn{\tau} are censored. +#' Must be less or equal to the maximum event time. +#' @param eps (`numeric()`)\cr #' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite #' weights (a warning is triggered if this happens). -#' @param graph_learner `logical(1)`\cr +#' @param graph_learner (`logical(1)`)\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. #' #' @details #' The pipeline consists of the following steps: -#' \enumerate{ -#' \item [PipeOpTaskSurvClassifIPCW] Converts [TaskSurv] to a [TaskClassif][mlr3::TaskClassif]. -#' \item A [LearnerClassif] is fit and predicted on the new `TaskClassif`. -#' \item [PipeOpPredClassifSurvIPCW] transforms the resulting [PredictionClassif][mlr3::PredictionClassif] +#' +#' 1. [PipeOpTaskSurvClassifIPCW] Converts [TaskSurv] to a [TaskClassif][mlr3::TaskClassif]. +#' 2. A [LearnerClassif] is fit and predicted on the new `TaskClassif`. +#' 3. [PipeOpPredClassifSurvIPCW] transforms the resulting [PredictionClassif][mlr3::PredictionClassif] #' to [PredictionSurv]. 
-#' } #' #' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] #' @family pipelines @@ -700,7 +699,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' grlrn = ppl( #' "survtoclassif_IPCW", #' learner = lrn("classif.rpart"), -#' cutoff_time = 500, # Observations after 500 days are censored +#' tau = 500, # Observations after 500 days are censored #' graph_learner = TRUE #' ) #' grlrn$train(task, row_ids = part$train) @@ -709,14 +708,14 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' #' # score predictions #' pred$score() # C-index -#' pred$score(msr("surv.brier", times = 500)) # Brier +#' pred$score(msr("surv.brier", times = 500)) # Brier score at tau #' } #' @export -pipeline_survtoclassif_IPCW = function(learner, cutoff_time = NULL, eps = 1e-3, graph_learner = FALSE) { +pipeline_survtoclassif_IPCW = function(learner, tau = NULL, eps = 1e-3, graph_learner = FALSE) { assert_true("prob" %in% learner$predict_types) gr = mlr3pipelines::Graph$new() - gr$add_pipeop(mlr3pipelines::po("trafotask_survclassif_IPCW", cutoff_time = cutoff_time, eps = eps)) + gr$add_pipeop(mlr3pipelines::po("trafotask_survclassif_IPCW", tau = tau, eps = eps)) gr$add_pipeop(mlr3pipelines::po("learner", learner, predict_type = "prob")) gr$add_pipeop(mlr3pipelines::po("trafopred_classifsurv_IPCW")) gr$add_pipeop(mlr3pipelines::po("nop")) diff --git a/tests/testthat/test_ipcw.R b/tests/testthat/test_ipcw.R index 3ad34b9cb..497c30714 100644 --- a/tests/testthat/test_ipcw.R +++ b/tests/testthat/test_ipcw.R @@ -11,13 +11,13 @@ test_that("PipeOpTaskSurvClassifIPCW", { po_ipcw = mlr3pipelines::po("trafotask_survclassif_IPCW") expect_class(po_ipcw, c("PipeOp", "PipeOpTaskSurvClassifIPCW")) - # don't allow NULL `cutoff_time` + # don't allow NULL `tau` expect_error(po_ipcw$train(list(train_task)), "not 'NULL'") - # `cutoff_time` should be less than the max event time - po_ipcw$param_set$set_values(cutoff_time = 3000) - expect_error(po_ipcw$train(list(train_task)), "cutoff_time <= max_event_time is not TRUE") + # `tau` should be less than the max event time + po_ipcw$param_set$set_values(tau = 3000) + expect_error(po_ipcw$train(list(train_task)), "tau <= max_event_time is not TRUE") cutoff = 300 - po_ipcw$param_set$set_values(cutoff_time = cutoff) + po_ipcw$param_set$set_values(tau = cutoff) res = po_ipcw$train(list(train_task)) @@ -44,12 +44,12 @@ test_that("PipeOpTaskSurvClassifIPCW", { # check status == 0 for time > cutoff time expect_true(all(pred_task$truth()[res$data$times > cutoff] == 0)) - # (row_ids, times, status, cutoff_time) are correct? 
+ # check that (row_ids, times, status, tau) are correct data = res[[2L]] expect_list(data, len = 4) - expect_equal(names(data), c("row_ids", "times", "status", "cutoff_time")) + expect_equal(names(data), c("row_ids", "times", "status", "tau")) expect_equal(data$row_ids, test_ids) expect_equal(data$times, test_task$times()) expect_equal(data$status, test_task$status()) - expect_equal(data$cutoff_time, cutoff) + expect_equal(data$tau, cutoff) }) diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index 3259ceb1c..864885bd1 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -105,11 +105,11 @@ test_that("survtoclassif_IPCW", { part = partition(task) pipe = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), - cutoff_time = 500) + tau = 500) expect_class(pipe, "Graph") grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), - cutoff_time = 500, graph_learner = TRUE) + tau = 500, graph_learner = TRUE) expect_class(grlrn, "GraphLearner") grlrn$train(task, row_ids = part$train) # check that the weights were used for classif learner @@ -130,9 +130,9 @@ test_that("survtoclassif_IPCW", { expect_number(p$score(msr("surv.brier", times = 100)), finite = TRUE) expect_number(p$score(msr("surv.brier", times = 600)), finite = TRUE) - # Test with different cutoff_time + # Test with different tau grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), - cutoff_time = 600, graph_learner = TRUE) + tau = 600, graph_learner = TRUE) grlrn$train(task, part$train) p2 = grlrn$predict(task, part$test) From 4e449273261cba38cec0d2405aff74e01844ecf9 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 12:19:34 +0200 Subject: [PATCH 48/82] doc: IPCW surv predictions should be evaluated at tau only --- R/PipeOpPredClassifSurvIPCW.R | 4 +++- R/PipeOpResponseCompositor.R | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/PipeOpPredClassifSurvIPCW.R b/R/PipeOpPredClassifSurvIPCW.R index 41e94c3c8..e5f3273ae 100644 --- a/R/PipeOpPredClassifSurvIPCW.R +++ b/R/PipeOpPredClassifSurvIPCW.R @@ -28,8 +28,10 @@ #' Therefore, these predictions serve as **continuous risk scores** that can be #' directly interpreted as `crank` predictions in the right-censored survival #' setting. We also map them to the survival distribution prediction `distr`, -#' at the specified cutoff time point, i.e. as +#' at the specified cutoff time point \eqn{\tau}, i.e. as #' \eqn{S_i(\tau) = 1 - \hat{\pi}(\bold{X}_i)}. +#' Survival measures that use the survival distribution (eg [ISBS][mlr_measures_surv.brier]) +#' should be evaluated exactly at the cutoff time point \eqn{\tau}, see example. 
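# Hedged usage sketch (assumes mlr3, mlr3pipelines and mlr3proba are installed):
# resample the IPCW reduction pipeline and evaluate it exactly at tau, as the
# documentation change above recommends. The fold count and tau value are
# arbitrary choices for illustration.
library(mlr3)
library(mlr3pipelines)
library(mlr3proba)

task = tsk("lung")
grlrn = ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"),
            tau = 500, graph_learner = TRUE)
rr = resample(task, grlrn, rsmp("cv", folds = 3))
rr$aggregate(msr("surv.cindex"))              # discrimination via the crank scores
rr$aggregate(msr("surv.brier", times = 500))  # ISBS evaluated exactly at tau = 500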
#' #' @references #' `r format_bib("vock_2016")` diff --git a/R/PipeOpResponseCompositor.R b/R/PipeOpResponseCompositor.R index 4f08acc29..f92a4e989 100644 --- a/R/PipeOpResponseCompositor.R +++ b/R/PipeOpResponseCompositor.R @@ -72,7 +72,7 @@ #' # mostly improper survival distributions, "median" sets the survival time #' # to the last time point #' -#' # RMST (default) as response, while also changing the crank = -response +#' # RMST (default) as response, while also changing the `crank` to `-response` #' por = po("responsecompose", param_vals = list(overwrite = TRUE, add_crank = TRUE)) #' por$predict(list(pred))[[1L]] #' } From c41e19270e959f409a79f03061b09d9f927f4c7f Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 12:27:35 +0200 Subject: [PATCH 49/82] fix test --- tests/testthat/test_responsecompose.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test_responsecompose.R b/tests/testthat/test_responsecompose.R index 8886426f6..33d8cf869 100644 --- a/tests/testthat/test_responsecompose.R +++ b/tests/testthat/test_responsecompose.R @@ -50,11 +50,11 @@ test_that("different methods, different responses", { test_that("different cutoffs, different rmst", { por1 = mlr3pipelines::po("responsecompose", overwrite = TRUE, method = "rmst") por2 = mlr3pipelines::po("responsecompose", overwrite = TRUE, method = "rmst", - cutoff_time = 100) # t_max = 99 in the generated data + tau = 100) # t_max = 99 in the generated data por3 = mlr3pipelines::po("responsecompose", overwrite = TRUE, method = "rmst", - cutoff_time = 65) + tau = 65) por4 = mlr3pipelines::po("responsecompose", overwrite = TRUE, method = "rmst", - cutoff_time = 25) + tau = 25) p1 = por1$predict(list(pcox))[[1L]] p2 = por2$predict(list(pcox))[[1L]] p3 = por3$predict(list(pcox))[[1L]] From e12eb76f0093c4cca5ca82fab25f8bf5299a1790 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 20 Sep 2024 12:28:33 +0200 Subject: [PATCH 50/82] updocs --- man/mlr_graphs_responsecompositor.Rd | 4 +-- man/mlr_graphs_survtoclassif_IPCW.Rd | 16 ++++++------ man/mlr_pipeops_responsecompose.Rd | 6 ++--- man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd | 4 ++- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 25 ++++++++++++++----- 5 files changed, 35 insertions(+), 20 deletions(-) diff --git a/man/mlr_graphs_responsecompositor.Rd b/man/mlr_graphs_responsecompositor.Rd index bf4e05459..407aa9380 100644 --- a/man/mlr_graphs_responsecompositor.Rd +++ b/man/mlr_graphs_responsecompositor.Rd @@ -8,7 +8,7 @@ pipeline_responsecompositor( learner, method = "rmst", - cutoff_time = NULL, + tau = NULL, add_crank = FALSE, overwrite = FALSE, graph_learner = FALSE @@ -24,7 +24,7 @@ be wrapped in \link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or a \code{Graph} Determines what method should be used to produce a survival time (response) from the survival distribution. Available methods are \code{"rmst"} and \code{"median"}, corresponding to the \emph{restricted mean survival time} and the \emph{median survival time} respectively.} -\item{cutoff_time}{(\code{numeric(1)})\cr +\item{tau}{(\code{numeric(1)})\cr Determines the time point up to which we calculate the restricted mean survival time (works only for the \code{"rmst"} method). 
If \code{NULL} (default), all the available time points in the predicted survival distribution will be used.} diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index 8162c3a3b..e03a999b2 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -7,7 +7,7 @@ \usage{ pipeline_survtoclassif_IPCW( learner, - cutoff_time = NULL, + tau = NULL, eps = 0.001, graph_learner = FALSE ) @@ -16,15 +16,15 @@ pipeline_survtoclassif_IPCW( \item{learner}{\link[mlr3:LearnerClassif]{LearnerClassif}\cr Classification learner to fit the transformed \link[mlr3:TaskClassif]{TaskClassif}.} -\item{cutoff_time}{\code{numeric()}\cr -Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. -Should be reasonably smaller than the maximum event time to avoid enormous weights.} +\item{tau}{(\code{numeric()})\cr +Predefined time point for IPCW. Observations with time larger than \eqn{\tau} are censored. +Must be less or equal to the maximum event time.} -\item{eps}{\code{numeric()}\cr +\item{eps}{(\code{numeric()})\cr Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights (a warning is triggered if this happens).} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } @@ -57,7 +57,7 @@ to \link{PredictionSurv}. grlrn = ppl( "survtoclassif_IPCW", learner = lrn("classif.rpart"), - cutoff_time = 500, # Observations after 500 days are censored + tau = 500, # Observations after 500 days are censored graph_learner = TRUE ) grlrn$train(task, row_ids = part$train) @@ -66,7 +66,7 @@ to \link{PredictionSurv}. # score predictions pred$score() # C-index - pred$score(msr("surv.brier", times = 500)) # Brier + pred$score(msr("surv.brier", times = 500)) # Brier score at tau } \dontshow{\}) # examplesIf} } diff --git a/man/mlr_pipeops_responsecompose.Rd b/man/mlr_pipeops_responsecompose.Rd index dea59706c..6a0dc8642 100644 --- a/man/mlr_pipeops_responsecompose.Rd +++ b/man/mlr_pipeops_responsecompose.Rd @@ -43,7 +43,7 @@ The \verb{$state} is left empty (\code{list()}). \item \code{method} :: \code{character(1)} \cr Determines what method should be used to produce a survival time (response) from the survival distribution. Available methods are \code{"rmst"} and \code{"median"}, corresponding to the \emph{restricted mean survival time} and the \emph{median survival time} respectively. -\item \code{cutoff_time} :: \code{numeric(1)} \cr +\item \code{tau} :: \code{numeric(1)} \cr Determines the time point up to which we calculate the restricted mean survival time (works only for the \code{"rmst"} method). If \code{NULL} (default), all the available time points in the predicted survival distribution will be used. } @@ -62,7 +62,7 @@ If \code{TRUE}, then the \code{response} (and the \code{crank}, if \code{add_cra The restricted mean survival time is the default/preferred method and is calculated as follows: \deqn{T_{i,rmst} \approx \sum_{t_j \in [0,\tau]} (t_j - t_{j-1}) S_i(t_j)} -where \eqn{T} is the expected survival time, \eqn{\tau} is the time cutoff and \eqn{S_i(t_j)} are the predicted survival probabilities of observation \eqn{i} for all the \eqn{t_j} time points. 
+where \eqn{T} is the expected survival time, \eqn{\tau} is the time cutoff/horizon and \eqn{S_i(t_j)} are the predicted survival probabilities of observation \eqn{i} for all the \eqn{t_j} time points. The \eqn{T_{i,median}} survival time is just the first time point for which the survival probability is less than \eqn{0.5}. If no such time point exists (e.g. when the survival distribution is not proper due to high censoring) we return the last time point. @@ -83,7 +83,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { # mostly improper survival distributions, "median" sets the survival time # to the last time point - # RMST (default) as response, while also changing the crank = -response + # RMST (default) as response, while also changing the `crank` to `-response` por = po("responsecompose", param_vals = list(overwrite = TRUE, add_crank = TRUE)) por$predict(list(pred))[[1L]] } diff --git a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd index a1caf0b9d..5a5c356f8 100644 --- a/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd +++ b/man/mlr_pipeops_trafopred_classifsurv_IPCW.Rd @@ -34,8 +34,10 @@ see Vock et al. (2016) and \link{PipeOpTaskSurvClassifIPCW}. Therefore, these predictions serve as \strong{continuous risk scores} that can be directly interpreted as \code{crank} predictions in the right-censored survival setting. We also map them to the survival distribution prediction \code{distr}, -at the specified cutoff time point, i.e. as +at the specified cutoff time point \eqn{\tau}, i.e. as \eqn{S_i(\tau) = 1 - \hat{\pi}(\bold{X}_i)}. +Survival measures that use the survival distribution (eg \link[=mlr_measures_surv.brier]{ISBS}) +should be evaluated exactly at the cutoff time point \eqn{\tau}, see example. } \references{ diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index 63888322b..aa82ae50b 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -58,9 +58,9 @@ This "data" is only meant to be used with the \link{PipeOpPredClassifSurvIPCW}. The parameters are \itemize{ -\item \code{cutoff_time :: numeric()}\cr -Cutoff time for IPCW. Observations with time larger than \code{cutoff_time} are censored. -Should be reasonably smaller than the maximum event time to avoid enormous weights. +\item \code{tau :: numeric()}\cr +Predefined time point for IPCW. Observations with time larger than \eqn{\tau} are censored. +Must be less or equal to the maximum event time. \item \code{eps :: numeric()}\cr Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights (a warning is triggered if this happens). @@ -68,9 +68,10 @@ infinite weights (a warning is triggered if this happens). 
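# Tiny numeric illustration (not from the patch) of the mapping described
# above: predicted positive-class probabilities pi_hat become both the crank
# and a one-column survival matrix at tau. The values below are made up.
pi_hat = c(0.10, 0.35, 0.80)         # P(event by tau) from the classifier
tau = 365
crank = pi_hat                       # higher probability => higher risk
surv = matrix(1 - pi_hat, ncol = 1)  # S_i(tau) = 1 - pi_hat(X_i)
colnames(surv) = tau
surv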
} \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE)) { library(mlr3) + library(mlr3learners) library(mlr3pipelines) task = tsk("lung") @@ -81,7 +82,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { task_test = task$clone()$filter(part$test) # define IPCW pipeop - po_ipcw = po("trafotask_survclassif_IPCW", cutoff_time = 500) + po_ipcw = po("trafotask_survclassif_IPCW", tau = 365) # during training, output is a classification task with weights task_classif_train = po_ipcw$train(list(task_train))[[1]] @@ -90,8 +91,20 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { # during prediction, output is a classification task (no weights) task_classif_test = po_ipcw$predict(list(task_test))[[1]] task_classif_test + + # train classif learner on the train task with weights + learner = lrn("classif.rpart", predict_type = "prob") + learner$train(task_classif_train) + + # predict using the test output task + p = learner$predict(task_classif_test) + + # use classif measures for evaluation + p$confusion + p$score() + p$score(msr("classif.auc")) } -} +\dontshow{\}) # examplesIf} } \references{ Vock, M D, Wolfson, Julian, Bandyopadhyay, Sunayan, Adomavicius, Gediminas, Johnson, E P, Vazquez-Benitez, Gabriela, O'Connor, J P (2016). From 5146c2bb9d217a3fc9ed85780b7df3c962c45182 Mon Sep 17 00:00:00 2001 From: john Date: Sat, 21 Sep 2024 16:31:57 +0200 Subject: [PATCH 51/82] refine doc (mlr3 style) --- R/PipeOpSurvAvg.R | 4 +- R/PipeOpTaskSurvClassifIPCW.R | 4 +- R/PipeOpTaskSurvRegr.R | 12 +++--- R/TaskSurv.R | 2 +- R/pipelines.R | 42 +++++++++---------- man-roxygen/param_rows.R | 2 +- man-roxygen/pipeline.R | 2 +- man/TaskSurv.Rd | 18 ++++---- man/mlr_graphs_crankcompositor.Rd | 6 +-- man/mlr_graphs_distrcompositor.Rd | 8 ++-- man/mlr_graphs_probregr.Rd | 4 +- man/mlr_graphs_responsecompositor.Rd | 2 +- man/mlr_graphs_survaverager.Rd | 2 +- man/mlr_graphs_survbagging.Rd | 10 ++--- man/mlr_graphs_survtoclassif_disctime.Rd | 8 ++-- man/mlr_graphs_survtoregr.Rd | 14 +++---- man/mlr_pipeops_survavg.Rd | 4 +- man/mlr_pipeops_trafotask_survclassif_IPCW.Rd | 4 +- man/mlr_pipeops_trafotask_survregr.Rd | 12 +++--- tests/testthat/test_pipelines.R | 2 +- 20 files changed, 81 insertions(+), 81 deletions(-) diff --git a/R/PipeOpSurvAvg.R b/R/PipeOpSurvAvg.R index 12289e3b8..16718a17a 100644 --- a/R/PipeOpSurvAvg.R +++ b/R/PipeOpSurvAvg.R @@ -53,11 +53,11 @@ PipeOpSurvAvg = R6Class("PipeOpSurvAvg", #' @description #' Creates a new instance of this [R6][R6::R6Class] class. #' - #' @param innum `(numeric(1))`\cr + #' @param innum (`numeric(1)`)\cr #' Determines the number of input channels. #' If `innum` is 0 (default), a vararg input channel is created that can take an arbitrary #' number of inputs. - #' @param ... `ANY`\cr + #' @param ... (`ANY`)\cr #' Additional arguments passed to [mlr3pipelines::PipeOpEnsemble]. initialize = function(innum = 0, id = "survavg", param_vals = list(), ...) { diff --git a/R/PipeOpTaskSurvClassifIPCW.R b/R/PipeOpTaskSurvClassifIPCW.R index 76a45dedd..371a4a52e 100644 --- a/R/PipeOpTaskSurvClassifIPCW.R +++ b/R/PipeOpTaskSurvClassifIPCW.R @@ -51,10 +51,10 @@ #' @section Parameters: #' The parameters are #' -#' * `tau :: numeric()`\cr +#' * `tau` :: `numeric()`\cr #' Predefined time point for IPCW. Observations with time larger than \eqn{\tau} are censored. 
#' Must be less or equal to the maximum event time. -#' * `eps :: numeric()`\cr +#' * `eps` :: `numeric()`\cr #' Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent #' infinite weights (a warning is triggered if this happens). #' diff --git a/R/PipeOpTaskSurvRegr.R b/R/PipeOpTaskSurvRegr.R index 15cd0ee98..47cdf2173 100644 --- a/R/PipeOpTaskSurvRegr.R +++ b/R/PipeOpTaskSurvRegr.R @@ -19,7 +19,7 @@ #' @section Parameters: #' The parameters are #' -#' * `method::character(1))`\cr +#' * `method` :: `character(1)`\cr #' Method to use for dealing with censoring. Options are `"ipcw"` (Vock et al., 2016): censoring #' column is removed and a `weights` column is added, weights are inverse estimated survival #' probability of the censoring distribution evaluated at survival time; @@ -31,21 +31,21 @@ #' status column is deleted - again should be used with caution; `"reorder"`: selects features and #' targets and sets the target in the new task object. Note that `"mrl"` and `"ipcw"` will perform #' worse with Type I censoring. -#' * `estimator::(character(1))`\cr +#' * `estimator` :: `character(1)`\cr #' Method for calculating censoring weights or mean residual lifetime in `"mrl"`, #' current options are: `"kaplan"`: unconditional Kaplan-Meier estimator; #' `"akritas"`: conditional non-parameteric nearest-neighbours estimator; #' `"cox"`. -#' * `alpha::(numeric(1))`\cr +#' * `alpha` :: `numeric(1)`\cr #' When `ipcw` is used, optional hyper-parameter that adds an extra penalty to the weighting for #' censored observations. If set to `0` then censored observations are given zero weight and #' deleted, weighting only the non-censored observations. A weight for an observation is then #' \eqn{(\delta + \alpha(1-\delta))/G(t)} where \eqn{\delta} is the censoring indicator. -#' * `eps::numeric(1)`\cr +#' * `eps` :: `numeric(1)`\cr #' Small value to replace `0` survival probabilities with in IPCW to prevent infinite weights. -#' * `lambda::(numeric(1))`\cr +#' * `lambda` :: `numeric(1)`\cr #' Nearest neighbours parameter for the `"akritas"` estimator in the [mlr3extralearners package](https://mlr3extralearners.mlr-org.com/), default `0.5`. -#' * `features, target :: character())`\cr +#' * `features, target` :: `character()`\cr #' For `"reorder"` method, specify which columns become features and targets. #' * `learner cneter, mimpu, iter.bj, max.cycle, mstop, nu`\cr #' Passed to [bujar::bujar]. diff --git a/R/TaskSurv.R b/R/TaskSurv.R index 9dd9f4326..d02525083 100644 --- a/R/TaskSurv.R +++ b/R/TaskSurv.R @@ -408,7 +408,7 @@ TaskSurv = R6::R6Class("TaskSurv", ), active = list( - #' @field censtype `character(1)`\cr + #' @field censtype (`character(1)`)\cr #' Returns the type of censoring, one of `"right"`, `"left"`, `"counting"`, #' `"interval"`, `"interval2"` or `"mstate"`. #' Currently, only the `"right"`-censoring type is fully supported, the rest diff --git a/R/pipelines.R b/R/pipelines.R index 1fc390044..45582fafb 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -42,15 +42,15 @@ pipeline_survaverager = function(learners, param_vals = list(), graph_learner = #' @templateVar pipeop [PipeOpSubsample][mlr3pipelines::PipeOpSubsample] and [PipeOpSurvAvg] #' @templateVar id survbagging #' @template param_pipeline_learner -#' @param iterations `integer(1)`\cr +#' @param iterations (`integer(1)`)\cr #' Number of bagging iterations. Defaults to 10. -#' @param frac `numeric(1)`\cr +#' @param frac (`numeric(1)`)\cr #' Percentage of rows to keep during subsampling. 
See #' [PipeOpSubsample][mlr3pipelines::PipeOpSubsample] for more information. Defaults to 0.7. -#' @param avg `logical(1)`\cr +#' @param avg (`logical(1)`)\cr #' If `TRUE` (default) predictions are aggregated with [PipeOpSurvAvg], otherwise returned #' as multiple predictions. Can only be `FALSE` if `graph_learner = FALSE`. -#' @param weights `numeric()` \cr +#' @param weights (`numeric()`)\cr #' Weights for model avering, ignored if `avg = FALSE`. Default is uniform weighting, #' see [PipeOpSurvAvg]. #' @details Bagging (Bootstrap AGGregatING) is the process of bootstrapping data and aggregating @@ -109,11 +109,11 @@ pipeline_survbagging = function(learner, iterations = 10, frac = 0.7, avg = TRUE #' @templateVar id crankcompositor #' @template param_pipeline_learner #' -#' @param method `character(1)`\cr +#' @param method (`character(1)`)\cr #' Determines what method should be used to produce a continuous ranking from the distribution. #' Currently only `mort` is supported, which is the sum of the cumulative hazard, also called *expected/ensemble mortality*, see Ishwaran et al. (2008). #' For more details, see [get_mortality()]. -#' @param overwrite `logical(1)`\cr +#' @param overwrite (`logical(1)`)\cr #' If `FALSE` (default) and the prediction already has a `crank` prediction, then the compositor returns the input prediction unchanged. #' If `TRUE`, then the `crank` will be overwritten. #' @@ -231,16 +231,16 @@ pipeline_responsecompositor = function(learner, method = "rmst", tau = NULL, #' @template param_pipeline_learner #' @param learner [LearnerSurv]\cr #' Survival learner. -#' @param estimator `character(1)`\cr +#' @param estimator (`character(1)`)\cr #' One of `kaplan` (default), `nelson` or `breslow`, corresponding to the Kaplan-Meier, #' Nelson-Aalen and [Breslow][breslow] estimators respectively. #' Used to estimate the baseline survival distribution. -#' @param form `character(1)`\cr +#' @param form (`character(1)`)\cr #' One of `aft` (default), `ph`, or `po`, corresponding to accelerated failure time, #' proportional hazards, and proportional odds respectively. #' Used to determine the form of the composed survival distribution. #' Ignored if estimator is `breslow`. -#' @param overwrite `logical(1)`\cr +#' @param overwrite (`logical(1)`)\cr #' If `FALSE` (default) then if the `learner` already has a `distr`, the compositor does nothing. #' If `TRUE` then the `distr` is overwritten by the compositor if #' already present, which may be required for changing the prediction `distr` from one model form @@ -301,7 +301,7 @@ pipeline_distrcompositor = function(learner, estimator = "kaplan", form = "aft", #' @param learner_se `[mlr3::Learner]|[mlr3pipelines::PipeOp]` \cr #' Optional [LearnerRegr][mlr3::LearnerRegr] with predict_type `se` to estimate the standard #' error. If left `NULL` then `learner` must have `se` in predict_types. -#' @param dist `character(1)`\cr +#' @param dist (`character(1)`)\cr #' Location-scale distribution to use for composition. #' Current possibilities are' `"Cauchy", "Gumbel", "Laplace", "Logistic", "Normal", "Uniform"`. Default is `"Uniform"`. #' @examples @@ -412,12 +412,12 @@ pipeline_probregr = function(learner, learner_se = NULL, dist = "Uniform", #' model on these predictions. The resulting regression predictions can then be viewed as the linear #' predictors of the new data, which can ultimately be composed to a distribution. 
#' -#' @param method `integer(1)`\cr +#' @param method (`integer(1)`)\cr #' Reduction method to use, corresponds to those in `details`. Default is `1`. #' @param regr_learner [LearnerRegr][mlr3::LearnerRegr]\cr #' Regression learner to fit to the transformed [TaskRegr][mlr3::TaskRegr]. If `regr_se_learner` is #' `NULL` in method `2`, then `regr_learner` must have `se` predict_type. -#' @param distrcompose `logical(1)`\cr +#' @param distrcompose (`logical(1)`)\cr #' For method `3` if `TRUE` (default) then [PipeOpDistrCompositor] is utilised to #' transform the deterministic predictions to a survival distribution. #' @param distr_estimator [LearnerSurv]\cr @@ -428,18 +428,18 @@ pipeline_probregr = function(learner, learner_se = NULL, dist = "Uniform", #' predict_type must be provided. #' @param surv_learner [LearnerSurv]\cr #' For method `3`, a [LearnerSurv] with `lp` predict type to estimate linear predictors. -#' @param survregr_params `list()`\cr +#' @param survregr_params (`list()`)\cr #' Parameters passed to [PipeOpTaskSurvRegr], default are survival to regression transformation #' via `ipcw`, with weighting determined by Kaplan-Meier and no additional penalty for censoring. -#' @param distrcompose_params `list()`\cr +#' @param distrcompose_params (`list()`)\cr #' Parameters passed to [PipeOpDistrCompositor], default is accelerated failure time model form. -#' @param probregr_params `list()`\cr +#' @param probregr_params (`list()`)\cr #' Parameters passed to [PipeOpProbregr], default is [Uniform][distr6::Uniform] #' distribution for composition. -#' @param learnercv_params `list()`\cr +#' @param learnercv_params (`list()`)\cr #' Parameters passed to [PipeOpLearnerCV][mlr3pipelines::PipeOpLearnerCV], default is to use #' insampling. -#' @param graph_learner `logical(1)`\cr +#' @param graph_learner (`logical(1)`)\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. #' @@ -576,20 +576,20 @@ pipeline_survtoregr = function(method = 1, regr_learner = lrn("regr.featureless" #' @param learner [LearnerClassif][mlr3::LearnerClassif]\cr #' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. #' `learner` must have `predict_type` of type `"prob"`. -#' @param cut `numeric()`\cr +#' @param cut (`numeric()`)\cr #' Split points, used to partition the data into intervals. #' If unspecified, all unique event times will be used. #' If `cut` is a single integer, it will be interpreted as the number of equidistant #' intervals from 0 until the maximum event time. -#' @param max_time `numeric(1)`\cr +#' @param max_time (`numeric(1)`)\cr #' If cut is unspecified, this will be the last possible event time. #' All event times after max_time will be administratively censored at max_time. -#' @param rhs `character(1)`\cr +#' @param rhs (`character(1)`)\cr #' Right-hand side of the formula to with the learner. #' All features of the task are available as well as `tend` the upper bounds #' of the intervals created by `cut`. #' If rhs is unspecified, the formula of the task will be used. -#' @param graph_learner `logical(1)`\cr +#' @param graph_learner (`logical(1)`)\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. 
#' diff --git a/man-roxygen/param_rows.R b/man-roxygen/param_rows.R index b31cf7127..83a21e425 100644 --- a/man-roxygen/param_rows.R +++ b/man-roxygen/param_rows.R @@ -1,2 +1,2 @@ -#' @param rows `integer()`\cr +#' @param rows (`integer()`)\cr #' Row indices. diff --git a/man-roxygen/pipeline.R b/man-roxygen/pipeline.R index 6d507ff02..1a108de93 100644 --- a/man-roxygen/pipeline.R +++ b/man-roxygen/pipeline.R @@ -1,7 +1,7 @@ #' @name <%= paste0("mlr_graphs_", id)%> #' @title <%=title%> Pipeline #' @description Wrapper around <%=pipeop%> to simplify [Graph][mlr3pipelines::Graph] creation. -#' @param graph_learner `logical(1)`\cr +#' @param graph_learner (`logical(1)`)\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. #' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] diff --git a/man/TaskSurv.Rd b/man/TaskSurv.Rd index 4fb33156e..e63d4e3ec 100644 --- a/man/TaskSurv.Rd +++ b/man/TaskSurv.Rd @@ -72,7 +72,7 @@ Other Task: \section{Active bindings}{ \if{html}{\out{
                              }} \describe{ -\item{\code{censtype}}{\code{character(1)}\cr +\item{\code{censtype}}{(\code{character(1)})\cr Returns the type of censoring, one of \code{"right"}, \code{"left"}, \code{"counting"}, \code{"interval"}, \code{"interval2"} or \code{"mstate"}. Currently, only the \code{"right"}-censoring type is fully supported, the rest @@ -201,7 +201,7 @@ type. Defaults to all rows with role \code{"use"}. \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{rows}}{\code{integer()}\cr +\item{\code{rows}}{(\code{integer()})\cr Row indices.} } \if{html}{\out{
                              }} @@ -245,7 +245,7 @@ Returns the (unsorted) outcome times. \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{rows}}{\code{integer()}\cr +\item{\code{rows}}{(\code{integer()})\cr Row indices.} } \if{html}{\out{
                              }} @@ -271,7 +271,7 @@ See \code{\link[survival:Surv]{survival::Surv()}}. \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{rows}}{\code{integer()}\cr +\item{\code{rows}}{(\code{integer()})\cr Row indices.} } \if{html}{\out{
                              }} @@ -293,7 +293,7 @@ Returns the sorted unique outcome times for \code{"right"}, \code{"left"} and \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{rows}}{\code{integer()}\cr +\item{\code{rows}}{(\code{integer()})\cr Row indices.} } \if{html}{\out{
                              }} @@ -315,7 +315,7 @@ Returns the sorted unique event (or failure) outcome times for \code{"right"}, \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{rows}}{\code{integer()}\cr +\item{\code{rows}}{(\code{integer()})\cr Row indices.} } \if{html}{\out{
                              }} @@ -408,7 +408,7 @@ Only designed for \code{"right"} and \code{"left"} censoring. \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{rows}}{\code{integer()}\cr +\item{\code{rows}}{(\code{integer()})\cr Row indices.} } \if{html}{\out{
                              }} @@ -435,7 +435,7 @@ Only designed for \code{"right"} and \code{"left"} censoring. \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{rows}}{\code{integer()}\cr +\item{\code{rows}}{(\code{integer()})\cr Row indices.} \item{\code{admin_time}}{(\code{numeric(1)}) \cr @@ -478,7 +478,7 @@ Only designed for \code{"right"} and \code{"left"} censoring. \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{rows}}{\code{integer()}\cr +\item{\code{rows}}{(\code{integer()})\cr Row indices.} \item{\code{method}}{(\code{character(1)}) \cr diff --git a/man/mlr_graphs_crankcompositor.Rd b/man/mlr_graphs_crankcompositor.Rd index f09ca89da..5879a1baf 100644 --- a/man/mlr_graphs_crankcompositor.Rd +++ b/man/mlr_graphs_crankcompositor.Rd @@ -18,16 +18,16 @@ Either a \code{Learner} which will be wrapped in \link[mlr3pipelines:mlr_pipeops be wrapped in \link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or a \code{Graph} itself. Underlying \code{Learner} should be \link{LearnerSurv}.} -\item{method}{\code{character(1)}\cr +\item{method}{(\code{character(1)})\cr Determines what method should be used to produce a continuous ranking from the distribution. Currently only \code{mort} is supported, which is the sum of the cumulative hazard, also called \emph{expected/ensemble mortality}, see Ishwaran et al. (2008). For more details, see \code{\link[=get_mortality]{get_mortality()}}.} -\item{overwrite}{\code{logical(1)}\cr +\item{overwrite}{(\code{logical(1)})\cr If \code{FALSE} (default) and the prediction already has a \code{crank} prediction, then the compositor returns the input prediction unchanged. If \code{TRUE}, then the \code{crank} will be overwritten.} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } diff --git a/man/mlr_graphs_distrcompositor.Rd b/man/mlr_graphs_distrcompositor.Rd index 7512d6a36..76b661d9e 100644 --- a/man/mlr_graphs_distrcompositor.Rd +++ b/man/mlr_graphs_distrcompositor.Rd @@ -17,24 +17,24 @@ pipeline_distrcompositor( \item{learner}{\link{LearnerSurv}\cr Survival learner.} -\item{estimator}{\code{character(1)}\cr +\item{estimator}{(\code{character(1)})\cr One of \code{kaplan} (default), \code{nelson} or \code{breslow}, corresponding to the Kaplan-Meier, Nelson-Aalen and \link[=breslow]{Breslow} estimators respectively. Used to estimate the baseline survival distribution.} -\item{form}{\code{character(1)}\cr +\item{form}{(\code{character(1)})\cr One of \code{aft} (default), \code{ph}, or \code{po}, corresponding to accelerated failure time, proportional hazards, and proportional odds respectively. Used to determine the form of the composed survival distribution. Ignored if estimator is \code{breslow}.} -\item{overwrite}{\code{logical(1)}\cr +\item{overwrite}{(\code{logical(1)})\cr If \code{FALSE} (default) then if the \code{learner} already has a \code{distr}, the compositor does nothing. If \code{TRUE} then the \code{distr} is overwritten by the compositor if already present, which may be required for changing the prediction \code{distr} from one model form to another.} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } diff --git a/man/mlr_graphs_probregr.Rd b/man/mlr_graphs_probregr.Rd index 260e284cd..24ee3490d 100644 --- a/man/mlr_graphs_probregr.Rd +++ b/man/mlr_graphs_probregr.Rd @@ -22,11 +22,11 @@ be wrapped in \link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or a \code{Graph} Optional \link[mlr3:LearnerRegr]{LearnerRegr} with predict_type \code{se} to estimate the standard error. 
If left \code{NULL} then \code{learner} must have \code{se} in predict_types.} -\item{dist}{\code{character(1)}\cr +\item{dist}{(\code{character(1)})\cr Location-scale distribution to use for composition. Current possibilities are' \verb{"Cauchy", "Gumbel", "Laplace", "Logistic", "Normal", "Uniform"}. Default is \code{"Uniform"}.} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } diff --git a/man/mlr_graphs_responsecompositor.Rd b/man/mlr_graphs_responsecompositor.Rd index 407aa9380..80f9f1915 100644 --- a/man/mlr_graphs_responsecompositor.Rd +++ b/man/mlr_graphs_responsecompositor.Rd @@ -36,7 +36,7 @@ Works only if \code{overwrite} is \code{TRUE}.} If \code{FALSE} (default) and the prediction already has a \code{response} prediction, then the compositor returns the input prediction unchanged. If \code{TRUE}, then the \code{response} (and the \code{crank}, if \code{add_crank} is \code{TRUE}) will be overwritten.} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } diff --git a/man/mlr_graphs_survaverager.Rd b/man/mlr_graphs_survaverager.Rd index b877b7700..0ede56722 100644 --- a/man/mlr_graphs_survaverager.Rd +++ b/man/mlr_graphs_survaverager.Rd @@ -14,7 +14,7 @@ List of \link{LearnerSurv}s to average.} \item{param_vals}{\code{(list())} \cr Parameters, including weights, to pass to \link{PipeOpSurvAvg}.} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } diff --git a/man/mlr_graphs_survbagging.Rd b/man/mlr_graphs_survbagging.Rd index 04deb5e0d..8ea5d37d8 100644 --- a/man/mlr_graphs_survbagging.Rd +++ b/man/mlr_graphs_survbagging.Rd @@ -20,22 +20,22 @@ Either a \code{Learner} which will be wrapped in \link[mlr3pipelines:mlr_pipeops be wrapped in \link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or a \code{Graph} itself. Underlying \code{Learner} should be \link{LearnerSurv}.} -\item{iterations}{\code{integer(1)}\cr +\item{iterations}{(\code{integer(1)})\cr Number of bagging iterations. Defaults to 10.} -\item{frac}{\code{numeric(1)}\cr +\item{frac}{(\code{numeric(1)})\cr Percentage of rows to keep during subsampling. See \link[mlr3pipelines:mlr_pipeops_subsample]{PipeOpSubsample} for more information. Defaults to 0.7.} -\item{avg}{\code{logical(1)}\cr +\item{avg}{(\code{logical(1)})\cr If \code{TRUE} (default) predictions are aggregated with \link{PipeOpSurvAvg}, otherwise returned as multiple predictions. Can only be \code{FALSE} if \code{graph_learner = FALSE}.} -\item{weights}{\code{numeric()} \cr +\item{weights}{(\code{numeric()})\cr Weights for model avering, ignored if \code{avg = FALSE}. 
Default is uniform weighting, see \link{PipeOpSurvAvg}.} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } diff --git a/man/mlr_graphs_survtoclassif_disctime.Rd b/man/mlr_graphs_survtoclassif_disctime.Rd index 3a493e519..78d74db62 100644 --- a/man/mlr_graphs_survtoclassif_disctime.Rd +++ b/man/mlr_graphs_survtoclassif_disctime.Rd @@ -18,23 +18,23 @@ pipeline_survtoclassif_disctime( Classification learner to fit the transformed \link[mlr3:TaskClassif]{TaskClassif}. \code{learner} must have \code{predict_type} of type \code{"prob"}.} -\item{cut}{\code{numeric()}\cr +\item{cut}{(\code{numeric()})\cr Split points, used to partition the data into intervals. If unspecified, all unique event times will be used. If \code{cut} is a single integer, it will be interpreted as the number of equidistant intervals from 0 until the maximum event time.} -\item{max_time}{\code{numeric(1)}\cr +\item{max_time}{(\code{numeric(1)})\cr If cut is unspecified, this will be the last possible event time. All event times after max_time will be administratively censored at max_time.} -\item{rhs}{\code{character(1)}\cr +\item{rhs}{(\code{character(1)})\cr Right-hand side of the formula to with the learner. All features of the task are available as well as \code{tend} the upper bounds of the intervals created by \code{cut}. If rhs is unspecified, the formula of the task will be used.} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } diff --git a/man/mlr_graphs_survtoregr.Rd b/man/mlr_graphs_survtoregr.Rd index 04d3860b6..4ae09dd0a 100644 --- a/man/mlr_graphs_survtoregr.Rd +++ b/man/mlr_graphs_survtoregr.Rd @@ -20,14 +20,14 @@ pipeline_survtoregr( ) } \arguments{ -\item{method}{\code{integer(1)}\cr +\item{method}{(\code{integer(1)})\cr Reduction method to use, corresponds to those in \code{details}. Default is \code{1}.} \item{regr_learner}{\link[mlr3:LearnerRegr]{LearnerRegr}\cr Regression learner to fit to the transformed \link[mlr3:TaskRegr]{TaskRegr}. 
If \code{regr_se_learner} is \code{NULL} in method \code{2}, then \code{regr_learner} must have \code{se} predict_type.} -\item{distrcompose}{\code{logical(1)}\cr +\item{distrcompose}{(\code{logical(1)})\cr For method \code{3} if \code{TRUE} (default) then \link{PipeOpDistrCompositor} is utilised to transform the deterministic predictions to a survival distribution.} @@ -42,22 +42,22 @@ predict_type must be provided.} \item{surv_learner}{\link{LearnerSurv}\cr For method \code{3}, a \link{LearnerSurv} with \code{lp} predict type to estimate linear predictors.} -\item{survregr_params}{\code{list()}\cr +\item{survregr_params}{(\code{list()})\cr Parameters passed to \link{PipeOpTaskSurvRegr}, default are survival to regression transformation via \code{ipcw}, with weighting determined by Kaplan-Meier and no additional penalty for censoring.} -\item{distrcompose_params}{\code{list()}\cr +\item{distrcompose_params}{(\code{list()})\cr Parameters passed to \link{PipeOpDistrCompositor}, default is accelerated failure time model form.} -\item{probregr_params}{\code{list()}\cr +\item{probregr_params}{(\code{list()})\cr Parameters passed to \link{PipeOpProbregr}, default is \link[distr6:Uniform]{Uniform} distribution for composition.} -\item{learnercv_params}{\code{list()}\cr +\item{learnercv_params}{(\code{list()})\cr Parameters passed to \link[mlr3pipelines:mlr_pipeops_learner_cv]{PipeOpLearnerCV}, default is to use insampling.} -\item{graph_learner}{\code{logical(1)}\cr +\item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } diff --git a/man/mlr_pipeops_survavg.Rd b/man/mlr_pipeops_survavg.Rd index 12b070238..48e02f1cf 100644 --- a/man/mlr_pipeops_survavg.Rd +++ b/man/mlr_pipeops_survavg.Rd @@ -104,7 +104,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Arguments}{ \if{html}{\out{
                              }} \describe{ -\item{\code{innum}}{\code{(numeric(1))}\cr +\item{\code{innum}}{(\code{numeric(1)})\cr Determines the number of input channels. If \code{innum} is 0 (default), a vararg input channel is created that can take an arbitrary number of inputs.} @@ -116,7 +116,7 @@ Identifier of the resulting object.} List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction.} -\item{\code{...}}{\code{ANY}\cr +\item{\code{...}}{(\code{ANY})\cr Additional arguments passed to \link[mlr3pipelines:PipeOpEnsemble]{mlr3pipelines::PipeOpEnsemble}.} } \if{html}{\out{
                              }} diff --git a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd index aa82ae50b..fddf5a5be 100644 --- a/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd +++ b/man/mlr_pipeops_trafotask_survclassif_IPCW.Rd @@ -58,10 +58,10 @@ This "data" is only meant to be used with the \link{PipeOpPredClassifSurvIPCW}. The parameters are \itemize{ -\item \code{tau :: numeric()}\cr +\item \code{tau} :: \code{numeric()}\cr Predefined time point for IPCW. Observations with time larger than \eqn{\tau} are censored. Must be less or equal to the maximum event time. -\item \code{eps :: numeric()}\cr +\item \code{eps} :: \code{numeric()}\cr Small value to replace \eqn{G(t) = 0} censoring probabilities to prevent infinite weights (a warning is triggered if this happens). } diff --git a/man/mlr_pipeops_trafotask_survregr.Rd b/man/mlr_pipeops_trafotask_survregr.Rd index bdc70313c..cebdf7bc8 100644 --- a/man/mlr_pipeops_trafotask_survregr.Rd +++ b/man/mlr_pipeops_trafotask_survregr.Rd @@ -27,7 +27,7 @@ The \verb{$state} is a named \code{list} with the \verb{$state} elements The parameters are \itemize{ -\item \verb{method::character(1))}\cr +\item \code{method} :: \code{character(1)}\cr Method to use for dealing with censoring. Options are \code{"ipcw"} (Vock et al., 2016): censoring column is removed and a \code{weights} column is added, weights are inverse estimated survival probability of the censoring distribution evaluated at survival time; @@ -39,21 +39,21 @@ data-set - should be used with caution if censoring is informative; \code{"omit" status column is deleted - again should be used with caution; \code{"reorder"}: selects features and targets and sets the target in the new task object. Note that \code{"mrl"} and \code{"ipcw"} will perform worse with Type I censoring. -\item \verb{estimator::(character(1))}\cr +\item \code{estimator} :: \code{character(1)}\cr Method for calculating censoring weights or mean residual lifetime in \code{"mrl"}, current options are: \code{"kaplan"}: unconditional Kaplan-Meier estimator; \code{"akritas"}: conditional non-parameteric nearest-neighbours estimator; \code{"cox"}. -\item \verb{alpha::(numeric(1))}\cr +\item \code{alpha} :: \code{numeric(1)}\cr When \code{ipcw} is used, optional hyper-parameter that adds an extra penalty to the weighting for censored observations. If set to \code{0} then censored observations are given zero weight and deleted, weighting only the non-censored observations. A weight for an observation is then \eqn{(\delta + \alpha(1-\delta))/G(t)} where \eqn{\delta} is the censoring indicator. -\item \code{eps::numeric(1)}\cr +\item \code{eps} :: \code{numeric(1)}\cr Small value to replace \code{0} survival probabilities with in IPCW to prevent infinite weights. -\item \verb{lambda::(numeric(1))}\cr +\item \code{lambda} :: \code{numeric(1)}\cr Nearest neighbours parameter for the \code{"akritas"} estimator in the \href{https://mlr3extralearners.mlr-org.com/}{mlr3extralearners package}, default \code{0.5}. -\item \verb{features, target :: character())}\cr +\item \verb{features, target} :: \code{character()}\cr For \code{"reorder"} method, specify which columns become features and targets. \item \verb{learner cneter, mimpu, iter.bj, max.cycle, mstop, nu}\cr Passed to \link[bujar:bujar]{bujar::bujar}. 
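The `ipcw` weighting documented in the hunk above — a weight of (delta + alpha * (1 - delta)) / G(t) per observation, with G(t) an estimate of the censoring distribution — can be sketched outside the PipeOp with plain `survival` code. The snippet below is only an illustrative sketch, not part of the patches: the toy data, the variable names and the chosen `alpha`/`eps` values are assumptions made for the example.

    # sketch of IPCW weights with a Kaplan-Meier estimate of the censoring distribution
    library(survival)

    time   = c(2, 3, 5, 8, 10, 12)   # toy observed times
    status = c(1, 0, 1, 1, 0, 1)     # 1 = event, 0 = censored
    alpha  = 0                        # extra penalty for censored rows, cf. `alpha` above
    eps    = 1e-3                     # guard against G(t) = 0, cf. `eps` above

    # fitting KM on the reversed status estimates the censoring distribution G(t)
    G_fit = survfit(Surv(time, 1 - status) ~ 1)
    G_t   = summary(G_fit, times = time, extend = TRUE)$surv
    G_t   = pmax(G_t, eps)

    # weight per observation: (delta + alpha * (1 - delta)) / G(t)
    ipc_weights = (status + alpha * (1 - status)) / G_t

With `alpha = 0` the censored rows receive zero weight, matching the behaviour described for the `alpha` parameter above.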
diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index 864885bd1..1d24b2fa1 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -140,6 +140,6 @@ test_that("survtoclassif_IPCW", { expect_numeric(p2$crank, len = length(part$test), lower = 0, upper = 1) expect_number(p2$score(msr("surv.brier", times = 600)), finite = TRUE) - # different cutoff time, different (crank) predictions + # different cutoff times, different (crank) predictions expect_false(all(p$crank == p2$crank)) }) From 30c80b4c530fc50f8cc89df6af2e171246c6b077 Mon Sep 17 00:00:00 2001 From: john Date: Sun, 22 Sep 2024 01:05:55 +0200 Subject: [PATCH 52/82] correct doc about t_max in integrated scores --- man-roxygen/param_tmax.R | 4 ++-- man/mlr_measures_surv.graf.Rd | 4 ++-- man/mlr_measures_surv.intlogloss.Rd | 4 ++-- man/mlr_measures_surv.schmid.Rd | 4 ++-- man/weighted_survival_score.Rd | 34 +++++++++++++++++++++++++++++ 5 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 man/weighted_survival_score.Rd diff --git a/man-roxygen/param_tmax.R b/man-roxygen/param_tmax.R index 89f9ee305..90957a505 100644 --- a/man-roxygen/param_tmax.R +++ b/man-roxygen/param_tmax.R @@ -2,6 +2,6 @@ #' - `t_max` (`numeric(1)`)\cr #' Cutoff time (i.e. time horizon) to evaluate the measure up to. #' Mutually exclusive with `p_max` or `times`. -#' This will effectively remove test observations for which the time -#' (event or censoring) is less than `t_max`. +#' This will effectively remove test observations for which the observed time +#' (event or censoring) is strictly more than `t_max`. #' It's recommended to set `t_max` to avoid division by `eps`, see Details. diff --git a/man/mlr_measures_surv.graf.Rd b/man/mlr_measures_surv.graf.Rd index 83ccd1f20..04ae965f8 100644 --- a/man/mlr_measures_surv.graf.Rd +++ b/man/mlr_measures_surv.graf.Rd @@ -102,8 +102,8 @@ If \code{integrate == FALSE} then a single time point at which to return the sco \item \code{t_max} (\code{numeric(1)})\cr Cutoff time (i.e. time horizon) to evaluate the measure up to. Mutually exclusive with \code{p_max} or \code{times}. -This will effectively remove test observations for which the time -(event or censoring) is less than \code{t_max}. +This will effectively remove test observations for which the observed time +(event or censoring) is strictly more than \code{t_max}. It's recommended to set \code{t_max} to avoid division by \code{eps}, see Details. } diff --git a/man/mlr_measures_surv.intlogloss.Rd b/man/mlr_measures_surv.intlogloss.Rd index 737439c0e..314117072 100644 --- a/man/mlr_measures_surv.intlogloss.Rd +++ b/man/mlr_measures_surv.intlogloss.Rd @@ -94,8 +94,8 @@ If \code{integrate == FALSE} then a single time point at which to return the sco \item \code{t_max} (\code{numeric(1)})\cr Cutoff time (i.e. time horizon) to evaluate the measure up to. Mutually exclusive with \code{p_max} or \code{times}. -This will effectively remove test observations for which the time -(event or censoring) is less than \code{t_max}. +This will effectively remove test observations for which the observed time +(event or censoring) is strictly more than \code{t_max}. It's recommended to set \code{t_max} to avoid division by \code{eps}, see Details. 
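The `t_max` / `p_max` cutoffs described above are ordinary hyperparameters of the integrated scoring rules. A minimal usage sketch follows; it mirrors the calls exercised in the test suite later in this series, and the specific cutoff values (90, 0.3) are arbitrary choices for illustration only.

    library(mlr3proba)

    task = tsk("rats")
    pred = lrn("surv.kaplan")$train(task)$predict(task)

    # integrate the Graf score only up to the horizon t = 90: test observations
    # whose observed time is larger than 90 are removed from the evaluation
    pred$score(msr("surv.graf", t_max = 90))

    # alternatively, pick the horizon via the proportion of censoring
    pred$score(msr("surv.graf", p_max = 0.3))

Only one of `times`, `t_max` and `p_max` may be supplied at a time, as the measure code changed later in this series enforces.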
} diff --git a/man/mlr_measures_surv.schmid.Rd b/man/mlr_measures_surv.schmid.Rd index 38b6eaecc..bac156965 100644 --- a/man/mlr_measures_surv.schmid.Rd +++ b/man/mlr_measures_surv.schmid.Rd @@ -91,8 +91,8 @@ If \code{integrate == FALSE} then a single time point at which to return the sco \item \code{t_max} (\code{numeric(1)})\cr Cutoff time (i.e. time horizon) to evaluate the measure up to. Mutually exclusive with \code{p_max} or \code{times}. -This will effectively remove test observations for which the time -(event or censoring) is less than \code{t_max}. +This will effectively remove test observations for which the observed time +(event or censoring) is strictly more than \code{t_max}. It's recommended to set \code{t_max} to avoid division by \code{eps}, see Details. } diff --git a/man/weighted_survival_score.Rd b/man/weighted_survival_score.Rd new file mode 100644 index 000000000..f59a1a59e --- /dev/null +++ b/man/weighted_survival_score.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/integrated_scores.R +\name{weighted_survival_score} +\alias{weighted_survival_score} +\title{\itemize{ +\item \code{tmax_apply} = TRUE => one of \code{t_max}, \code{p_max} is given +\item \code{tmax_apply} = FALSE => \code{times} is given +The \code{t_max} cutoff will be applied later in the predicted survival matrix +to filter observations (rows) and time points (columns) + filter the +(time, status) target on both train (if provided) and test data +}} +\usage{ +weighted_survival_score( + loss, + truth, + distribution, + times = NULL, + t_max = NULL, + p_max = NULL, + proper, + train = NULL, + eps, + ... +) +} +\description{ +\itemize{ +\item \code{tmax_apply} = TRUE => one of \code{t_max}, \code{p_max} is given +\item \code{tmax_apply} = FALSE => \code{times} is given +The \code{t_max} cutoff will be applied later in the predicted survival matrix +to filter observations (rows) and time points (columns) + filter the +(time, status) target on both train (if provided) and test data +} +} From 3e1d8b1a17eff1625ba2e93cc126c3263239c58a Mon Sep 17 00:00:00 2001 From: john Date: Sun, 22 Sep 2024 13:46:58 +0200 Subject: [PATCH 53/82] add description for Rcpp function --- src/survival_scores.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/survival_scores.cpp b/src/survival_scores.cpp index c3134e748..36ecf0ed5 100644 --- a/src/survival_scores.cpp +++ b/src/survival_scores.cpp @@ -3,6 +3,9 @@ using namespace Rcpp; using namespace std; +// This function essentially finds and returns the subset of `true_times` that +// align with the requested times (`req_times`), after cleaning up the invalid +// `req_times` (outside of the `true_times` range or duplicate consesutive elements) // [[Rcpp::export(.c_get_unique_times)]] NumericVector c_get_unique_times(NumericVector true_times, NumericVector req_times) { if (req_times.length() == 0) { From a6e073cd3c1ce5d20f22c2a162c6476bbefadaae Mon Sep 17 00:00:00 2001 From: john Date: Mon, 23 Sep 2024 12:17:07 +0300 Subject: [PATCH 54/82] fix spelling --- src/survival_scores.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/survival_scores.cpp b/src/survival_scores.cpp index 36ecf0ed5..1590ceca2 100644 --- a/src/survival_scores.cpp +++ b/src/survival_scores.cpp @@ -5,7 +5,7 @@ using namespace std; // This function essentially finds and returns the subset of `true_times` that // align with the requested times (`req_times`), after cleaning up the invalid -// `req_times` (outside of the 
`true_times` range or duplicate consesutive elements) +// `req_times` (outside of the `true_times` range or duplicate consecutive elements) // [[Rcpp::export(.c_get_unique_times)]] NumericVector c_get_unique_times(NumericVector true_times, NumericVector req_times) { if (req_times.length() == 0) { From d0c29ff8eac44d390ec0267f3c0f1c59071e00ff Mon Sep 17 00:00:00 2001 From: Philip Studener Date: Tue, 24 Sep 2024 15:53:31 +0200 Subject: [PATCH 55/82] fix type --- R/pipelines.R | 2 +- man/mlr_graphs_survtoclassif_disctime.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pipelines.R b/R/pipelines.R index 1fc390044..26ad9a514 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -585,7 +585,7 @@ pipeline_survtoregr = function(method = 1, regr_learner = lrn("regr.featureless" #' If cut is unspecified, this will be the last possible event time. #' All event times after max_time will be administratively censored at max_time. #' @param rhs `character(1)`\cr -#' Right-hand side of the formula to with the learner. +#' Right-hand side of the formula to use with the learner. #' All features of the task are available as well as `tend` the upper bounds #' of the intervals created by `cut`. #' If rhs is unspecified, the formula of the task will be used. diff --git a/man/mlr_graphs_survtoclassif_disctime.Rd b/man/mlr_graphs_survtoclassif_disctime.Rd index 3a493e519..7b51b296c 100644 --- a/man/mlr_graphs_survtoclassif_disctime.Rd +++ b/man/mlr_graphs_survtoclassif_disctime.Rd @@ -29,7 +29,7 @@ If cut is unspecified, this will be the last possible event time. All event times after max_time will be administratively censored at max_time.} \item{rhs}{\code{character(1)}\cr -Right-hand side of the formula to with the learner. +Right-hand side of the formula to use with the learner. All features of the task are available as well as \code{tend} the upper bounds of the intervals created by \code{cut}. If rhs is unspecified, the formula of the task will be used.} From 069414161bb95f07ff4300278b99b07eef8f543d Mon Sep 17 00:00:00 2001 From: studener Date: Thu, 26 Sep 2024 14:53:45 +0200 Subject: [PATCH 56/82] fix typo --- man-roxygen/param_times.R | 4 ++-- man/MeasureSurvAUC.Rd | 4 ++-- man/mlr_measures_surv.chambless_auc.Rd | 4 ++-- man/mlr_measures_surv.graf.Rd | 4 ++-- man/mlr_measures_surv.hung_auc.Rd | 4 ++-- man/mlr_measures_surv.intlogloss.Rd | 4 ++-- man/mlr_measures_surv.schmid.Rd | 4 ++-- man/mlr_measures_surv.song_auc.Rd | 4 ++-- man/mlr_measures_surv.song_tnr.Rd | 4 ++-- man/mlr_measures_surv.song_tpr.Rd | 4 ++-- man/mlr_measures_surv.uno_auc.Rd | 4 ++-- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/man-roxygen/param_times.R b/man-roxygen/param_times.R index 813978e87..c01dbb3fa 100644 --- a/man-roxygen/param_times.R +++ b/man-roxygen/param_times.R @@ -1,4 +1,4 @@ #' @section Parameter details: #' - `times` (`numeric()`)\cr -#' If `integrate == TRUE` then a vector of time-points over which to integrate the score. -#' If `integrate == FALSE` then a single time point at which to return the score. +#' If `integrated == TRUE` then a vector of time-points over which to integrate the score. +#' If `integrated == FALSE` then a single time point at which to return the score. diff --git a/man/MeasureSurvAUC.Rd b/man/MeasureSurvAUC.Rd index 676658b48..281808e0d 100644 --- a/man/MeasureSurvAUC.Rd +++ b/man/MeasureSurvAUC.Rd @@ -17,8 +17,8 @@ time points); otherwise, not integrated (eg at a single time point). 
\itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } } diff --git a/man/mlr_measures_surv.chambless_auc.Rd b/man/mlr_measures_surv.chambless_auc.Rd index 43c310294..6b8476ace 100644 --- a/man/mlr_measures_surv.chambless_auc.Rd +++ b/man/mlr_measures_surv.chambless_auc.Rd @@ -56,8 +56,8 @@ time points); otherwise, not integrated (eg at a single time point). \itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } } diff --git a/man/mlr_measures_surv.graf.Rd b/man/mlr_measures_surv.graf.Rd index 83ccd1f20..994332bf9 100644 --- a/man/mlr_measures_surv.graf.Rd +++ b/man/mlr_measures_surv.graf.Rd @@ -93,8 +93,8 @@ time points); otherwise, not integrated (eg at a single time point). \itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } diff --git a/man/mlr_measures_surv.hung_auc.Rd b/man/mlr_measures_surv.hung_auc.Rd index b4be0260e..e5207db4e 100644 --- a/man/mlr_measures_surv.hung_auc.Rd +++ b/man/mlr_measures_surv.hung_auc.Rd @@ -56,8 +56,8 @@ time points); otherwise, not integrated (eg at a single time point). \itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } } diff --git a/man/mlr_measures_surv.intlogloss.Rd b/man/mlr_measures_surv.intlogloss.Rd index 737439c0e..dfa37ebec 100644 --- a/man/mlr_measures_surv.intlogloss.Rd +++ b/man/mlr_measures_surv.intlogloss.Rd @@ -85,8 +85,8 @@ time points); otherwise, not integrated (eg at a single time point). \itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } diff --git a/man/mlr_measures_surv.schmid.Rd b/man/mlr_measures_surv.schmid.Rd index 38b6eaecc..634853857 100644 --- a/man/mlr_measures_surv.schmid.Rd +++ b/man/mlr_measures_surv.schmid.Rd @@ -82,8 +82,8 @@ time points); otherwise, not integrated (eg at a single time point). 
\itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } diff --git a/man/mlr_measures_surv.song_auc.Rd b/man/mlr_measures_surv.song_auc.Rd index 645030cc7..f57df5545 100644 --- a/man/mlr_measures_surv.song_auc.Rd +++ b/man/mlr_measures_surv.song_auc.Rd @@ -50,8 +50,8 @@ msr("surv.song_auc") \itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } diff --git a/man/mlr_measures_surv.song_tnr.Rd b/man/mlr_measures_surv.song_tnr.Rd index 778a8131b..aa2717532 100644 --- a/man/mlr_measures_surv.song_tnr.Rd +++ b/man/mlr_measures_surv.song_tnr.Rd @@ -52,8 +52,8 @@ msr("surv.song_tnr") \itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } diff --git a/man/mlr_measures_surv.song_tpr.Rd b/man/mlr_measures_surv.song_tpr.Rd index e3df8bd96..62549705a 100644 --- a/man/mlr_measures_surv.song_tpr.Rd +++ b/man/mlr_measures_surv.song_tpr.Rd @@ -53,8 +53,8 @@ msr("surv.song_tpr") \itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. } diff --git a/man/mlr_measures_surv.uno_auc.Rd b/man/mlr_measures_surv.uno_auc.Rd index c524301f6..5536ad7ad 100644 --- a/man/mlr_measures_surv.uno_auc.Rd +++ b/man/mlr_measures_surv.uno_auc.Rd @@ -56,8 +56,8 @@ time points); otherwise, not integrated (eg at a single time point). \itemize{ \item \code{times} (\code{numeric()})\cr -If \code{integrate == TRUE} then a vector of time-points over which to integrate the score. -If \code{integrate == FALSE} then a single time point at which to return the score. +If \code{integrated == TRUE} then a vector of time-points over which to integrate the score. +If \code{integrated == FALSE} then a single time point at which to return the score. 
} } From f1b1f0e643a8106ce3279e08a609d095efe10a8d Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 12:09:53 +0300 Subject: [PATCH 57/82] move code comment --- R/PipeOpCrankCompositor.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/PipeOpCrankCompositor.R b/R/PipeOpCrankCompositor.R index 19bcba3cd..574318aab 100644 --- a/R/PipeOpCrankCompositor.R +++ b/R/PipeOpCrankCompositor.R @@ -109,11 +109,10 @@ PipeOpCrankCompositor = R6Class("PipeOpCrankCompositor", crank = get_mortality(surv) } - # update only `crank` p = PredictionSurv$new( row_ids = pred$row_ids, truth = pred$truth, - crank = crank, + crank = crank, # update only `crank` distr = pred$distr, lp = pred$lp, response = pred$response From 6a049ac7d8202362d122926d9cbd96ea9cf38894 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 12:10:36 +0300 Subject: [PATCH 58/82] fix: keep the response to the output prediction object in distrcompose --- R/PipeOpDistrCompositor.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/PipeOpDistrCompositor.R b/R/PipeOpDistrCompositor.R index aca38240f..63a7da44b 100644 --- a/R/PipeOpDistrCompositor.R +++ b/R/PipeOpDistrCompositor.R @@ -143,7 +143,7 @@ PipeOpDistrCompositor = R6Class("PipeOpDistrCompositor", # compose survival distribution if (form == "ph") { - cdf = 1 - (survmat^exp(lpmat)) + cdf = 1 - (survmat ^ exp(lpmat)) } else if (form == "aft") { mtc = findInterval(timesmat / exp(lpmat), times) mtc[mtc == 0] = NA @@ -161,6 +161,7 @@ PipeOpDistrCompositor = R6Class("PipeOpDistrCompositor", truth = pred$truth, crank = pred$crank, lp = pred$lp, + response = pred$response, distr = distr # overwrite only the distribution ) From 39439f829202ca8398b8a8aec18ef226b35cc2be Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 14:04:37 +0300 Subject: [PATCH 59/82] add checks on the `times` arg for the intergrated survival losses --- R/MeasureSurvGraf.R | 15 +++++++++++---- R/MeasureSurvIntLogloss.R | 23 ++++++++++++++++------- R/MeasureSurvSchmid.R | 21 +++++++++++++++------ 3 files changed, 42 insertions(+), 17 deletions(-) diff --git a/R/MeasureSurvGraf.R b/R/MeasureSurvGraf.R index 9d5c7dca5..bffca5de2 100644 --- a/R/MeasureSurvGraf.R +++ b/R/MeasureSurvGraf.R @@ -95,16 +95,22 @@ MeasureSurvGraf = R6::R6Class("MeasureSurvGraf", private = list( .score = function(prediction, task, train_set, ...) { ps = self$param_set$values + # times must be unique, sorted and positive numbers + times = assert_numeric(ps$times, lower = 0, any.missing = FALSE, + unique = TRUE, sorted = TRUE, null.ok = TRUE) + # ERV score if (ps$ERV) return(.scoring_rule_erv(self, prediction, task, train_set)) - nok = sum(!is.null(ps$times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1 + + nok = sum(!is.null(times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1 if (nok) { stop("Only one of `times`, `t_max`, and `p_max` should be provided") } + if (!ps$integrated) { msg = "If `integrated=FALSE` then `times` should be a scalar numeric." 
- assert_numeric(ps$times, len = 1L, .var.name = msg) + assert_numeric(times, len = 1L, .var.name = msg) } else { - if (!is.null(ps$times) && length(ps$times) == 1L) { + if (!is.null(times) && length(times) == 1L) { ps$integrated = FALSE } } @@ -118,9 +124,10 @@ MeasureSurvGraf = R6::R6Class("MeasureSurvGraf", train = NULL } + # `score` is a matrix, IBS(i,j) => n_test_obs x times score = weighted_survival_score("graf", truth = prediction$truth, - distribution = prediction$data$distr, times = ps$times, + distribution = prediction$data$distr, times = times, t_max = ps$t_max, p_max = ps$p_max, proper = ps$proper, train = train, eps = ps$eps ) diff --git a/R/MeasureSurvIntLogloss.R b/R/MeasureSurvIntLogloss.R index e39fa3429..c47cca2b3 100644 --- a/R/MeasureSurvIntLogloss.R +++ b/R/MeasureSurvIntLogloss.R @@ -87,17 +87,22 @@ MeasureSurvIntLogloss = R6::R6Class("MeasureSurvIntLogloss", private = list( .score = function(prediction, task, train_set, ...) { ps = self$param_set$values - + # times must be unique, sorted and positive numbers + times = assert_numeric(ps$times, lower = 0, any.missing = FALSE, + unique = TRUE, sorted = TRUE, null.ok = TRUE) + # ERV score if (ps$ERV) return(.scoring_rule_erv(self, prediction, task, train_set)) - nok = sum(!is.null(ps$times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1 + + nok = sum(!is.null(times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1 if (nok) { stop("Only one of `times`, `t_max`, and `p_max` should be provided") } + if (!ps$integrated) { msg = "If `integrated=FALSE` then `times` should be a scalar numeric." - assert_numeric(ps$times, len = 1L, .var.name = msg) + assert_numeric(times, len = 1L, .var.name = msg) } else { - if (!is.null(ps$times) && length(ps$times) == 1L) { + if (!is.null(times) && length(times) == 1L) { ps$integrated = FALSE } } @@ -111,9 +116,13 @@ MeasureSurvIntLogloss = R6::R6Class("MeasureSurvIntLogloss", train = NULL } - score = weighted_survival_score("intslogloss", truth = prediction$truth, - distribution = prediction$data$distr, times = ps$times, t_max = ps$t_max, - p_max = ps$p_max, proper = ps$proper, train = train, eps = ps$eps) + # `score` is a matrix, IBS(i,j) => n_test_obs x times + score = weighted_survival_score("intslogloss", + truth = prediction$truth, + distribution = prediction$data$distr, times = times, + t_max = ps$t_max, p_max = ps$p_max, proper = ps$proper, train = train, + eps = ps$eps + ) if (ps$se) { integrated_se(score, ps$integrated) diff --git a/R/MeasureSurvSchmid.R b/R/MeasureSurvSchmid.R index 2663a1ed2..c108ce7b6 100644 --- a/R/MeasureSurvSchmid.R +++ b/R/MeasureSurvSchmid.R @@ -84,16 +84,21 @@ MeasureSurvSchmid = R6::R6Class("MeasureSurvSchmid", private = list( .score = function(prediction, task, train_set, ...) { ps = self$param_set$values + # times must be unique, sorted and positive numbers + times = assert_numeric(ps$times, lower = 0, any.missing = FALSE, + unique = TRUE, sorted = TRUE, null.ok = TRUE) + + # ERV score if (ps$ERV) return(.scoring_rule_erv(self, prediction, task, train_set)) - nok = sum(!is.null(ps$times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1 + nok = sum(!is.null(times), !is.null(ps$t_max), !is.null(ps$p_max)) > 1 if (nok) { stop("Only one of `times`, `t_max`, and `p_max` should be provided") } if (!ps$integrated) { msg = "If `integrated=FALSE` then `times` should be a scalar numeric." 
- assert_numeric(ps$times, len = 1L, .var.name = msg) + assert_numeric(times, len = 1L, .var.name = msg) } else { - if (!is.null(ps$times) && length(ps$times) == 1L) { + if (!is.null(times) && length(times) == 1L) { ps$integrated = FALSE } } @@ -107,9 +112,13 @@ MeasureSurvSchmid = R6::R6Class("MeasureSurvSchmid", train = NULL } - score = weighted_survival_score("schmid", truth = prediction$truth, - distribution = prediction$data$distr, times = ps$times, t_max = ps$t_max, - p_max = ps$p_max, proper = ps$proper, train = train, eps = ps$eps) + # `score` is a matrix, IBS(i,j) => n_test_obs x times + score = weighted_survival_score("schmid", + truth = prediction$truth, + distribution = prediction$data$distr, times = times, + t_max = ps$t_max, p_max = ps$p_max, proper = ps$proper, train = train, + eps = ps$eps + ) if (ps$se) { integrated_se(score, ps$integrated) From cf3bf308d0650729d42fa5e88d432091c8d836b5 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 14:33:17 +0300 Subject: [PATCH 60/82] refactor + add code comments in 'integrated_scores()' --- R/integrated_scores.R | 45 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/R/integrated_scores.R b/R/integrated_scores.R index acc2536d7..6b1bb6267 100644 --- a/R/integrated_scores.R +++ b/R/integrated_scores.R @@ -8,13 +8,23 @@ score_graf_schmid = function(true_times, unique_times, cdf, power = 2) { c_score_graf_schmid(true_times, unique_times, cdf, power) } +# Notes: +#' - Either all of `times`, `t_max`, `p_max` are NULL, or only one of them is not +#' - `times` is sorted (increasing), unique, positive time points +#' - `t_max` > 0 +#' - `p_max` in [0,1] weighted_survival_score = function(loss, truth, distribution, times = NULL, t_max = NULL, p_max = NULL, proper, train = NULL, eps, ...) { assert_surv(truth) - - # if `tmax_apply` = TRUE, the t_max cutoff will be applied to both train - # (if provided) and test data. 
For this at least one of `t_max` or `p_max` - # should be given + # test set's (times, status) + test_times = truth[, "time"] + test_status = truth[, "status"] + + #' - `tmax_apply` = TRUE => one of `t_max`, `p_max` is given + #' - `tmax_apply` = FALSE => `times` is given or all of `times`, `p_max` and `t_max` are NULL + #' The `t_max` cutoff will be applied later in the predicted survival matrix + #' to filter observations (rows) and time points (columns) + filter the + #' (time, status) target on both train (if provided) and test data tmax_apply = !(is.null(t_max) && is.null(p_max)) # calculate `t_max` (time horizon) @@ -69,18 +79,24 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, cdf = t(cdf) } - # apply `t_max` cutoff to the test set's (time, status) - true_times = all_times[all_times <= t_max] - true_status = all_status[all_times <= t_max] + # apply `t_max` cutoff to remove observations + if (tmax_apply) { + true_times = test_times[test_times <= t_max] + true_status = test_status[test_times <= t_max] + cdf = cdf[, test_times <= t_max, drop = FALSE] + } else { + true_times = test_times + true_status = test_status + } true_truth = Surv(true_times, true_status) assert_numeric(true_times, any.missing = FALSE) assert_numeric(unique_times, any.missing = FALSE) assert_matrix(cdf, nrows = length(unique_times), ncols = length(true_times), - any.missing = FALSE) + any.missing = FALSE) - # Note that whilst we calculate the score for censored here, they are then - # corrected in the weighting function `.c_weight_survival_score()` + # Note that whilst we calculate the score for censored observations here, + # they are then corrected in the weighting function `.c_weight_survival_score()` if (loss == "graf") { score = score_graf_schmid(true_times, unique_times, cdf, power = 2) } else if (loss == "schmid") { @@ -89,18 +105,19 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, score = score_intslogloss(true_times, unique_times, cdf, eps = eps) } - # use all (time, status) information from train or test set + # use the `truth` (time, status) information from the train or test set if (is.null(train)) { - cens = survival::survfit(Surv(all_times, 1 - all_status) ~ 1) + cens = survival::survfit(Surv(test_times, 1 - test_status) ~ 1) } else { + # no filtering of observations from train data: use ALL train_times = train[, "time"] train_status = train[, "status"] cens = survival::survfit(Surv(train_times, 1 - train_status) ~ 1) } - # G(t): KM estimate of the censoring distr + # G(t): KM estimate of the censoring distribution cens = matrix(c(cens$time, cens$surv), ncol = 2L) - # filter time points based on `t_max` cutoff + # filter G(t) time points based on `t_max` cutoff if (tmax_apply) { cens = cens[cens[, 1L] <= t_max, , drop = FALSE] } From 424aa9f572b1ac0359c1b32e6eea9417720f0caa Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 14:35:28 +0300 Subject: [PATCH 61/82] use distr6 C++ function to constantly interpolate S(t) --- R/integrated_scores.R | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/R/integrated_scores.R b/R/integrated_scores.R index 6b1bb6267..d8c05a5b4 100644 --- a/R/integrated_scores.R +++ b/R/integrated_scores.R @@ -67,16 +67,13 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, } else { # survival 2d array surv_mat = distribution } - surv_mat = surv_mat[, as.numeric(colnames(surv_mat)) <= t_max, drop = FALSE] - mtc = findInterval(unique_times, 
as.numeric(colnames(surv_mat))) - cdf = 1 - surv_mat[, mtc, drop = FALSE] - if (any(mtc == 0)) { - cdf = cbind(matrix(0, nrow(cdf), sum(mtc == 0)), cdf) - } - # apply `t_max` cutoff to remove observations in the test predictions - cdf = cdf[all_times <= t_max, , drop = FALSE] - colnames(cdf) = unique_times - cdf = t(cdf) + + # `pred_times`: time points for which we have S(t) + pred_times = as.numeric(colnames(surv_mat)) + extend_times = getFromNamespace("C_Vec_WeightedDiscreteCdf", ns = "distr6") + # `unique_times`: time points for which we want S(t) + cdf = extend_times(unique_times, pred_times, cdf = t(1 - surv_mat), TRUE, FALSE) + rownames(cdf) = unique_times # times x obs } # apply `t_max` cutoff to remove observations From 5031b8bc1ce6960f8c75e4726570d70d6f651436 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 14:36:06 +0300 Subject: [PATCH 62/82] small fix + refactoring: change the way `times` and `t_max` args are used in 'integrated_scores()' --- R/integrated_scores.R | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/R/integrated_scores.R b/R/integrated_scores.R index d8c05a5b4..bef04bd7e 100644 --- a/R/integrated_scores.R +++ b/R/integrated_scores.R @@ -27,9 +27,13 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, #' (time, status) target on both train (if provided) and test data tmax_apply = !(is.null(t_max) && is.null(p_max)) - # calculate `t_max` (time horizon) - if (is.null(times) || !length(times)) { - unique_times = unique(sort(truth[, "time"])) + #' **IMPORTANT**: times to calculate the score at => evaluation times + #' We start with the unique, sorted, test set time points + unique_times = unique(sort(test_times)) + + if (tmax_apply) { + #' one of `t_max`, `p_max` is given + #' calculate `t_max` (time horizon) if `p_max` is given if (!is.null(p_max)) { surv = survival::survfit(truth ~ 1) indx = which(1 - (surv$n.risk / surv$n) > p_max) @@ -41,20 +45,29 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, # `p_max` proportion of censoring t_max = surv$time[indx[1L]] } - } else if (is.null(t_max)) { - t_max = max(unique_times) } - } else { - unique_times = .c_get_unique_times(truth[, "time"], times) - t_max = max(unique_times) - } - # subset `unique_times` in the test set up to `t_max` - unique_times = unique_times[unique_times <= t_max] + #' check that `t_max` is within evaluation time range + if (t_max < min(unique_times)) { + stop("`t_max` is smaller than the minimum test time. Please increase value!") + } - # keep all the test set time points for the censoring distr via KM if no train data - all_times = truth[, "time"] - all_status = truth[, "status"] + # filter `unique_times` in the test set up to `t_max` + unique_times = unique_times[unique_times <= t_max] + } else { + #' `times` is given or it is `NULL` + # We keep compatibility with previous code here and return an error if + # the requested `times` are ALL outside the considered evaluation test times. + # We do not prune these requested times at all (we assume that times are + # positive, unique and sorted). 
+ # Constant interpolation is used later to get S(t) for these time points + outside_range = !is.null(times) && all(times < min(unique_times) | times > max(unique_times)) + if (outside_range) { + stop("Requested times are all outside the considered evaluation range.") + } + #' is `times = NULL`, use the `unique_times` + unique_times = times %??% unique_times + } # get the cdf matrix (rows => times, cols => obs) if (inherits(distribution, "Distribution")) { From d24c37bc7b4e2946d65abeb1cc27c4b1fd4528ab Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 16:42:43 +0300 Subject: [PATCH 63/82] compatibility with mlr3 0.21.0 --- R/TaskSurv.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/TaskSurv.R b/R/TaskSurv.R index d02525083..b595b9c3f 100644 --- a/R/TaskSurv.R +++ b/R/TaskSurv.R @@ -393,7 +393,7 @@ TaskSurv = R6::R6Class("TaskSurv", assert_choice(self$censtype, choices = c("right", "left")) cox = lrn("surv.coxph") - cox$encapsulate = c(train = "evaluate", predict = "evaluate") + cox$encapsulate("evaluate", fallback = lrn("surv.kaplan")) cox$train(self) ok = (length(cox$errors) == 0L) & (length(cox$warnings) == 0L) From 4940d73c1ade2891caba6482575c1b25c1a8835f Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 16:43:20 +0300 Subject: [PATCH 64/82] test IBS with `times` argument more thoroughly --- tests/testthat/test_mlr_measures.R | 68 +++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 16 deletions(-) diff --git a/tests/testthat/test_mlr_measures.R b/tests/testthat/test_mlr_measures.R index feeb14255..f521d31f5 100644 --- a/tests/testthat/test_mlr_measures.R +++ b/tests/testthat/test_mlr_measures.R @@ -9,12 +9,14 @@ test_that("mlr_measures", { skip_if_not_installed("survAUC") keys = mlr_measures$keys("^surv") + # remove alias for brier + keys = keys[keys != "surv.graf"] for (key in keys) { if (grepl("TNR|TPR|tpr|tnr", key)) { m = msr(key, times = 60L) } else { - if (key %in% c("surv.graf", "surv.intlogloss", "surv.schmid", "surv.brier")) { + if (key %in% c("surv.intlogloss", "surv.schmid", "surv.brier")) { m = msr(key, proper = TRUE) } else { m = msr(key) @@ -50,18 +52,17 @@ test_that("integrated_prob_losses", { set.seed(1L) t = tsk("rats")$filter(sample(300, 50L)) p = lrn("surv.kaplan")$train(t)$predict(t) - probs = paste0("surv.", c("graf", "intlogloss", "schmid")) - lapply( - probs, - function(x) expect_error(p$score(msr(x, times = 39:80, integrated = FALSE, - proper = TRUE)), "scalar numeric") - ) + losses = paste0("surv.", c("graf", "intlogloss", "schmid")) + for (loss in losses) { + m = msr(loss, times = 39:80, integrated = FALSE, proper = TRUE) + expect_error(p$score(m), "scalar numeric") + } - prediction$score(msr("surv.intlogloss", integrated = TRUE, proper = TRUE, times = 100:110)) - expect_silent(prediction$score(lapply(probs, msr, integrated = TRUE, proper = TRUE))) - expect_error(prediction$score(lapply(probs, msr, integrated = TRUE, times = 34:38, proper = TRUE)), "Requested times") - expect_silent(prediction$score(lapply(probs, msr, integrated = TRUE, times = 100:110, proper = TRUE))) - expect_silent(prediction$score(lapply(probs, msr, integrated = FALSE, times = 80, proper = TRUE))) + expect_silent(prediction$score(msr("surv.intlogloss", integrated = TRUE, proper = TRUE, times = 100:110))) + expect_silent(prediction$score(lapply(losses, msr, integrated = TRUE, proper = TRUE))) + expect_error(prediction$score(lapply(losses, msr, integrated = TRUE, times = 34:38, proper = TRUE)), "Requested times") + 
expect_silent(prediction$score(lapply(losses, msr, integrated = TRUE, times = 100:110, proper = TRUE))) + expect_silent(prediction$score(lapply(losses, msr, integrated = FALSE, times = 80, proper = TRUE))) }) test_that("dcalib works", { @@ -137,7 +138,7 @@ test_that("graf with 1 time point", { expect_number(res$score(msr("surv.graf", times = 1))) }) -test_that("t_max, p_max", { +test_that("graf: t_max, p_max, times", { set.seed(1L) t = tsk("rats")$filter(sample(1:300, 50)) p = lrn("surv.kaplan")$train(t)$predict(t) @@ -146,9 +147,23 @@ test_that("t_max, p_max", { expect_error(p$score(msr("surv.graf", integrated = FALSE))) expect_error(p$score(msr("surv.graf", times = 1:2, t_max = 3))) - m1 = p$score(msr("surv.graf", times = seq(100))) - m2 = p$score(msr("surv.graf", t_max = 100)) - expect_equal(m1, m2) + times = sort(unique(p$truth[,1])) # test time points + t_max = 100 + times_flt = times[times <= t_max] # keep only times until the `t_max` + m0 = p$score(msr("surv.graf")) # uses all test time points + m1 = p$score(msr("surv.graf", times = times_flt)) # uses times_flt + m2 = p$score(msr("surv.graf", t_max = t_max)) # 100 + m3 = p$score(msr("surv.graf", t_max = max(times))) # 104 + m4 = p$score(msr("surv.graf", t_max = max(times) + 1)) # 105 + + # different time points considered + expect_true(m0 != m1) + # same time points are used, but `t_max` also removes observations + expect_true(m1 != m2) + # different `t_max` => different time points used + expect_true(m2 != m3) + # different `t_max` but after the max evaluation time point, so result stays the same + expect_equal(m3, m4) s = t$kaplan() # KM t_max = s$time[which(1 - s$n.risk / s$n > 0.3)[1]] # t_max for up to 30% cens @@ -160,6 +175,27 @@ test_that("t_max, p_max", { expect_equal(m1, m2) expect_true(m1 != m3) + # times is not necessarily decomposable, due to the `method` that performs integration + p_cox = suppressWarnings(lrn("surv.coxph")$train(t)$predict(t)) + s1 = p_cox$score(msr("surv.graf", times = 68)) + s2 = p_cox$score(msr("surv.graf", times = 92)) + s3 = p_cox$score(msr("surv.graf", times = 102)) + mean_score = (s1 + s2 + s3) / 3 + # simple mean + s_all1 = p_cox$score(msr("surv.graf", method = 1, times = c(68, 92, 102))) + # mean weighted by the difference between time-points + s_all2 = p_cox$score(msr("surv.graf", method = 2, times = c(68, 92, 102))) + expect_equal(s_all1, mean_score) + expect_true(s_all2 != mean_score) +}) + +test_that("cindex: t_max, p_max", { + set.seed(1L) + t = tsk("rats")$filter(sample(1:300, 50)) + s = t$kaplan() # KM + t_max = s$time[which(1 - s$n.risk / s$n > 0.3)[1]] # t_max for up to 30% cens + + # t_max and p_max are the same p_cox = suppressWarnings(lrn("surv.coxph")$train(t)$predict(t)) c1 = p_cox$score(msr("surv.cindex", t_max = t_max)) c2 = p_cox$score(msr("surv.cindex", p_max = 0.3)) From 4d53c73cb36126fc5f102d6fff1302cd1392edb0 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 16:43:40 +0300 Subject: [PATCH 65/82] update docs --- man/LearnerDens.Rd | 1 + man/LearnerSurv.Rd | 1 + man/PredictionDens.Rd | 3 ++- man/PredictionSurv.Rd | 3 ++- man/mlr_learners_dens.hist.Rd | 1 + man/mlr_learners_dens.kde.Rd | 1 + man/mlr_learners_surv.coxph.Rd | 1 + man/mlr_learners_surv.kaplan.Rd | 1 + man/mlr_learners_surv.rpart.Rd | 1 + man/weighted_survival_score.Rd | 26 ++++++++++++++++++++++++-- 10 files changed, 35 insertions(+), 4 deletions(-) diff --git a/man/LearnerDens.Rd b/man/LearnerDens.Rd index 84baf4fc6..a963d90c5 100644 --- a/man/LearnerDens.Rd +++ b/man/LearnerDens.Rd @@ -44,6 +44,7 @@ 
Other Learner:
 Inherited methods
 • mlr3::Learner$base_learner()
+• mlr3::Learner$encapsulate()
 • mlr3::Learner$format()
 • mlr3::Learner$help()
 • mlr3::Learner$predict()
diff --git a/man/LearnerSurv.Rd b/man/LearnerSurv.Rd
index 54c3cd59e..257d63220 100644
--- a/man/LearnerSurv.Rd
+++ b/man/LearnerSurv.Rd
@@ -47,6 +47,7 @@ Other Learner:
 Inherited methods
 • mlr3::Learner$base_learner()
+• mlr3::Learner$encapsulate()
 • mlr3::Learner$format()
 • mlr3::Learner$help()
 • mlr3::Learner$predict()
diff --git a/man/PredictionDens.Rd b/man/PredictionDens.Rd
index c63a03c48..1a242c3ee 100644
--- a/man/PredictionDens.Rd
+++ b/man/PredictionDens.Rd
@@ -45,11 +45,12 @@ Access the stored estimated distribution.}
 }
 }
 \if{html}{\out{
-Inherited methods
+Inherited methods
diff --git a/man/PredictionSurv.Rd b/man/PredictionSurv.Rd
index c0d22e10c..652f800d3 100644
--- a/man/PredictionSurv.Rd
+++ b/man/PredictionSurv.Rd
@@ -60,11 +60,12 @@ Access the stored predicted survival time.}
 }
 }
 \if{html}{\out{
-Inherited methods
+Inherited methods
diff --git a/man/mlr_learners_dens.hist.Rd b/man/mlr_learners_dens.hist.Rd
index 54f29367c..58341647a 100644
--- a/man/mlr_learners_dens.hist.Rd
+++ b/man/mlr_learners_dens.hist.Rd
@@ -48,6 +48,7 @@ Other density estimators:
 Inherited methods
 • mlr3::Learner$base_learner()
+• mlr3::Learner$encapsulate()
 • mlr3::Learner$format()
 • mlr3::Learner$help()
 • mlr3::Learner$predict()
diff --git a/man/mlr_learners_dens.kde.Rd b/man/mlr_learners_dens.kde.Rd
index bdff9c318..970ff3f94 100644
--- a/man/mlr_learners_dens.kde.Rd
+++ b/man/mlr_learners_dens.kde.Rd
@@ -59,6 +59,7 @@ Other density estimators:
 Inherited methods
 • mlr3::Learner$base_learner()
+• mlr3::Learner$encapsulate()
 • mlr3::Learner$format()
 • mlr3::Learner$help()
 • mlr3::Learner$predict()
diff --git a/man/mlr_learners_surv.coxph.Rd b/man/mlr_learners_surv.coxph.Rd
index 5979b9f2f..43ac2ee80 100644
--- a/man/mlr_learners_surv.coxph.Rd
+++ b/man/mlr_learners_surv.coxph.Rd
@@ -69,6 +69,7 @@ Other survival learners:
 Inherited methods
 • mlr3::Learner$base_learner()
+• mlr3::Learner$encapsulate()
 • mlr3::Learner$format()
 • mlr3::Learner$help()
 • mlr3::Learner$predict()
diff --git a/man/mlr_learners_surv.kaplan.Rd b/man/mlr_learners_surv.kaplan.Rd
index e3cd8992f..5829c99f5 100644
--- a/man/mlr_learners_surv.kaplan.Rd
+++ b/man/mlr_learners_surv.kaplan.Rd
@@ -65,6 +65,7 @@ Other survival learners:
 Inherited methods
 • mlr3::Learner$base_learner()
+• mlr3::Learner$encapsulate()
 • mlr3::Learner$format()
 • mlr3::Learner$help()
 • mlr3::Learner$predict()
diff --git a/man/mlr_learners_surv.rpart.Rd b/man/mlr_learners_surv.rpart.Rd
index e243a1bf5..fa2ada657 100644
--- a/man/mlr_learners_surv.rpart.Rd
+++ b/man/mlr_learners_surv.rpart.Rd
@@ -85,6 +85,7 @@ Other survival learners:
 Inherited methods
 • mlr3::Learner$base_learner()
+• mlr3::Learner$encapsulate()
 • mlr3::Learner$format()
 • mlr3::Learner$help()
 • mlr3::Learner$predict()
                                          • diff --git a/man/weighted_survival_score.Rd b/man/weighted_survival_score.Rd index f59a1a59e..0800ad707 100644 --- a/man/weighted_survival_score.Rd +++ b/man/weighted_survival_score.Rd @@ -3,11 +3,22 @@ \name{weighted_survival_score} \alias{weighted_survival_score} \title{\itemize{ +\item Either all of \code{times}, \code{t_max}, \code{p_max} are NULL, or only one of them is not +\item \code{times} is sorted (increasing), unique, positive time points +\item \code{t_max} > 0 +\item \code{p_max} in \link{0,1} \item \code{tmax_apply} = TRUE => one of \code{t_max}, \code{p_max} is given -\item \code{tmax_apply} = FALSE => \code{times} is given +\item \code{tmax_apply} = FALSE => \code{times} is given or all of \code{times}, \code{p_max} and \code{t_max} are NULL The \code{t_max} cutoff will be applied later in the predicted survival matrix to filter observations (rows) and time points (columns) + filter the (time, status) target on both train (if provided) and test data +\strong{IMPORTANT}: times to calculate the score at => evaluation times +We start with the unique, sorted, test set time points +one of \code{t_max}, \code{p_max} is given +calculate \code{t_max} (time horizon) if \code{p_max} is given +check that \code{t_max} is within evaluation time range +\code{times} is given or it is \code{NULL} +is \code{times = NULL}, use the \code{unique_times} }} \usage{ weighted_survival_score( @@ -25,10 +36,21 @@ weighted_survival_score( } \description{ \itemize{ +\item Either all of \code{times}, \code{t_max}, \code{p_max} are NULL, or only one of them is not +\item \code{times} is sorted (increasing), unique, positive time points +\item \code{t_max} > 0 +\item \code{p_max} in \link{0,1} \item \code{tmax_apply} = TRUE => one of \code{t_max}, \code{p_max} is given -\item \code{tmax_apply} = FALSE => \code{times} is given +\item \code{tmax_apply} = FALSE => \code{times} is given or all of \code{times}, \code{p_max} and \code{t_max} are NULL The \code{t_max} cutoff will be applied later in the predicted survival matrix to filter observations (rows) and time points (columns) + filter the (time, status) target on both train (if provided) and test data +\strong{IMPORTANT}: times to calculate the score at => evaluation times +We start with the unique, sorted, test set time points +one of \code{t_max}, \code{p_max} is given +calculate \code{t_max} (time horizon) if \code{p_max} is given +check that \code{t_max} is within evaluation time range +\code{times} is given or it is \code{NULL} +is \code{times = NULL}, use the \code{unique_times} } } From 4a5534817844bf528f206d0f2fe1a7135d10d496 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 16:52:55 +0300 Subject: [PATCH 66/82] complete merging --- R/pipelines.R | 7 +------ man/mlr_graphs_survtoclassif_disctime.Rd | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/R/pipelines.R b/R/pipelines.R index 771efad2b..935e7f46f 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -584,16 +584,11 @@ pipeline_survtoregr = function(method = 1, regr_learner = lrn("regr.featureless" #' @param max_time (`numeric(1)`)\cr #' If cut is unspecified, this will be the last possible event time. #' All event times after max_time will be administratively censored at max_time. -<<<<<<< HEAD #' @param rhs (`character(1)`)\cr -#' Right-hand side of the formula to with the learner. -======= -#' @param rhs `character(1)`\cr #' Right-hand side of the formula to use with the learner. 
->>>>>>> 069414161bb95f07ff4300278b99b07eef8f543d #' All features of the task are available as well as `tend` the upper bounds #' of the intervals created by `cut`. -#' If rhs is unspecified, the formula of the task will be used. +#' If `rhs` is unspecified, the formula of the task will be used. #' @param graph_learner (`logical(1)`)\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. diff --git a/man/mlr_graphs_survtoclassif_disctime.Rd b/man/mlr_graphs_survtoclassif_disctime.Rd index 5c8709877..7ae567bd4 100644 --- a/man/mlr_graphs_survtoclassif_disctime.Rd +++ b/man/mlr_graphs_survtoclassif_disctime.Rd @@ -28,16 +28,11 @@ intervals from 0 until the maximum event time.} If cut is unspecified, this will be the last possible event time. All event times after max_time will be administratively censored at max_time.} -<<<<<<< HEAD \item{rhs}{(\code{character(1)})\cr -Right-hand side of the formula to with the learner. -======= -\item{rhs}{\code{character(1)}\cr Right-hand side of the formula to use with the learner. ->>>>>>> 069414161bb95f07ff4300278b99b07eef8f543d All features of the task are available as well as \code{tend} the upper bounds of the intervals created by \code{cut}. -If rhs is unspecified, the formula of the task will be used.} +If \code{rhs} is unspecified, the formula of the task will be used.} \item{graph_learner}{(\code{logical(1)})\cr If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a From 239784a0d52ba68d5404012ad1eda5cda7a87a27 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 27 Sep 2024 17:05:06 +0300 Subject: [PATCH 67/82] change comment doc to not produce a man entry --- R/integrated_scores.R | 32 +++++++++---------- man/weighted_survival_score.Rd | 56 ---------------------------------- 2 files changed, 16 insertions(+), 72 deletions(-) delete mode 100644 man/weighted_survival_score.Rd diff --git a/R/integrated_scores.R b/R/integrated_scores.R index bef04bd7e..ca936d70a 100644 --- a/R/integrated_scores.R +++ b/R/integrated_scores.R @@ -9,10 +9,10 @@ score_graf_schmid = function(true_times, unique_times, cdf, power = 2) { } # Notes: -#' - Either all of `times`, `t_max`, `p_max` are NULL, or only one of them is not -#' - `times` is sorted (increasing), unique, positive time points -#' - `t_max` > 0 -#' - `p_max` in [0,1] +# - Either all of `times`, `t_max`, `p_max` are NULL, or only one of them is not +# - `times` is sorted (increasing), unique, positive time points +# - `t_max` > 0 +# - `p_max` in [0,1] weighted_survival_score = function(loss, truth, distribution, times = NULL, t_max = NULL, p_max = NULL, proper, train = NULL, eps, ...) 
{ assert_surv(truth) @@ -20,20 +20,20 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, test_times = truth[, "time"] test_status = truth[, "status"] - #' - `tmax_apply` = TRUE => one of `t_max`, `p_max` is given - #' - `tmax_apply` = FALSE => `times` is given or all of `times`, `p_max` and `t_max` are NULL - #' The `t_max` cutoff will be applied later in the predicted survival matrix - #' to filter observations (rows) and time points (columns) + filter the - #' (time, status) target on both train (if provided) and test data + # - `tmax_apply` = TRUE => one of `t_max`, `p_max` is given + # - `tmax_apply` = FALSE => `times` is given or all of `times`, `p_max` and `t_max` are NULL + # The `t_max` cutoff will be applied later in the predicted survival matrix + # to filter observations (rows) and time points (columns) + filter the + # (time, status) target on both train (if provided) and test data tmax_apply = !(is.null(t_max) && is.null(p_max)) - #' **IMPORTANT**: times to calculate the score at => evaluation times - #' We start with the unique, sorted, test set time points + # **IMPORTANT**: times to calculate the score at => evaluation times + # We start with the unique, sorted, test set time points unique_times = unique(sort(test_times)) if (tmax_apply) { - #' one of `t_max`, `p_max` is given - #' calculate `t_max` (time horizon) if `p_max` is given + # one of `t_max`, `p_max` is given + # calculate `t_max` (time horizon) if `p_max` is given if (!is.null(p_max)) { surv = survival::survfit(truth ~ 1) indx = which(1 - (surv$n.risk / surv$n) > p_max) @@ -47,7 +47,7 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, } } - #' check that `t_max` is within evaluation time range + # check that `t_max` is within evaluation time range if (t_max < min(unique_times)) { stop("`t_max` is smaller than the minimum test time. Please increase value!") } @@ -55,7 +55,7 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, # filter `unique_times` in the test set up to `t_max` unique_times = unique_times[unique_times <= t_max] } else { - #' `times` is given or it is `NULL` + # `times` is given or it is `NULL` # We keep compatibility with previous code here and return an error if # the requested `times` are ALL outside the considered evaluation test times. 
# We do not prune these requested times at all (we assume that times are @@ -65,7 +65,7 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, if (outside_range) { stop("Requested times are all outside the considered evaluation range.") } - #' is `times = NULL`, use the `unique_times` + # is `times = NULL`, use the `unique_times` unique_times = times %??% unique_times } diff --git a/man/weighted_survival_score.Rd b/man/weighted_survival_score.Rd deleted file mode 100644 index 0800ad707..000000000 --- a/man/weighted_survival_score.Rd +++ /dev/null @@ -1,56 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/integrated_scores.R -\name{weighted_survival_score} -\alias{weighted_survival_score} -\title{\itemize{ -\item Either all of \code{times}, \code{t_max}, \code{p_max} are NULL, or only one of them is not -\item \code{times} is sorted (increasing), unique, positive time points -\item \code{t_max} > 0 -\item \code{p_max} in \link{0,1} -\item \code{tmax_apply} = TRUE => one of \code{t_max}, \code{p_max} is given -\item \code{tmax_apply} = FALSE => \code{times} is given or all of \code{times}, \code{p_max} and \code{t_max} are NULL -The \code{t_max} cutoff will be applied later in the predicted survival matrix -to filter observations (rows) and time points (columns) + filter the -(time, status) target on both train (if provided) and test data -\strong{IMPORTANT}: times to calculate the score at => evaluation times -We start with the unique, sorted, test set time points -one of \code{t_max}, \code{p_max} is given -calculate \code{t_max} (time horizon) if \code{p_max} is given -check that \code{t_max} is within evaluation time range -\code{times} is given or it is \code{NULL} -is \code{times = NULL}, use the \code{unique_times} -}} -\usage{ -weighted_survival_score( - loss, - truth, - distribution, - times = NULL, - t_max = NULL, - p_max = NULL, - proper, - train = NULL, - eps, - ... 
-) -} -\description{ -\itemize{ -\item Either all of \code{times}, \code{t_max}, \code{p_max} are NULL, or only one of them is not -\item \code{times} is sorted (increasing), unique, positive time points -\item \code{t_max} > 0 -\item \code{p_max} in \link{0,1} -\item \code{tmax_apply} = TRUE => one of \code{t_max}, \code{p_max} is given -\item \code{tmax_apply} = FALSE => \code{times} is given or all of \code{times}, \code{p_max} and \code{t_max} are NULL -The \code{t_max} cutoff will be applied later in the predicted survival matrix -to filter observations (rows) and time points (columns) + filter the -(time, status) target on both train (if provided) and test data -\strong{IMPORTANT}: times to calculate the score at => evaluation times -We start with the unique, sorted, test set time points -one of \code{t_max}, \code{p_max} is given -calculate \code{t_max} (time horizon) if \code{p_max} is given -check that \code{t_max} is within evaluation time range -\code{times} is given or it is \code{NULL} -is \code{times = NULL}, use the \code{unique_times} -} -} From d6b867016c7ab44b1219dd7b227dc1af2cf4093e Mon Sep 17 00:00:00 2001 From: john Date: Mon, 7 Oct 2024 12:43:55 +0300 Subject: [PATCH 68/82] change outside times range to a warning (more reasonable) --- R/integrated_scores.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/integrated_scores.R b/R/integrated_scores.R index ca936d70a..2e6b1ba2d 100644 --- a/R/integrated_scores.R +++ b/R/integrated_scores.R @@ -61,9 +61,10 @@ weighted_survival_score = function(loss, truth, distribution, times = NULL, # We do not prune these requested times at all (we assume that times are # positive, unique and sorted). # Constant interpolation is used later to get S(t) for these time points - outside_range = !is.null(times) && all(times < min(unique_times) | times > max(unique_times)) + outside_range = !is.null(times) && any(times < min(unique_times) | times > max(unique_times)) if (outside_range) { - stop("Requested times are all outside the considered evaluation range.") + warning("Some requested times are outside the considered evaluation range + (unique, sorted, test set's time points).") } # is `times = NULL`, use the `unique_times` unique_times = times %??% unique_times From 46138b1bcfbb16aced24e4f21c6a377a7efe48eb Mon Sep 17 00:00:00 2001 From: john Date: Mon, 7 Oct 2024 13:47:07 +0300 Subject: [PATCH 69/82] update IBS + times tests --- tests/testthat/test_mlr_measures.R | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test_mlr_measures.R b/tests/testthat/test_mlr_measures.R index f521d31f5..c40630bf5 100644 --- a/tests/testthat/test_mlr_measures.R +++ b/tests/testthat/test_mlr_measures.R @@ -48,7 +48,7 @@ test_that("unintegrated_prob_losses", { expect_silent(prediction$score(msr)) }) -test_that("integrated_prob_losses", { +test_that("integrated losses with use of times", { set.seed(1L) t = tsk("rats")$filter(sample(300, 50L)) p = lrn("surv.kaplan")$train(t)$predict(t) @@ -58,11 +58,23 @@ test_that("integrated_prob_losses", { expect_error(p$score(m), "scalar numeric") } - expect_silent(prediction$score(msr("surv.intlogloss", integrated = TRUE, proper = TRUE, times = 100:110))) + # between 64 and 104 + test_unique_times = sort(unique(prediction$truth[,1])) + expect_true(all(test_unique_times > 63)) + expect_true(all(test_unique_times < 105)) + + # no `times` => use test set's unique time points expect_silent(prediction$score(lapply(losses, msr, integrated = TRUE, proper 
= TRUE))) - expect_error(prediction$score(lapply(losses, msr, integrated = TRUE, times = 34:38, proper = TRUE)), "Requested times") - expect_silent(prediction$score(lapply(losses, msr, integrated = TRUE, times = 100:110, proper = TRUE))) - expect_silent(prediction$score(lapply(losses, msr, integrated = FALSE, times = 80, proper = TRUE))) + # all `times` outside the test set range + for (loss in losses) { + expect_warning(prediction$score(msr(loss, integrated = TRUE, proper = TRUE, times = 34:38)), "requested times") + } + # some `times` outside the test set range + for (loss in losses) { + expect_warning(prediction$score(msr(loss, integrated = TRUE, proper = TRUE, times = 100:110)), "requested times") + } + # one time point, inside the range, no warnings + expect_silent(prediction$score(lapply(losses, msr, integrated = FALSE, proper = TRUE, times = 80))) }) test_that("dcalib works", { From e557ae4a2657e3af062afaaa7663909bd05d4109 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 7 Oct 2024 17:18:32 +0300 Subject: [PATCH 70/82] update doc files for IBS --- man-roxygen/details_method.R | 8 ++++++++ man-roxygen/details_tmax.R | 3 ++- man-roxygen/details_trainG.R | 3 ++- 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 man-roxygen/details_method.R diff --git a/man-roxygen/details_method.R b/man-roxygen/details_method.R new file mode 100644 index 000000000..7dcd4b148 --- /dev/null +++ b/man-roxygen/details_method.R @@ -0,0 +1,8 @@ +#' @section Implementation differences (time-integration): +#' +#' If comparing the integrated graf score to other packages, e.g. +#' \CRANpkg{pec}, then `method = 2` should be used. However the results may +#' still be very slightly different as this package uses `survfit` to estimate +#' the censoring distribution, in line with the Graf 1999 paper; whereas some +#' other packages use `prodlim` with `reverse = TRUE` (meaning Kaplan-Meier is +#' not used). diff --git a/man-roxygen/details_tmax.R b/man-roxygen/details_tmax.R index 7e0f24eef..0d8fc023f 100644 --- a/man-roxygen/details_tmax.R +++ b/man-roxygen/details_tmax.R @@ -1,4 +1,5 @@ -#' @details +#' @section Time Cutoff Details: +#' #' If `t_max` or `p_max` is given, then \eqn{G(t)} will be fitted using **all observations** from the #' train set (or test set) and only then the cutoff time will be applied. #' This is to ensure that more data is used for fitting the censoring distribution via the diff --git a/man-roxygen/details_trainG.R b/man-roxygen/details_trainG.R index 7906c8864..2a131c29c 100644 --- a/man-roxygen/details_trainG.R +++ b/man-roxygen/details_trainG.R @@ -1,4 +1,5 @@ -#' @details +#' @section Data used for Estimating Censoring Distribution: +#' #' If `task` and `train_set` are passed to `$score` then \eqn{G(t)} is fit on training data, #' otherwise testing data. The first is likely to reduce any bias caused by calculating #' parts of the measure on the test data it is evaluating. The training data is automatically From b8e8b6c8ad9894af13ff8659aca962422a18938d Mon Sep 17 00:00:00 2001 From: john Date: Mon, 7 Oct 2024 17:18:57 +0300 Subject: [PATCH 71/82] update doc for RNLL --- R/MeasureSurvLogloss.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/MeasureSurvLogloss.R b/R/MeasureSurvLogloss.R index a779637c7..69698c455 100644 --- a/R/MeasureSurvLogloss.R +++ b/R/MeasureSurvLogloss.R @@ -23,12 +23,12 @@ #' where \eqn{N} are the number of observations in the test set, and \eqn{sd} is the standard #' deviation. 
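
# A rough, self-contained sketch of scoring a prediction with this measure;
# the task, learner and split below are arbitrary placeholders, and the
# default settings return the re-weighted (RNLL) version described next:
library(mlr3)
library(mlr3proba)
task = tsk("rats")
part = partition(task)
pred = lrn("surv.kaplan")$train(task, part$train)$predict(task, part$test)
pred$score(msr("surv.logloss")) # mean log loss over the test observations
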
#' -#' The **Re-weighted Negative Log-Likelihood** (RNLL) or IPCW Log Loss is defined by -#' \deqn{L_{RNLL}(f, t, \Delta) = -\Delta log(f(t))/G(t)} -#' where \eqn{\Delta} is the censoring indicator and G is the Kaplan-Meier estimator of the +#' The **Re-weighted Negative Log-Likelihood** (RNLL) or IPCW (Inverse Probability Censoring Weighted) Log Loss is defined by +#' \deqn{L_{RNLL}(f, t, \delta) = - \frac{\delta \log[f(t)]}{G(t)}} +#' where \eqn{\delta} is the censoring indicator and \eqn{G(t)} is the Kaplan-Meier estimator of the #' censoring distribution. #' So only observations that have experienced the event are taking into account -#' for RNLL and both \eqn{f(t), G(t)} are calculated only at the event times. +#' for RNLL (i.e. \eqn{\delta = 1}) and both \eqn{f(t), G(t)} are calculated only at the event times. #' If only censored observations exist in the test set, `NaN` is returned. #' #' @template details_trainG From 2697304e65f111db03ad0fc9c606f38e1c7ed651 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 7 Oct 2024 17:19:53 +0300 Subject: [PATCH 72/82] update doc for ISBS --- R/MeasureSurvGraf.R | 33 +++++++-------- man/mlr_measures_surv.graf.Rd | 77 ++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 45 deletions(-) diff --git a/R/MeasureSurvGraf.R b/R/MeasureSurvGraf.R index bffca5de2..69b288f88 100644 --- a/R/MeasureSurvGraf.R +++ b/R/MeasureSurvGraf.R @@ -19,28 +19,29 @@ #' or squared survival loss. #' #' @details -#' For an individual who dies at time \eqn{t}, with predicted Survival function, \eqn{S}, the -#' Graf Score at time \eqn{t^*}{t*} is given by -#' \deqn{L_{ISBS}(S,t|t^*) = [(S(t^*)^2)I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*))^2)I(t > t^*)(1/G(t^*))]} +#' This measure has two dimensions: (test set) observations and time points. +#' For a specific individual \eqn{i}, with observed survival outcome \eqn{(t_i, \delta_i)} +#' (time and censoring indicator) and predicted survival function \eqn{S_i(t)}, the +#' *observation-wise* loss integrated across the time dimension up to the +#' time cutoff \eqn{\tau^*}, is: +#' +#' \deqn{L_{ISBS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i^2(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(\tau)} \ d\tau} +#' #' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution. #' -#' The re-weighted ISBS (RISBS) is given by -#' \deqn{L_{RISBS}(S,t|t^*) = [(S(t^*)^2)I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*))^2)I(t > t^*)(1/G(t))]} -#' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution, i.e. always -#' weighted by \eqn{G(t)}. +#' The **re-weighted ISBS** (RISBS) is: +#' +#' \deqn{L_{RISBS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i^2(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(t_i)} \ d\tau} +#' +#' which is always weighted by \eqn{G(t_i)} and removes the censored observations. +#' #' RISBS is strictly proper when the censoring distribution is independent -#' of the survival distribution and when G is fit on a sufficiently large dataset. +#' of the survival distribution and when \eqn{G(t)} is fit on a sufficiently large dataset. #' ISBS is never proper. Use `proper = FALSE` for ISBS and `proper = TRUE` for RISBS. #' Results may be very different if many observations are -#' censored at the last observed time due to division by 1/`eps` in `proper = TRUE`. 
-#' -#' **Note**: If comparing the integrated graf score to other packages, e.g. -#' \CRANpkg{pec}, then `method = 2` should be used. However the results may -#' still be very slightly different as this package uses `survfit` to estimate -#' the censoring distribution, in line with the Graf 1999 paper; whereas some -#' other packages use `prodlim` with `reverse = TRUE` (meaning Kaplan-Meier is -#' not used). +#' censored at the last observed time due to division by \eqn{1/eps} in `proper = TRUE`. #' +#' @template details_method #' @template details_trainG #' @template details_tmax #' diff --git a/man/mlr_measures_surv.graf.Rd b/man/mlr_measures_surv.graf.Rd index 2b8091c91..e03d8182f 100644 --- a/man/mlr_measures_surv.graf.Rd +++ b/man/mlr_measures_surv.graf.Rd @@ -11,40 +11,27 @@ Calculates the \strong{Integrated Survival Brier Score} (ISBS), Integrated Graf or squared survival loss. } \details{ -For an individual who dies at time \eqn{t}, with predicted Survival function, \eqn{S}, the -Graf Score at time \eqn{t^*}{t*} is given by -\deqn{L_{ISBS}(S,t|t^*) = [(S(t^*)^2)I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*))^2)I(t > t^*)(1/G(t^*))]} +This measure has two dimensions: (test set) observations and time points. +For a specific individual \eqn{i}, with observed survival outcome \eqn{(t_i, \delta_i)} +(time and censoring indicator) and predicted survival function \eqn{S_i(t)}, the +\emph{observation-wise} loss integrated across the time dimension up to the +time cutoff \eqn{\tau^*}, is: + +\deqn{L_{ISBS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i^2(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(\tau)} \ d\tau} + where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution. -The re-weighted ISBS (RISBS) is given by -\deqn{L_{RISBS}(S,t|t^*) = [(S(t^*)^2)I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*))^2)I(t > t^*)(1/G(t))]} -where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution, i.e. always -weighted by \eqn{G(t)}. -RISBS is strictly proper when the censoring distribution is independent -of the survival distribution and when G is fit on a sufficiently large dataset. -ISBS is never proper. Use \code{proper = FALSE} for ISBS and \code{proper = TRUE} for RISBS. -Results may be very different if many observations are -censored at the last observed time due to division by 1/\code{eps} in \code{proper = TRUE}. +The \strong{re-weighted ISBS} (RISBS) is: -\strong{Note}: If comparing the integrated graf score to other packages, e.g. -\CRANpkg{pec}, then \code{method = 2} should be used. However the results may -still be very slightly different as this package uses \code{survfit} to estimate -the censoring distribution, in line with the Graf 1999 paper; whereas some -other packages use \code{prodlim} with \code{reverse = TRUE} (meaning Kaplan-Meier is -not used). +\deqn{L_{RISBS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i^2(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(t_i)} \ d\tau} -If \code{task} and \code{train_set} are passed to \verb{$score} then \eqn{G(t)} is fit on training data, -otherwise testing data. The first is likely to reduce any bias caused by calculating -parts of the measure on the test data it is evaluating. The training data is automatically -used in scoring resamplings. +which is always weighted by \eqn{G(t_i)} and removes the censored observations. 
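
# Sketch of how the two variants above map onto the measure interface; the
# task, learner and split are placeholders:
library(mlr3)
library(mlr3proba)
task = tsk("rats")
part = partition(task)
pred = lrn("surv.coxph")$train(task, part$train)$predict(task, part$test)
pred$score(msr("surv.graf", proper = FALSE)) # ISBS
pred$score(msr("surv.graf", proper = TRUE))  # RISBS
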
-If \code{t_max} or \code{p_max} is given, then \eqn{G(t)} will be fitted using \strong{all observations} from the -train set (or test set) and only then the cutoff time will be applied. -This is to ensure that more data is used for fitting the censoring distribution via the -Kaplan-Meier. -Setting the \code{t_max} can help alleviate inflation of the score when \code{proper} is \code{TRUE}, -in cases where an observation is censored at the last observed time point. -This results in \eqn{G(t_{max}) = 0} and the use of \code{eps} instead (when \code{t_max} is \code{NULL}). +RISBS is strictly proper when the censoring distribution is independent +of the survival distribution and when \eqn{G(t)} is fit on a sufficiently large dataset. +ISBS is never proper. Use \code{proper = FALSE} for ISBS and \code{proper = TRUE} for RISBS. +Results may be very different if many observations are +censored at the last observed time due to division by \eqn{1/eps} in \code{proper = TRUE}. } \section{Dictionary}{ @@ -162,6 +149,38 @@ Default is \code{FALSE}. } } +\section{Implementation differences (time-integration)}{ + + +If comparing the integrated graf score to other packages, e.g. +\CRANpkg{pec}, then \code{method = 2} should be used. However the results may +still be very slightly different as this package uses \code{survfit} to estimate +the censoring distribution, in line with the Graf 1999 paper; whereas some +other packages use \code{prodlim} with \code{reverse = TRUE} (meaning Kaplan-Meier is +not used). +} + +\section{Data used for Estimating Censoring Distribution}{ + + +If \code{task} and \code{train_set} are passed to \verb{$score} then \eqn{G(t)} is fit on training data, +otherwise testing data. The first is likely to reduce any bias caused by calculating +parts of the measure on the test data it is evaluating. The training data is automatically +used in scoring resamplings. +} + +\section{Time Cutoff Details}{ + + +If \code{t_max} or \code{p_max} is given, then \eqn{G(t)} will be fitted using \strong{all observations} from the +train set (or test set) and only then the cutoff time will be applied. +This is to ensure that more data is used for fitting the censoring distribution via the +Kaplan-Meier. +Setting the \code{t_max} can help alleviate inflation of the score when \code{proper} is \code{TRUE}, +in cases where an observation is censored at the last observed time point. +This results in \eqn{G(t_{max}) = 0} and the use of \code{eps} instead (when \code{t_max} is \code{NULL}). +} + \references{ Graf E, Schmoor C, Sauerbrei W, Schumacher M (1999). \dQuote{Assessment and comparison of prognostic classification schemes for survival data.} From 73a6748009cc91cc83a16787e174373a2b321a4b Mon Sep 17 00:00:00 2001 From: john Date: Mon, 7 Oct 2024 17:21:15 +0300 Subject: [PATCH 73/82] update NEWs --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 9fa547abe..858f26565 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,8 @@ * New `PipeOp`s: `PipeOpTaskSurvClassifIPCW`, `PipeOpPredClassifSurvIPCW` * New pipeline (**reduction method**): `pipeline_survtoclassif_IPCW` -* Fixed a rare bug in Brier score when the survival matrix had one time point/column only. 
+* Improved the way Integrated Brier score handles the `times` argument and the `t_max`, especially when the survival matrix has one time point (column) +* Improved documentation of integrated survival scores * Temp fix of math-rendering issue in package website # mlr3proba 0.6.8 From b4b2b2b30e1b0ec5ee55a53b9b31f3c0ce87cba4 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 7 Oct 2024 22:02:59 +0300 Subject: [PATCH 74/82] added new doc templates and refined some others --- man-roxygen/details_method.R | 2 +- man-roxygen/param_tmax.R | 3 ++- man-roxygen/properness.R | 9 +++++++++ man-roxygen/which_times.R | 21 +++++++++++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 man-roxygen/properness.R create mode 100644 man-roxygen/which_times.R diff --git a/man-roxygen/details_method.R b/man-roxygen/details_method.R index 7dcd4b148..1f116921b 100644 --- a/man-roxygen/details_method.R +++ b/man-roxygen/details_method.R @@ -1,4 +1,4 @@ -#' @section Implementation differences (time-integration): +#' @section Implementation differences: #' #' If comparing the integrated graf score to other packages, e.g. #' \CRANpkg{pec}, then `method = 2` should be used. However the results may diff --git a/man-roxygen/param_tmax.R b/man-roxygen/param_tmax.R index 90957a505..268605756 100644 --- a/man-roxygen/param_tmax.R +++ b/man-roxygen/param_tmax.R @@ -1,7 +1,8 @@ #' @section Parameter details: #' - `t_max` (`numeric(1)`)\cr -#' Cutoff time (i.e. time horizon) to evaluate the measure up to. +#' Cutoff time \eqn{\tau^*} (i.e. time horizon) to evaluate the measure up to. #' Mutually exclusive with `p_max` or `times`. #' This will effectively remove test observations for which the observed time #' (event or censoring) is strictly more than `t_max`. #' It's recommended to set `t_max` to avoid division by `eps`, see Details. +#' If `t_max` is not specified, an `Inf` time horizon is assumed. diff --git a/man-roxygen/properness.R b/man-roxygen/properness.R new file mode 100644 index 000000000..cb1c6d8f0 --- /dev/null +++ b/man-roxygen/properness.R @@ -0,0 +1,9 @@ +#' @section Properness: +#' +#' <%=proper_id%> is strictly proper when the censoring distribution is independent +#' of the survival distribution and when \eqn{G(t)} is fit on a sufficiently large dataset. +#' <%=improper_id%> is never proper. Use `proper = FALSE` for <%=improper_id%> and +#' `proper = TRUE` for <%=proper_id%>. +#' Results may be very different if many observations are censored at the last +#' observed time due to division by \eqn{1/eps} in `proper = TRUE`. +#' diff --git a/man-roxygen/which_times.R b/man-roxygen/which_times.R new file mode 100644 index 000000000..448f5352a --- /dev/null +++ b/man-roxygen/which_times.R @@ -0,0 +1,21 @@ +#' @section Time points used for evaluation: +#' If the `times` argument is not specified (`NULL`), then the unique (and +#' sorted) time points from the **test set** are used for evaluation of the +#' time-integrated score. +#' This was a design decision due to the fact that different predicted survival +#' distributions \eqn{S(t)} usually have a **discretized time domain** which may +#' differ, i.e. in the case the survival predictions come from different survival +#' learners. +#' Essentially, using the same set of time points for the calculation of the score +#' minimizes the bias that would come from using different time points. +#' We note that \eqn{S(t)} is by default constantly interpolated for time points that fall +#' outside its discretized time domain. 
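
# Sketch of the two evaluation modes described above; task, learner and the
# requested time points are placeholders:
library(mlr3)
library(mlr3proba)
task = tsk("rats")
part = partition(task)
pred = lrn("surv.kaplan")$train(task, part$train)$predict(task, part$test)
# integrate over the unique, sorted time points of the test set (default)
pred$score(msr("surv.graf"))
# integrate over exactly these time points (a warning is raised if some of
# them fall outside the test set's time range)
pred$score(msr("surv.graf", times = c(50, 70, 90)))
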
+#' +#' Naturally, if the `times` argument is specified, then exactly these time +#' points are used for evaluation. +#' A warning is given to the user in case some of the specified `times` fall outside +#' of the time point range of the test set. +#' The assumption here is that if the test set is large enough, it should have a +#' time domain/range similar to the one from the train set, and therefore time +#' points outside that domain might lead to interpolation or extrapolation of \eqn{S(t)}. +#' From 8ad9eb1e7688f216d2e1ca877c4abb460d1c1ff8 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 7 Oct 2024 22:20:24 +0300 Subject: [PATCH 75/82] refine docs for 3 integrated survival scores (ISBS, ISS, ISSL) --- R/MeasureSurvGraf.R | 22 +++--- R/MeasureSurvIntLogloss.R | 35 +++++---- R/MeasureSurvLogloss.R | 2 +- R/MeasureSurvSchmid.R | 34 +++++++-- man/mlr_measures_surv.graf.Rd | 57 +++++++++++--- man/mlr_measures_surv.intlogloss.Rd | 108 ++++++++++++++++++++------- man/mlr_measures_surv.logloss.Rd | 24 +++--- man/mlr_measures_surv.schmid.Rd | 110 ++++++++++++++++++++++------ 8 files changed, 291 insertions(+), 101 deletions(-) diff --git a/R/MeasureSurvGraf.R b/R/MeasureSurvGraf.R index 69b288f88..2ef60545b 100644 --- a/R/MeasureSurvGraf.R +++ b/R/MeasureSurvGraf.R @@ -20,10 +20,10 @@ #' #' @details #' This measure has two dimensions: (test set) observations and time points. -#' For a specific individual \eqn{i}, with observed survival outcome \eqn{(t_i, \delta_i)} -#' (time and censoring indicator) and predicted survival function \eqn{S_i(t)}, the -#' *observation-wise* loss integrated across the time dimension up to the -#' time cutoff \eqn{\tau^*}, is: +#' For a specific individual \eqn{i} from the test set, with observed survival +#' outcome \eqn{(t_i, \delta_i)} (time and censoring indicator) and predicted +#' survival function \eqn{S_i(t)}, the *observation-wise* loss integrated across +#' the time dimension up to the time cutoff \eqn{\tau^*}, is: #' #' \deqn{L_{ISBS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i^2(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(\tau)} \ d\tau} #' @@ -33,14 +33,16 @@ #' #' \deqn{L_{RISBS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i^2(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(t_i)} \ d\tau} #' -#' which is always weighted by \eqn{G(t_i)} and removes the censored observations. +#' which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject. #' -#' RISBS is strictly proper when the censoring distribution is independent -#' of the survival distribution and when \eqn{G(t)} is fit on a sufficiently large dataset. -#' ISBS is never proper. Use `proper = FALSE` for ISBS and `proper = TRUE` for RISBS. -#' Results may be very different if many observations are -#' censored at the last observed time due to division by \eqn{1/eps} in `proper = TRUE`. 
+#' To get a single score across all \eqn{N} observations of the test set, we +#' return the average of the time-integrated observation-wise scores: +#' \deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N} #' +#' @template properness +#' @templateVar improper_id ISBS +#' @templateVar proper_id RISBS +#' @template which_times #' @template details_method #' @template details_trainG #' @template details_tmax diff --git a/R/MeasureSurvIntLogloss.R b/R/MeasureSurvIntLogloss.R index c47cca2b3..0b5ce26f2 100644 --- a/R/MeasureSurvIntLogloss.R +++ b/R/MeasureSurvIntLogloss.R @@ -17,22 +17,31 @@ #' Logarithmic (log) Loss, aka integrated cross entropy. #' #' @details -#' For an individual who dies at time \eqn{t}, with predicted Survival function, \eqn{S}, the -#' probabilistic log loss at time \eqn{t^*}{t*} is given by -#' \deqn{L_{ISLL}(S,t|t^*) = - [log(1 - S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] - [log(S(t^*))I(t > t^*)(1/G(t^*))]} +#' This measure has two dimensions: (test set) observations and time points. +#' For a specific individual \eqn{i} from the test set, with observed survival +#' outcome \eqn{(t_i, \delta_i)} (time and censoring indicator) and predicted +#' survival function \eqn{S_i(t)}, the *observation-wise* loss integrated across +#' the time dimension up to the time cutoff \eqn{\tau^*}, is: +#' +#' \deqn{L_{ISLL}(S_i, t_i, \delta_i) = -\text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{log[1-S_i(\tau)] \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{\log[S_i(\tau)] \text{I}(t_i > \tau)}{G(\tau)} \ d\tau} +#' #' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution. #' -#' The re-weighted ISLL, RISLL is given by -#' \deqn{L_{RISLL}(S,t|t^*) = - [log(1 - S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] - [log(S(t^*))I(t > t^*)(1/G(t))]} -#' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution, i.e. always -#' weighted by \eqn{G(t)}. -#' RISLL is strictly proper when the censoring distribution is independent -#' of the survival distribution and when G is fit on a sufficiently large dataset. -#' ISLL is never proper. -#' Use `proper = FALSE` for ISLL and `proper = TRUE` for RISLL. -#' Results may be very different if many observations are censored at the last -#' observed time due to division by 1/`eps` in `proper = TRUE`. +#' The **re-weighted ISLL** (RISLL) is: +#' +#' \deqn{L_{RISLL}(S_i, t_i, \delta_i) = -\delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{\log[1-S_i(\tau)]) \text{I}(t_i \leq \tau) + \log[S_i(\tau)] \text{I}(t_i > \tau)}{G(t_i)} \ d\tau} +#' +#' which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject. 
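
# Sketch relating the formulas above to the measure interface; values are
# placeholders. Besides the time-integrated score, a single evaluation time
# point can be requested via `integrated = FALSE`:
library(mlr3)
library(mlr3proba)
task = tsk("rats")
part = partition(task)
pred = lrn("surv.kaplan")$train(task, part$train)$predict(task, part$test)
pred$score(msr("surv.intlogloss", proper = TRUE))                  # RISLL, time-integrated
pred$score(msr("surv.intlogloss", integrated = FALSE, times = 80)) # score at t = 80 only
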
+#' +#' To get a single score across all \eqn{N} observations of the test set, we +#' return the average of the time-integrated observation-wise scores: +#' \deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N} #' +#' @template properness +#' @templateVar improper_id ISLL +#' @templateVar proper_id RISLL +#' @template which_times +#' @template details_method #' @template details_trainG #' @template details_tmax #' diff --git a/R/MeasureSurvLogloss.R b/R/MeasureSurvLogloss.R index 69698c455..2e2d11ac8 100644 --- a/R/MeasureSurvLogloss.R +++ b/R/MeasureSurvLogloss.R @@ -16,7 +16,7 @@ #' The Log Loss, in the context of probabilistic predictions, is defined as the #' negative log probability density function, \eqn{f}, evaluated at the #' observation time (event or censoring), \eqn{t}, -#' \deqn{L_{NLL}(f, t) = -log(f(t))} +#' \deqn{L_{NLL}(f, t) = -\log[f(t)]} #' #' The standard error of the Log Loss, L, is approximated via, #' \deqn{se(L) = sd(L)/\sqrt{N}}{se(L) = sd(L)/\sqrt N} diff --git a/R/MeasureSurvSchmid.R b/R/MeasureSurvSchmid.R index c108ce7b6..1773dfd2f 100644 --- a/R/MeasureSurvSchmid.R +++ b/R/MeasureSurvSchmid.R @@ -16,20 +16,38 @@ #' Calculates the **Integrated Schmid Score** (ISS), aka integrated absolute loss. #' #' @details -#' For an individual who dies at time \eqn{t}, with predicted Survival function, \eqn{S}, the -#' Schmid Score at time \eqn{t^*}{t*} is given by +#' This measure has two dimensions: (test set) observations and time points. +#' For a specific individual \eqn{i} from the test set, with observed survival +#' outcome \eqn{(t_i, \delta_i)} (time and censoring indicator) and predicted +#' survival function \eqn{S_i(t)}, the *observation-wise* loss integrated across +#' the time dimension up to the time cutoff \eqn{\tau^*}, is: +#' +#' \deqn{L_{ISS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau)) \text{I}(t_i > \tau)}{G(\tau)} \ d\tau} +#' +#' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution. +#' +#' The **re-weighted ISS** (RISS) is: +#' +#' \deqn{L_{RISS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau)) \text{I}(t_i > \tau)}{G(t_i)} \ d\tau} +#' +#' which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject. +#' +#' To get a single score across all \eqn{N} observations of the test set, we +#' return the average of the time-integrated observation-wise scores: +#' \deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N} +#' +#' #' \deqn{L_{ISS}(S,t|t^*) = [(S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*)))I(t > t^*)(1/G(t^*))]} #' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution. #' #' The re-weighted ISS, RISS is given by #' \deqn{L_{RISS}(S,t|t^*) = [(S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*)))I(t > t^*)(1/G(t))]} -#' where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution, i.e. always -#' weighted by \eqn{G(t)}. RISS is strictly proper when the censoring distribution is independent -#' of the survival distribution and when G is fit on a sufficiently large dataset. ISS is never -#' proper. Use `proper = FALSE` for ISS and `proper = TRUE` for RISS. -#' Results may be very different if many observations are censored at the last -#' observed time due to division by 1/`eps` in `proper = TRUE`. 
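
# Sketch of the time-horizon cutoff controlled by `t_max` (see the time cutoff
# details); task, learner and the cutoff value are placeholders:
library(mlr3)
library(mlr3proba)
task = tsk("rats")
part = partition(task)
pred = lrn("surv.kaplan")$train(task, part$train)$predict(task, part$test)
pred$score(msr("surv.schmid", proper = TRUE))             # RISS over the full test horizon
pred$score(msr("surv.schmid", proper = TRUE, t_max = 90)) # RISS up to the time horizon t_max = 90
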
#' +#' @template properness +#' @templateVar improper_id ISS +#' @templateVar proper_id RISS +#' @template which_times +#' @template details_method #' @template details_trainG #' @template details_tmax #' diff --git a/man/mlr_measures_surv.graf.Rd b/man/mlr_measures_surv.graf.Rd index e03d8182f..4d2924dfc 100644 --- a/man/mlr_measures_surv.graf.Rd +++ b/man/mlr_measures_surv.graf.Rd @@ -12,10 +12,10 @@ or squared survival loss. } \details{ This measure has two dimensions: (test set) observations and time points. -For a specific individual \eqn{i}, with observed survival outcome \eqn{(t_i, \delta_i)} -(time and censoring indicator) and predicted survival function \eqn{S_i(t)}, the -\emph{observation-wise} loss integrated across the time dimension up to the -time cutoff \eqn{\tau^*}, is: +For a specific individual \eqn{i} from the test set, with observed survival +outcome \eqn{(t_i, \delta_i)} (time and censoring indicator) and predicted +survival function \eqn{S_i(t)}, the \emph{observation-wise} loss integrated across +the time dimension up to the time cutoff \eqn{\tau^*}, is: \deqn{L_{ISBS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i^2(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(\tau)} \ d\tau} @@ -25,13 +25,11 @@ The \strong{re-weighted ISBS} (RISBS) is: \deqn{L_{RISBS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i^2(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau))^2 \text{I}(t_i > \tau)}{G(t_i)} \ d\tau} -which is always weighted by \eqn{G(t_i)} and removes the censored observations. +which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject. -RISBS is strictly proper when the censoring distribution is independent -of the survival distribution and when \eqn{G(t)} is fit on a sufficiently large dataset. -ISBS is never proper. Use \code{proper = FALSE} for ISBS and \code{proper = TRUE} for RISBS. -Results may be very different if many observations are -censored at the last observed time due to division by \eqn{1/eps} in \code{proper = TRUE}. +To get a single score across all \eqn{N} observations of the test set, we +return the average of the time-integrated observation-wise scores: +\deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N} } \section{Dictionary}{ @@ -87,11 +85,12 @@ If \code{integrated == FALSE} then a single time point at which to return the sc \itemize{ \item \code{t_max} (\code{numeric(1)})\cr -Cutoff time (i.e. time horizon) to evaluate the measure up to. +Cutoff time \eqn{\tau^*} (i.e. time horizon) to evaluate the measure up to. Mutually exclusive with \code{p_max} or \code{times}. This will effectively remove test observations for which the observed time (event or censoring) is strictly more than \code{t_max}. It's recommended to set \code{t_max} to avoid division by \code{eps}, see Details. +If \code{t_max} is not specified, an \code{Inf} time horizon is assumed. } @@ -149,7 +148,41 @@ Default is \code{FALSE}. } } -\section{Implementation differences (time-integration)}{ +\section{Properness}{ + + +RISBS is strictly proper when the censoring distribution is independent +of the survival distribution and when \eqn{G(t)} is fit on a sufficiently large dataset. +ISBS is never proper. Use \code{proper = FALSE} for ISBS and +\code{proper = TRUE} for RISBS. +Results may be very different if many observations are censored at the last +observed time due to division by \eqn{1/eps} in \code{proper = TRUE}. 
+} + +\section{Time points used for evaluation}{ + +If the \code{times} argument is not specified (\code{NULL}), then the unique (and +sorted) time points from the \strong{test set} are used for evaluation of the +time-integrated score. +This was a design decision due to the fact that different predicted survival +distributions \eqn{S(t)} usually have a \strong{discretized time domain} which may +differ, i.e. in the case the survival predictions come from different survival +learners. +Essentially, using the same set of time points for the calculation of the score +minimizes the bias that would come from using different time points. +We note that \eqn{S(t)} is by default constantly interpolated for time points that fall +outside its discretized time domain. + +Naturally, if the \code{times} argument is specified, then exactly these time +points are used for evaluation. +A warning is given to the user in case some of the specified \code{times} fall outside +of the time point range of the test set. +The assumption here is that if the test set is large enough, it should have a +time domain/range similar to the one from the train set, and therefore time +points outside that domain might lead to interpolation or extrapolation of \eqn{S(t)}. +} + +\section{Implementation differences}{ If comparing the integrated graf score to other packages, e.g. diff --git a/man/mlr_measures_surv.intlogloss.Rd b/man/mlr_measures_surv.intlogloss.Rd index f827c3439..5a6d7af85 100644 --- a/man/mlr_measures_surv.intlogloss.Rd +++ b/man/mlr_measures_surv.intlogloss.Rd @@ -9,34 +9,25 @@ Calculates the \strong{Integrated Survival Log-Likelihood} (ISLL) or Integrated Logarithmic (log) Loss, aka integrated cross entropy. } \details{ -For an individual who dies at time \eqn{t}, with predicted Survival function, \eqn{S}, the -probabilistic log loss at time \eqn{t^*}{t*} is given by -\deqn{L_{ISLL}(S,t|t^*) = - [log(1 - S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] - [log(S(t^*))I(t > t^*)(1/G(t^*))]} +This measure has two dimensions: (test set) observations and time points. +For a specific individual \eqn{i} from the test set, with observed survival +outcome \eqn{(t_i, \delta_i)} (time and censoring indicator) and predicted +survival function \eqn{S_i(t)}, the \emph{observation-wise} loss integrated across +the time dimension up to the time cutoff \eqn{\tau^*}, is: + +\deqn{L_{ISLL}(S_i, t_i, \delta_i) = -\text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{log[1-S_i(\tau)] \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{\log[S_i(\tau)] \text{I}(t_i > \tau)}{G(\tau)} \ d\tau} + where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution. -The re-weighted ISLL, RISLL is given by -\deqn{L_{RISLL}(S,t|t^*) = - [log(1 - S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] - [log(S(t^*))I(t > t^*)(1/G(t))]} -where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution, i.e. always -weighted by \eqn{G(t)}. -RISLL is strictly proper when the censoring distribution is independent -of the survival distribution and when G is fit on a sufficiently large dataset. -ISLL is never proper. -Use \code{proper = FALSE} for ISLL and \code{proper = TRUE} for RISLL. -Results may be very different if many observations are censored at the last -observed time due to division by 1/\code{eps} in \code{proper = TRUE}. +The \strong{re-weighted ISLL} (RISLL) is: -If \code{task} and \code{train_set} are passed to \verb{$score} then \eqn{G(t)} is fit on training data, -otherwise testing data. 
The first is likely to reduce any bias caused by calculating -parts of the measure on the test data it is evaluating. The training data is automatically -used in scoring resamplings. +\deqn{L_{RISLL}(S_i, t_i, \delta_i) = -\delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{\log[1-S_i(\tau)]) \text{I}(t_i \leq \tau) + \log[S_i(\tau)] \text{I}(t_i > \tau)}{G(t_i)} \ d\tau} -If \code{t_max} or \code{p_max} is given, then \eqn{G(t)} will be fitted using \strong{all observations} from the -train set (or test set) and only then the cutoff time will be applied. -This is to ensure that more data is used for fitting the censoring distribution via the -Kaplan-Meier. -Setting the \code{t_max} can help alleviate inflation of the score when \code{proper} is \code{TRUE}, -in cases where an observation is censored at the last observed time point. -This results in \eqn{G(t_{max}) = 0} and the use of \code{eps} instead (when \code{t_max} is \code{NULL}). +which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject. + +To get a single score across all \eqn{N} observations of the test set, we +return the average of the time-integrated observation-wise scores: +\deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N} } \section{Dictionary}{ @@ -92,11 +83,12 @@ If \code{integrated == FALSE} then a single time point at which to return the sc \itemize{ \item \code{t_max} (\code{numeric(1)})\cr -Cutoff time (i.e. time horizon) to evaluate the measure up to. +Cutoff time \eqn{\tau^*} (i.e. time horizon) to evaluate the measure up to. Mutually exclusive with \code{p_max} or \code{times}. This will effectively remove test observations for which the observed time (event or censoring) is strictly more than \code{t_max}. It's recommended to set \code{t_max} to avoid division by \code{eps}, see Details. +If \code{t_max} is not specified, an \code{Inf} time horizon is assumed. } @@ -154,6 +146,72 @@ Default is \code{FALSE}. } } +\section{Properness}{ + + +RISLL is strictly proper when the censoring distribution is independent +of the survival distribution and when \eqn{G(t)} is fit on a sufficiently large dataset. +ISLL is never proper. Use \code{proper = FALSE} for ISLL and +\code{proper = TRUE} for RISLL. +Results may be very different if many observations are censored at the last +observed time due to division by \eqn{1/eps} in \code{proper = TRUE}. +} + +\section{Time points used for evaluation}{ + +If the \code{times} argument is not specified (\code{NULL}), then the unique (and +sorted) time points from the \strong{test set} are used for evaluation of the +time-integrated score. +This was a design decision due to the fact that different predicted survival +distributions \eqn{S(t)} usually have a \strong{discretized time domain} which may +differ, i.e. in the case the survival predictions come from different survival +learners. +Essentially, using the same set of time points for the calculation of the score +minimizes the bias that would come from using different time points. +We note that \eqn{S(t)} is by default constantly interpolated for time points that fall +outside its discretized time domain. + +Naturally, if the \code{times} argument is specified, then exactly these time +points are used for evaluation. +A warning is given to the user in case some of the specified \code{times} fall outside +of the time point range of the test set. 
+The assumption here is that if the test set is large enough, it should have a +time domain/range similar to the one from the train set, and therefore time +points outside that domain might lead to interpolation or extrapolation of \eqn{S(t)}. +} + +\section{Implementation differences}{ + + +If comparing the integrated graf score to other packages, e.g. +\CRANpkg{pec}, then \code{method = 2} should be used. However the results may +still be very slightly different as this package uses \code{survfit} to estimate +the censoring distribution, in line with the Graf 1999 paper; whereas some +other packages use \code{prodlim} with \code{reverse = TRUE} (meaning Kaplan-Meier is +not used). +} + +\section{Data used for Estimating Censoring Distribution}{ + + +If \code{task} and \code{train_set} are passed to \verb{$score} then \eqn{G(t)} is fit on training data, +otherwise testing data. The first is likely to reduce any bias caused by calculating +parts of the measure on the test data it is evaluating. The training data is automatically +used in scoring resamplings. +} + +\section{Time Cutoff Details}{ + + +If \code{t_max} or \code{p_max} is given, then \eqn{G(t)} will be fitted using \strong{all observations} from the +train set (or test set) and only then the cutoff time will be applied. +This is to ensure that more data is used for fitting the censoring distribution via the +Kaplan-Meier. +Setting the \code{t_max} can help alleviate inflation of the score when \code{proper} is \code{TRUE}, +in cases where an observation is censored at the last observed time point. +This results in \eqn{G(t_{max}) = 0} and the use of \code{eps} instead (when \code{t_max} is \code{NULL}). +} + \references{ Graf E, Schmoor C, Sauerbrei W, Schumacher M (1999). \dQuote{Assessment and comparison of prognostic classification schemes for survival data.} diff --git a/man/mlr_measures_surv.logloss.Rd b/man/mlr_measures_surv.logloss.Rd index 29eb2061e..245364758 100644 --- a/man/mlr_measures_surv.logloss.Rd +++ b/man/mlr_measures_surv.logloss.Rd @@ -11,25 +11,20 @@ Calculates the cross-entropy, or negative log-likelihood (NLL) or logarithmic (l The Log Loss, in the context of probabilistic predictions, is defined as the negative log probability density function, \eqn{f}, evaluated at the observation time (event or censoring), \eqn{t}, -\deqn{L_{NLL}(f, t) = -log(f(t))} +\deqn{L_{NLL}(f, t) = -\log[f(t)]} The standard error of the Log Loss, L, is approximated via, \deqn{se(L) = sd(L)/\sqrt{N}}{se(L) = sd(L)/\sqrt N} where \eqn{N} are the number of observations in the test set, and \eqn{sd} is the standard deviation. -The \strong{Re-weighted Negative Log-Likelihood} (RNLL) or IPCW Log Loss is defined by -\deqn{L_{RNLL}(f, t, \Delta) = -\Delta log(f(t))/G(t)} -where \eqn{\Delta} is the censoring indicator and G is the Kaplan-Meier estimator of the +The \strong{Re-weighted Negative Log-Likelihood} (RNLL) or IPCW (Inverse Probability Censoring Weighted) Log Loss is defined by +\deqn{L_{RNLL}(f, t, \delta) = - \frac{\delta \log[f(t)]}{G(t)}} +where \eqn{\delta} is the censoring indicator and \eqn{G(t)} is the Kaplan-Meier estimator of the censoring distribution. So only observations that have experienced the event are taking into account -for RNLL and both \eqn{f(t), G(t)} are calculated only at the event times. +for RNLL (i.e. \eqn{\delta = 1}) and both \eqn{f(t), G(t)} are calculated only at the event times. If only censored observations exist in the test set, \code{NaN} is returned. 
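
# A small sketch of the behaviour described above, with placeholder task and
# learner: a test set containing only censored observations yields NaN.
library(mlr3)
library(mlr3proba)
task = tsk("rats")
censored_ids = task$row_ids[task$data(cols = "status")[["status"]] == 0]
pred = lrn("surv.kaplan")$train(task)$predict(task, row_ids = censored_ids)
pred$score(msr("surv.logloss")) # NaN: no events contribute to RNLL
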
- -If \code{task} and \code{train_set} are passed to \verb{$score} then \eqn{G(t)} is fit on training data, -otherwise testing data. The first is likely to reduce any bias caused by calculating -parts of the measure on the test data it is evaluating. The training data is automatically -used in scoring resamplings. } \section{Dictionary}{ @@ -95,6 +90,15 @@ If \code{TRUE} (default) then returns the \eqn{L_{RNLL}} score (which is proper) } } +\section{Data used for Estimating Censoring Distribution}{ + + +If \code{task} and \code{train_set} are passed to \verb{$score} then \eqn{G(t)} is fit on training data, +otherwise testing data. The first is likely to reduce any bias caused by calculating +parts of the measure on the test data it is evaluating. The training data is automatically +used in scoring resamplings. +} + \seealso{ Other survival measures: \code{\link{mlr_measures_surv.calib_alpha}}, diff --git a/man/mlr_measures_surv.schmid.Rd b/man/mlr_measures_surv.schmid.Rd index 811c2c08c..d01cd26ea 100644 --- a/man/mlr_measures_surv.schmid.Rd +++ b/man/mlr_measures_surv.schmid.Rd @@ -8,32 +8,31 @@ Calculates the \strong{Integrated Schmid Score} (ISS), aka integrated absolute loss. } \details{ -For an individual who dies at time \eqn{t}, with predicted Survival function, \eqn{S}, the -Schmid Score at time \eqn{t^*}{t*} is given by +This measure has two dimensions: (test set) observations and time points. +For a specific individual \eqn{i} from the test set, with observed survival +outcome \eqn{(t_i, \delta_i)} (time and censoring indicator) and predicted +survival function \eqn{S_i(t)}, the \emph{observation-wise} loss integrated across +the time dimension up to the time cutoff \eqn{\tau^*}, is: + +\deqn{L_{ISS}(S_i, t_i, \delta_i) = \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i(\tau) \text{I}(t_i \leq \tau, \delta=1)}{G(t_i)} + \frac{(1-S_i(\tau)) \text{I}(t_i > \tau)}{G(\tau)} \ d\tau} + +where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution. + +The \strong{re-weighted ISS} (RISS) is: + +\deqn{L_{RISS}(S_i, t_i, \delta_i) = \delta_i \text{I}(t_i \leq \tau^*) \int^{\tau^*}_0 \frac{S_i(\tau) \text{I}(t_i \leq \tau) + (1-S_i(\tau)) \text{I}(t_i > \tau)}{G(t_i)} \ d\tau} + +which is always weighted by \eqn{G(t_i)} and is equal to zero for a censored subject. + +To get a single score across all \eqn{N} observations of the test set, we +return the average of the time-integrated observation-wise scores: +\deqn{\sum_{i=1}^N L(S_i, t_i, \delta_i) / N} + \deqn{L_{ISS}(S,t|t^*) = [(S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*)))I(t > t^*)(1/G(t^*))]} where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution. The re-weighted ISS, RISS is given by \deqn{L_{RISS}(S,t|t^*) = [(S(t^*))I(t \le t^*, \delta = 1)(1/G(t))] + [((1 - S(t^*)))I(t > t^*)(1/G(t))]} -where \eqn{G} is the Kaplan-Meier estimate of the censoring distribution, i.e. always -weighted by \eqn{G(t)}. RISS is strictly proper when the censoring distribution is independent -of the survival distribution and when G is fit on a sufficiently large dataset. ISS is never -proper. Use \code{proper = FALSE} for ISS and \code{proper = TRUE} for RISS. -Results may be very different if many observations are censored at the last -observed time due to division by 1/\code{eps} in \code{proper = TRUE}. - -If \code{task} and \code{train_set} are passed to \verb{$score} then \eqn{G(t)} is fit on training data, -otherwise testing data. 
The first is likely to reduce any bias caused by calculating -parts of the measure on the test data it is evaluating. The training data is automatically -used in scoring resamplings. - -If \code{t_max} or \code{p_max} is given, then \eqn{G(t)} will be fitted using \strong{all observations} from the -train set (or test set) and only then the cutoff time will be applied. -This is to ensure that more data is used for fitting the censoring distribution via the -Kaplan-Meier. -Setting the \code{t_max} can help alleviate inflation of the score when \code{proper} is \code{TRUE}, -in cases where an observation is censored at the last observed time point. -This results in \eqn{G(t_{max}) = 0} and the use of \code{eps} instead (when \code{t_max} is \code{NULL}). } \section{Dictionary}{ @@ -89,11 +88,12 @@ If \code{integrated == FALSE} then a single time point at which to return the sc \itemize{ \item \code{t_max} (\code{numeric(1)})\cr -Cutoff time (i.e. time horizon) to evaluate the measure up to. +Cutoff time \eqn{\tau^*} (i.e. time horizon) to evaluate the measure up to. Mutually exclusive with \code{p_max} or \code{times}. This will effectively remove test observations for which the observed time (event or censoring) is strictly more than \code{t_max}. It's recommended to set \code{t_max} to avoid division by \code{eps}, see Details. +If \code{t_max} is not specified, an \code{Inf} time horizon is assumed. } @@ -151,6 +151,72 @@ Default is \code{FALSE}. } } +\section{Properness}{ + + +RISS is strictly proper when the censoring distribution is independent +of the survival distribution and when \eqn{G(t)} is fit on a sufficiently large dataset. +ISS is never proper. Use \code{proper = FALSE} for ISS and +\code{proper = TRUE} for RISS. +Results may be very different if many observations are censored at the last +observed time due to division by \eqn{1/eps} in \code{proper = TRUE}. +} + +\section{Time points used for evaluation}{ + +If the \code{times} argument is not specified (\code{NULL}), then the unique (and +sorted) time points from the \strong{test set} are used for evaluation of the +time-integrated score. +This was a design decision due to the fact that different predicted survival +distributions \eqn{S(t)} usually have a \strong{discretized time domain} which may +differ, i.e. in the case the survival predictions come from different survival +learners. +Essentially, using the same set of time points for the calculation of the score +minimizes the bias that would come from using different time points. +We note that \eqn{S(t)} is by default constantly interpolated for time points that fall +outside its discretized time domain. + +Naturally, if the \code{times} argument is specified, then exactly these time +points are used for evaluation. +A warning is given to the user in case some of the specified \code{times} fall outside +of the time point range of the test set. +The assumption here is that if the test set is large enough, it should have a +time domain/range similar to the one from the train set, and therefore time +points outside that domain might lead to interpolation or extrapolation of \eqn{S(t)}. +} + +\section{Implementation differences}{ + + +If comparing the integrated graf score to other packages, e.g. +\CRANpkg{pec}, then \code{method = 2} should be used. 
However the results may +still be very slightly different as this package uses \code{survfit} to estimate +the censoring distribution, in line with the Graf 1999 paper; whereas some +other packages use \code{prodlim} with \code{reverse = TRUE} (meaning Kaplan-Meier is +not used). +} + +\section{Data used for Estimating Censoring Distribution}{ + + +If \code{task} and \code{train_set} are passed to \verb{$score} then \eqn{G(t)} is fit on training data, +otherwise testing data. The first is likely to reduce any bias caused by calculating +parts of the measure on the test data it is evaluating. The training data is automatically +used in scoring resamplings. +} + +\section{Time Cutoff Details}{ + + +If \code{t_max} or \code{p_max} is given, then \eqn{G(t)} will be fitted using \strong{all observations} from the +train set (or test set) and only then the cutoff time will be applied. +This is to ensure that more data is used for fitting the censoring distribution via the +Kaplan-Meier. +Setting the \code{t_max} can help alleviate inflation of the score when \code{proper} is \code{TRUE}, +in cases where an observation is censored at the last observed time point. +This results in \eqn{G(t_{max}) = 0} and the use of \code{eps} instead (when \code{t_max} is \code{NULL}). +} + \references{ Schemper, Michael, Henderson, Robin (2000). \dQuote{Predictive Accuracy and Explained Variation in Cox Regression.} From 52b400fadca26c8d27b44909f4676b4bb0d6029b Mon Sep 17 00:00:00 2001 From: john Date: Tue, 8 Oct 2024 11:21:10 +0200 Subject: [PATCH 76/82] refine examples and test for IPCW (integrated = FALSE for a single times) --- R/pipelines.R | 2 +- man/mlr_graphs_survtoclassif_IPCW.Rd | 2 +- tests/testthat/test_pipelines.R | 7 ++----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/R/pipelines.R b/R/pipelines.R index 935e7f46f..4ec36dcaa 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -708,7 +708,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' #' # score predictions #' pred$score() # C-index -#' pred$score(msr("surv.brier", times = 500)) # Brier score at tau +#' pred$score(msr("surv.brier", times = 500, integrated = FALSE)) # Brier score at tau #' } #' @export pipeline_survtoclassif_IPCW = function(learner, tau = NULL, eps = 1e-3, graph_learner = FALSE) { diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index e03a999b2..ee560c84c 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -66,7 +66,7 @@ to \link{PredictionSurv}. 
# score predictions pred$score() # C-index - pred$score(msr("surv.brier", times = 500)) # Brier score at tau + pred$score(msr("surv.brier", times = 500, integrated = FALSE)) # Brier score at tau } \dontshow{\}) # examplesIf} } diff --git a/tests/testthat/test_pipelines.R b/tests/testthat/test_pipelines.R index 1d24b2fa1..4dae18b89 100644 --- a/tests/testthat/test_pipelines.R +++ b/tests/testthat/test_pipelines.R @@ -125,10 +125,7 @@ test_that("survtoclassif_IPCW", { # crank = risk = 1 - surv at cutoff time expect_equal(p$crank, 1 - p$data$distr[,"500"]) # brier score at the cutoff time works - expect_number(p$score(msr("surv.brier", times = 500)), finite = TRUE) - # also in other points - expect_number(p$score(msr("surv.brier", times = 100)), finite = TRUE) - expect_number(p$score(msr("surv.brier", times = 600)), finite = TRUE) + expect_number(p$score(msr("surv.brier", times = 500, integrated = FALSE)), finite = TRUE) # Test with different tau grlrn = mlr3pipelines::ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), @@ -138,7 +135,7 @@ test_that("survtoclassif_IPCW", { # check predictions expect_numeric(p2$crank, len = length(part$test), lower = 0, upper = 1) - expect_number(p2$score(msr("surv.brier", times = 600)), finite = TRUE) + expect_number(p2$score(msr("surv.brier", times = 600, integrated = FALSE)), finite = TRUE) # different cutoff times, different (crank) predictions expect_false(all(p$crank == p2$crank)) From 8413b0b0d447885316ae89b961182fefe75a8f15 Mon Sep 17 00:00:00 2001 From: john Date: Tue, 8 Oct 2024 11:22:10 +0200 Subject: [PATCH 77/82] add IPCW pipeline alias --- R/pipelines.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pipelines.R b/R/pipelines.R index 4ec36dcaa..874dce64b 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -741,3 +741,4 @@ register_graph("probregr", pipeline_probregr) register_graph("survtoregr", pipeline_survtoregr) register_graph("survtoclassif_disctime", pipeline_survtoclassif_disctime) register_graph("survtoclassif_IPCW", pipeline_survtoclassif_IPCW) +register_graph("survtoclassif_vock", pipeline_survtoclassif_IPCW) # alias From c8d4b9fc09239cbfe568dec8b44cc0b4b4d1c34a Mon Sep 17 00:00:00 2001 From: john Date: Tue, 8 Oct 2024 16:17:55 +0200 Subject: [PATCH 78/82] add doc template for pipeline construction via ppl --- man-roxygen/pipeline.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/man-roxygen/pipeline.R b/man-roxygen/pipeline.R index 1a108de93..102325916 100644 --- a/man-roxygen/pipeline.R +++ b/man-roxygen/pipeline.R @@ -4,6 +4,14 @@ #' @param graph_learner (`logical(1)`)\cr #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. 
+#' @section Dictionary: +#' This [Graph][mlr3pipelines::Graph] can be instantiated via the [dictionary][mlr3misc::Dictionary] +#' [mlr_graphs][mlr3pipelines::mlr_graphs] or with the associated sugar function [ppl()][mlr3pipelines::ppl]: +#' ``` +#' mlr_graphs$get("<%=id%>") +#' ppl("<%=id%>") +#' ``` +#' #' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] #' @family pipelines #' @export From 4f49f826a2373eda12324c17d8199d6753f9253b Mon Sep 17 00:00:00 2001 From: john Date: Tue, 8 Oct 2024 16:22:56 +0200 Subject: [PATCH 79/82] pipeline doc refactoring --- R/pipelines.R | 90 +++++++++--------------- man/mlr_graphs_crankcompositor.Rd | 17 +++-- man/mlr_graphs_distrcompositor.Rd | 23 ++++-- man/mlr_graphs_probregr.Rd | 18 +++-- man/mlr_graphs_responsecompositor.Rd | 17 +++-- man/mlr_graphs_survaverager.Rd | 17 +++-- man/mlr_graphs_survbagging.Rd | 19 +++-- man/mlr_graphs_survtoclassif_IPCW.Rd | 24 +++++-- man/mlr_graphs_survtoclassif_disctime.Rd | 27 ++++--- man/mlr_graphs_survtoregr.Rd | 19 +---- 10 files changed, 152 insertions(+), 119 deletions(-) diff --git a/R/pipelines.R b/R/pipelines.R index 874dce64b..a9109e53d 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -2,13 +2,14 @@ #' @templateVar title Survival Prediction Averaging #' @templateVar pipeop [PipeOpSurvAvg] #' @templateVar id survaverager +#' #' @param learners `(list())` \cr #' List of [LearnerSurv]s to average. #' @param param_vals `(list())` \cr #' Parameters, including weights, to pass to [PipeOpSurvAvg]. -#' @examples +#' +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { #' library("mlr3") #' library("mlr3pipelines") #' @@ -22,7 +23,6 @@ #' pipe$train(task) #' pipe$predict(task) #' } -#' } pipeline_survaverager = function(learners, param_vals = list(), graph_learner = FALSE) { learners = mlr3pipelines::gunion(map(learners, mlr3pipelines::as_graph)) po = mlr3pipelines::po("survavg", param_vals = param_vals) @@ -42,6 +42,7 @@ pipeline_survaverager = function(learners, param_vals = list(), graph_learner = #' @templateVar pipeop [PipeOpSubsample][mlr3pipelines::PipeOpSubsample] and [PipeOpSurvAvg] #' @templateVar id survbagging #' @template param_pipeline_learner +#' #' @param iterations (`integer(1)`)\cr #' Number of bagging iterations. Defaults to 10. #' @param frac (`numeric(1)`)\cr @@ -53,16 +54,16 @@ pipeline_survaverager = function(learners, param_vals = list(), graph_learner = #' @param weights (`numeric()`)\cr #' Weights for model avering, ignored if `avg = FALSE`. Default is uniform weighting, #' see [PipeOpSurvAvg]. +#' #' @details Bagging (Bootstrap AGGregatING) is the process of bootstrapping data and aggregating #' the final predictions. Bootstrapping splits the data into `B` smaller datasets of a given size #' and is performed with [PipeOpSubsample][mlr3pipelines::PipeOpSubsample]. Aggregation is #' the sample mean of deterministic predictions and a #' [MixtureDistribution][distr6::MixtureDistribution] of distribution predictions. This can be #' further enhanced by using a weighted average by supplying `weights`. 
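As an illustration of the weighted average mentioned above, the bagging pipeline could be called along these lines; a sketch only, where the learner, `iterations = 3` and the `weights` vector are illustrative and assume one weight per bagging iteration:

```r
library(mlr3)
library(mlr3pipelines)
library(mlr3proba)

task = tsk("rats")

# three bootstrap models, averaged with user-supplied weights
pipe = ppl("survbagging", learner = lrn("surv.coxph"),
           iterations = 3, weights = c(0.5, 0.3, 0.2))
pipe$train(task)
pipe$predict(task)
```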
-#' @return [mlr3pipelines::GraphLearner] -#' @examples +#' +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { #' library("mlr3") #' library("mlr3pipelines") #' @@ -76,7 +77,6 @@ pipeline_survaverager = function(learners, param_vals = list(), graph_learner = #' pipe$train(task) #' pipe$predict(task) #' } -#' } pipeline_survbagging = function(learner, iterations = 10, frac = 0.7, avg = TRUE, weights = 1, graph_learner = FALSE) { @@ -117,9 +117,8 @@ pipeline_survbagging = function(learner, iterations = 10, frac = 0.7, avg = TRUE #' If `FALSE` (default) and the prediction already has a `crank` prediction, then the compositor returns the input prediction unchanged. #' If `TRUE`, then the `crank` will be overwritten. #' -#' @examples +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { #' library("mlr3") #' library("mlr3pipelines") #' @@ -137,7 +136,6 @@ pipeline_survbagging = function(learner, iterations = 10, frac = 0.7, avg = TRUE #' grlrn$train(task, part$train) #' grlrn$predict(task, part$test) #' } -#' } pipeline_crankcompositor = function(learner, method = c("mort"), overwrite = FALSE, graph_learner = FALSE) { assert_learner(learner, task_type = "surv") @@ -178,9 +176,8 @@ pipeline_crankcompositor = function(learner, method = c("mort"), #' If `FALSE` (default) and the prediction already has a `response` prediction, then the compositor returns the input prediction unchanged. #' If `TRUE`, then the `response` (and the `crank`, if `add_crank` is `TRUE`) will be overwritten. #' -#' @examples +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { #' library("mlr3") #' library("mlr3pipelines") #' @@ -198,7 +195,6 @@ pipeline_crankcompositor = function(learner, method = c("mort"), #' grlrn$train(task, part$train) #' grlrn$predict(task, part$test) #' } -#' } pipeline_responsecompositor = function(learner, method = "rmst", tau = NULL, add_crank = FALSE, overwrite = FALSE, graph_learner = FALSE) { @@ -229,8 +225,7 @@ pipeline_responsecompositor = function(learner, method = "rmst", tau = NULL, #' @templateVar pipeop [PipeOpDistrCompositor] or [PipeOpBreslow] #' @templateVar id distrcompositor #' @template param_pipeline_learner -#' @param learner [LearnerSurv]\cr -#' Survival learner. +#' #' @param estimator (`character(1)`)\cr #' One of `kaplan` (default), `nelson` or `breslow`, corresponding to the Kaplan-Meier, #' Nelson-Aalen and [Breslow][breslow] estimators respectively. @@ -245,9 +240,9 @@ pipeline_responsecompositor = function(learner, method = "rmst", tau = NULL, #' If `TRUE` then the `distr` is overwritten by the compositor if #' already present, which may be required for changing the prediction `distr` from one model form #' to another. 
-#' @examples +#' +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { #' library("mlr3pipelines") #' #' # let's change the distribution prediction of Cox (Breslow-based) to an AFT form: @@ -263,7 +258,6 @@ pipeline_responsecompositor = function(learner, method = "rmst", tau = NULL, #' grlrn$train(task) #' grlrn$predict(task) #' } -#' } pipeline_distrcompositor = function(learner, estimator = "kaplan", form = "aft", overwrite = FALSE, graph_learner = FALSE) { # some checks @@ -304,10 +298,8 @@ pipeline_distrcompositor = function(learner, estimator = "kaplan", form = "aft", #' @param dist (`character(1)`)\cr #' Location-scale distribution to use for composition. #' Current possibilities are' `"Cauchy", "Gumbel", "Laplace", "Logistic", "Normal", "Uniform"`. Default is `"Uniform"`. -#' @examples +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE) && -#' requireNamespace("rpart", quietly = TRUE)) { #' library("mlr3") #' library("mlr3pipelines") #' @@ -332,7 +324,6 @@ pipeline_distrcompositor = function(learner, estimator = "kaplan", form = "aft", #' pipe$train(task) #' pipe$predict(task) #' } -#' } pipeline_probregr = function(learner, learner_se = NULL, dist = "Uniform", graph_learner = FALSE) { @@ -443,12 +434,8 @@ pipeline_probregr = function(learner, learner_se = NULL, dist = "Uniform", #' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a #' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. #' -#' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] -#' @family pipelines -#' -#' @examples +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE)) { #' library("mlr3") #' library("mlr3pipelines") #' @@ -486,7 +473,6 @@ pipeline_probregr = function(learner, learner_se = NULL, dist = "Uniform", #' pipe$train(task) #' pipe$predict(task) #' } -#' } #' @export pipeline_survtoregr = function(method = 1, regr_learner = lrn("regr.featureless"), distrcompose = TRUE, distr_estimator = lrn("surv.kaplan"), @@ -568,10 +554,10 @@ pipeline_survtoregr = function(method = 1, regr_learner = lrn("regr.featureless" gr } -#' @name mlr_graphs_survtoclassif_disctime -#' @title Survival to Classification Reduction Pipeline -#' @description Wrapper around multiple [PipeOp][mlr3pipelines::PipeOp]s to help in creation -#' of complex survival reduction methods. +#' @template pipeline +#' @templateVar pipeop [PipeOpTaskSurvClassifDiscTime] and [PipeOpPredClassifSurvDiscTime] +#' @templateVar id survtoclassif_disctime +#' @templateVar title Survival to Classification Reduction using Discrete Time #' #' @param learner [LearnerClassif][mlr3::LearnerClassif]\cr #' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. @@ -589,29 +575,18 @@ pipeline_survtoregr = function(method = 1, regr_learner = lrn("regr.featureless" #' All features of the task are available as well as `tend` the upper bounds #' of the intervals created by `cut`. #' If `rhs` is unspecified, the formula of the task will be used. -#' @param graph_learner (`logical(1)`)\cr -#' If `TRUE` returns wraps the [Graph][mlr3pipelines::Graph] as a -#' [GraphLearner][mlr3pipelines::GraphLearner] otherwise (default) returns as a `Graph`. 
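A sketch of how these arguments can be combined when building the discrete-time graph; the task, learner, `cut` and `rhs` values below are illustrative:

```r
library(mlr3)
library(mlr3learners)
library(mlr3pipelines)
library(mlr3proba)

task = tsk("lung")
part = partition(task)

grlrn = ppl("survtoclassif_disctime",
            learner = lrn("classif.log_reg"),
            cut = 4,            # four equidistant time intervals
            rhs = "age + tend", # formula: age plus the interval end points
            graph_learner = TRUE)
grlrn$train(task, row_ids = part$train)
grlrn$predict(task, row_ids = part$test)
```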
#' #' @details #' The pipeline consists of the following steps: -#' \enumerate{ -#' \item [PipeOpTaskSurvClassifDiscTime] Converts [TaskSurv] to a [TaskClassif][mlr3::TaskClassif]. -#' \item A [LearnerClassif] is fit and predicted on the new `TaskClassif`. -#' \item [PipeOpPredClassifSurvDiscTime] transforms the resulting [PredictionClassif][mlr3::PredictionClassif] -#' to [PredictionSurv]. -#' \item Optionally: [PipeOpModelMatrix][mlr3pipelines::PipeOpModelMatrix] is used to transform the formula of the task -#' before fitting the learner. -#' } #' -#' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] -#' @family pipelines +#' 1. [PipeOpTaskSurvClassifDiscTime] Converts [TaskSurv] to a [TaskClassif][mlr3::TaskClassif]. +#' 2. A [LearnerClassif] is fit and predicted on the new `TaskClassif`. +#' 3. [PipeOpPredClassifSurvDiscTime] transforms the resulting [PredictionClassif][mlr3::PredictionClassif] to [PredictionSurv]. +#' 4. Optionally: [PipeOpModelMatrix][mlr3pipelines::PipeOpModelMatrix] is used to transform the formula of the task +#' before fitting the learner. #' -#' @examples +#' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners"), quietly = TRUE) #' \dontrun{ -#' if (requireNamespace("mlr3pipelines", quietly = TRUE) && -#' requireNamespace("mlr3learners", quietly = TRUE)) { -#' #' library(mlr3) #' library(mlr3learners) #' library(mlr3pipelines) @@ -628,7 +603,6 @@ pipeline_survtoregr = function(method = 1, regr_learner = lrn("regr.featureless" #' grlrn$train(task, row_ids = part$train) #' grlrn$predict(task, row_ids = part$test) #' } -#' } #' @export pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, rhs = NULL, graph_learner = FALSE) { @@ -659,10 +633,15 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, gr } -#' @name mlr_graphs_survtoclassif_IPCW -#' @title Survival to Classification Reduction Pipeline using IPCW -#' @description Wrapper around multiple [PipeOp][mlr3pipelines::PipeOp]s to help in creation -#' of complex survival reduction methods. +#' @template pipeline +#' @templateVar pipeop [PipeOpTaskSurvClassifIPCW] and [PipeOpPredClassifSurvIPCW] +#' @templateVar id survtoclassif_IPCW +#' @templateVar title Survival to Classification Reduction using IPCW +#' @section Dictionary: +#' Additional alias id for pipeline construction: +#' ``` +#' ppl("survtoclassif_vock") +#' ``` #' #' @param learner [LearnerClassif][mlr3::LearnerClassif]\cr #' Classification learner to fit the transformed [TaskClassif][mlr3::TaskClassif]. @@ -684,9 +663,6 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' 3. [PipeOpPredClassifSurvIPCW] transforms the resulting [PredictionClassif][mlr3::PredictionClassif] #' to [PredictionSurv]. #' -#' @return [mlr3pipelines::Graph] or [mlr3pipelines::GraphLearner] -#' @family pipelines -#' #' @examplesIf mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners"), quietly = TRUE) #' \dontrun{ #' library(mlr3) diff --git a/man/mlr_graphs_crankcompositor.Rd b/man/mlr_graphs_crankcompositor.Rd index 5879a1baf..4af2d53e9 100644 --- a/man/mlr_graphs_crankcompositor.Rd +++ b/man/mlr_graphs_crankcompositor.Rd @@ -37,9 +37,19 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \description{ Wrapper around \link{PipeOpCrankCompositor} to simplify \link[mlr3pipelines:Graph]{Graph} creation. 
} +\section{Dictionary}{ + +This \link[mlr3pipelines:Graph]{Graph} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} +\link[mlr3pipelines:mlr_graphs]{mlr_graphs} or with the associated sugar function \link[mlr3pipelines:ppl]{ppl()}: + +\if{html}{\out{
                                            }}\preformatted{mlr_graphs$get("crankcompositor") +ppl("crankcompositor") +}\if{html}{\out{
                                            }} +} + \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE)) { library("mlr3") library("mlr3pipelines") @@ -57,7 +67,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { grlrn$train(task, part$train) grlrn$predict(task, part$test) } -} +\dontshow{\}) # examplesIf} } \seealso{ Other pipelines: @@ -67,7 +77,6 @@ Other pipelines: \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, \code{\link{mlr_graphs_survtoclassif_IPCW}}, -\code{\link{mlr_graphs_survtoclassif_disctime}}, -\code{\link{mlr_graphs_survtoregr}} +\code{\link{mlr_graphs_survtoclassif_disctime}} } \concept{pipelines} diff --git a/man/mlr_graphs_distrcompositor.Rd b/man/mlr_graphs_distrcompositor.Rd index 76b661d9e..7cb42d2c9 100644 --- a/man/mlr_graphs_distrcompositor.Rd +++ b/man/mlr_graphs_distrcompositor.Rd @@ -14,8 +14,10 @@ pipeline_distrcompositor( ) } \arguments{ -\item{learner}{\link{LearnerSurv}\cr -Survival learner.} +\item{learner}{\verb{[mlr3::Learner]|[mlr3pipelines::PipeOp]|[mlr3pipelines::Graph]} \cr +Either a \code{Learner} which will be wrapped in \link[mlr3pipelines:mlr_pipeops_learner]{mlr3pipelines::PipeOpLearner}, a \code{PipeOp} which will +be wrapped in \link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or a \code{Graph} itself. Underlying \code{Learner} should be +\link{LearnerSurv}.} \item{estimator}{(\code{character(1)})\cr One of \code{kaplan} (default), \code{nelson} or \code{breslow}, corresponding to the Kaplan-Meier, @@ -44,9 +46,19 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \description{ Wrapper around \link{PipeOpDistrCompositor} or \link{PipeOpBreslow} to simplify \link[mlr3pipelines:Graph]{Graph} creation. } +\section{Dictionary}{ + +This \link[mlr3pipelines:Graph]{Graph} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} +\link[mlr3pipelines:mlr_graphs]{mlr_graphs} or with the associated sugar function \link[mlr3pipelines:ppl]{ppl()}: + +\if{html}{\out{
                                            }}\preformatted{mlr_graphs$get("distrcompositor") +ppl("distrcompositor") +}\if{html}{\out{
                                            }} +} + \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE)) { library("mlr3pipelines") # let's change the distribution prediction of Cox (Breslow-based) to an AFT form: @@ -62,7 +74,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { grlrn$train(task) grlrn$predict(task) } -} +\dontshow{\}) # examplesIf} } \seealso{ Other pipelines: @@ -72,7 +84,6 @@ Other pipelines: \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, \code{\link{mlr_graphs_survtoclassif_IPCW}}, -\code{\link{mlr_graphs_survtoclassif_disctime}}, -\code{\link{mlr_graphs_survtoregr}} +\code{\link{mlr_graphs_survtoclassif_disctime}} } \concept{pipelines} diff --git a/man/mlr_graphs_probregr.Rd b/man/mlr_graphs_probregr.Rd index 24ee3490d..08b3cd1d8 100644 --- a/man/mlr_graphs_probregr.Rd +++ b/man/mlr_graphs_probregr.Rd @@ -36,10 +36,19 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \description{ Wrapper around \link{PipeOpProbregr} to simplify \link[mlr3pipelines:Graph]{Graph} creation. } +\section{Dictionary}{ + +This \link[mlr3pipelines:Graph]{Graph} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} +\link[mlr3pipelines:mlr_graphs]{mlr_graphs} or with the associated sugar function \link[mlr3pipelines:ppl]{ppl()}: + +\if{html}{\out{
                                            }}\preformatted{mlr_graphs$get("probregr") +ppl("probregr") +}\if{html}{\out{
                                            }} +} + \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE) && - requireNamespace("rpart", quietly = TRUE)) { library("mlr3") library("mlr3pipelines") @@ -64,7 +73,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE) && pipe$train(task) pipe$predict(task) } -} +\dontshow{\}) # examplesIf} } \seealso{ Other pipelines: @@ -74,7 +83,6 @@ Other pipelines: \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, \code{\link{mlr_graphs_survtoclassif_IPCW}}, -\code{\link{mlr_graphs_survtoclassif_disctime}}, -\code{\link{mlr_graphs_survtoregr}} +\code{\link{mlr_graphs_survtoclassif_disctime}} } \concept{pipelines} diff --git a/man/mlr_graphs_responsecompositor.Rd b/man/mlr_graphs_responsecompositor.Rd index 80f9f1915..5f34c67d0 100644 --- a/man/mlr_graphs_responsecompositor.Rd +++ b/man/mlr_graphs_responsecompositor.Rd @@ -46,9 +46,19 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \description{ Wrapper around \link{PipeOpResponseCompositor} to simplify \link[mlr3pipelines:Graph]{Graph} creation. } +\section{Dictionary}{ + +This \link[mlr3pipelines:Graph]{Graph} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} +\link[mlr3pipelines:mlr_graphs]{mlr_graphs} or with the associated sugar function \link[mlr3pipelines:ppl]{ppl()}: + +\if{html}{\out{
                                            }}\preformatted{mlr_graphs$get("responsecompositor") +ppl("responsecompositor") +}\if{html}{\out{
                                            }} +} + \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE)) { library("mlr3") library("mlr3pipelines") @@ -66,7 +76,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { grlrn$train(task, part$train) grlrn$predict(task, part$test) } -} +\dontshow{\}) # examplesIf} } \seealso{ Other pipelines: @@ -76,7 +86,6 @@ Other pipelines: \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, \code{\link{mlr_graphs_survtoclassif_IPCW}}, -\code{\link{mlr_graphs_survtoclassif_disctime}}, -\code{\link{mlr_graphs_survtoregr}} +\code{\link{mlr_graphs_survtoclassif_disctime}} } \concept{pipelines} diff --git a/man/mlr_graphs_survaverager.Rd b/man/mlr_graphs_survaverager.Rd index 0ede56722..e706e5928 100644 --- a/man/mlr_graphs_survaverager.Rd +++ b/man/mlr_graphs_survaverager.Rd @@ -24,9 +24,19 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \description{ Wrapper around \link{PipeOpSurvAvg} to simplify \link[mlr3pipelines:Graph]{Graph} creation. } +\section{Dictionary}{ + +This \link[mlr3pipelines:Graph]{Graph} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} +\link[mlr3pipelines:mlr_graphs]{mlr_graphs} or with the associated sugar function \link[mlr3pipelines:ppl]{ppl()}: + +\if{html}{\out{
                                            }}\preformatted{mlr_graphs$get("survaverager") +ppl("survaverager") +}\if{html}{\out{
                                            }} +} + \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE)) { library("mlr3") library("mlr3pipelines") @@ -40,7 +50,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { pipe$train(task) pipe$predict(task) } -} +\dontshow{\}) # examplesIf} } \seealso{ Other pipelines: @@ -50,7 +60,6 @@ Other pipelines: \code{\link{mlr_graphs_responsecompositor}}, \code{\link{mlr_graphs_survbagging}}, \code{\link{mlr_graphs_survtoclassif_IPCW}}, -\code{\link{mlr_graphs_survtoclassif_disctime}}, -\code{\link{mlr_graphs_survtoregr}} +\code{\link{mlr_graphs_survtoclassif_disctime}} } \concept{pipelines} diff --git a/man/mlr_graphs_survbagging.Rd b/man/mlr_graphs_survbagging.Rd index 8ea5d37d8..4d01ee60c 100644 --- a/man/mlr_graphs_survbagging.Rd +++ b/man/mlr_graphs_survbagging.Rd @@ -41,8 +41,6 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a } \value{ \link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or \link[mlr3pipelines:mlr_learners_graph]{mlr3pipelines::GraphLearner} - -\link[mlr3pipelines:mlr_learners_graph]{mlr3pipelines::GraphLearner} } \description{ Wrapper around \link[mlr3pipelines:mlr_pipeops_subsample]{PipeOpSubsample} and \link{PipeOpSurvAvg} to simplify \link[mlr3pipelines:Graph]{Graph} creation. @@ -55,9 +53,19 @@ the sample mean of deterministic predictions and a \link[distr6:MixtureDistribution]{MixtureDistribution} of distribution predictions. This can be further enhanced by using a weighted average by supplying \code{weights}. } +\section{Dictionary}{ + +This \link[mlr3pipelines:Graph]{Graph} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} +\link[mlr3pipelines:mlr_graphs]{mlr_graphs} or with the associated sugar function \link[mlr3pipelines:ppl]{ppl()}: + +\if{html}{\out{
                                            }}\preformatted{mlr_graphs$get("survbagging") +ppl("survbagging") +}\if{html}{\out{
                                            }} +} + \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE)) { library("mlr3") library("mlr3pipelines") @@ -71,7 +79,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { pipe$train(task) pipe$predict(task) } -} +\dontshow{\}) # examplesIf} } \seealso{ Other pipelines: @@ -81,7 +89,6 @@ Other pipelines: \code{\link{mlr_graphs_responsecompositor}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survtoclassif_IPCW}}, -\code{\link{mlr_graphs_survtoclassif_disctime}}, -\code{\link{mlr_graphs_survtoregr}} +\code{\link{mlr_graphs_survtoclassif_disctime}} } \concept{pipelines} diff --git a/man/mlr_graphs_survtoclassif_IPCW.Rd b/man/mlr_graphs_survtoclassif_IPCW.Rd index ee560c84c..0fe388f31 100644 --- a/man/mlr_graphs_survtoclassif_IPCW.Rd +++ b/man/mlr_graphs_survtoclassif_IPCW.Rd @@ -3,7 +3,7 @@ \name{mlr_graphs_survtoclassif_IPCW} \alias{mlr_graphs_survtoclassif_IPCW} \alias{pipeline_survtoclassif_IPCW} -\title{Survival to Classification Reduction Pipeline using IPCW} +\title{Survival to Classification Reduction using IPCW Pipeline} \usage{ pipeline_survtoclassif_IPCW( learner, @@ -32,8 +32,7 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or \link[mlr3pipelines:mlr_learners_graph]{mlr3pipelines::GraphLearner} } \description{ -Wrapper around multiple \link[mlr3pipelines:PipeOp]{PipeOp}s to help in creation -of complex survival reduction methods. +Wrapper around \link{PipeOpTaskSurvClassifIPCW} and \link{PipeOpPredClassifSurvIPCW} to simplify \link[mlr3pipelines:Graph]{Graph} creation. } \details{ The pipeline consists of the following steps: @@ -44,6 +43,22 @@ The pipeline consists of the following steps: to \link{PredictionSurv}. } } +\section{Dictionary}{ + +This \link[mlr3pipelines:Graph]{Graph} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} +\link[mlr3pipelines:mlr_graphs]{mlr_graphs} or with the associated sugar function \link[mlr3pipelines:ppl]{ppl()}: + +\if{html}{\out{
                                            }}\preformatted{mlr_graphs$get("survtoclassif_IPCW") +ppl("survtoclassif_IPCW") +}\if{html}{\out{
                                            }} + + +Additional alias id for pipeline construction: + +\if{html}{\out{
                                            }}\preformatted{ppl("survtoclassif_vock") +}\if{html}{\out{
                                            }} +} + \examples{ \dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ @@ -78,7 +93,6 @@ Other pipelines: \code{\link{mlr_graphs_responsecompositor}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, -\code{\link{mlr_graphs_survtoclassif_disctime}}, -\code{\link{mlr_graphs_survtoregr}} +\code{\link{mlr_graphs_survtoclassif_disctime}} } \concept{pipelines} diff --git a/man/mlr_graphs_survtoclassif_disctime.Rd b/man/mlr_graphs_survtoclassif_disctime.Rd index 7ae567bd4..aee1e38df 100644 --- a/man/mlr_graphs_survtoclassif_disctime.Rd +++ b/man/mlr_graphs_survtoclassif_disctime.Rd @@ -3,7 +3,7 @@ \name{mlr_graphs_survtoclassif_disctime} \alias{mlr_graphs_survtoclassif_disctime} \alias{pipeline_survtoclassif_disctime} -\title{Survival to Classification Reduction Pipeline} +\title{Survival to Classification Reduction using Discrete Time Pipeline} \usage{ pipeline_survtoclassif_disctime( learner, @@ -42,25 +42,31 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or \link[mlr3pipelines:mlr_learners_graph]{mlr3pipelines::GraphLearner} } \description{ -Wrapper around multiple \link[mlr3pipelines:PipeOp]{PipeOp}s to help in creation -of complex survival reduction methods. +Wrapper around \link{PipeOpTaskSurvClassifDiscTime} and \link{PipeOpPredClassifSurvDiscTime} to simplify \link[mlr3pipelines:Graph]{Graph} creation. } \details{ The pipeline consists of the following steps: \enumerate{ \item \link{PipeOpTaskSurvClassifDiscTime} Converts \link{TaskSurv} to a \link[mlr3:TaskClassif]{TaskClassif}. \item A \link{LearnerClassif} is fit and predicted on the new \code{TaskClassif}. -\item \link{PipeOpPredClassifSurvDiscTime} transforms the resulting \link[mlr3:PredictionClassif]{PredictionClassif} -to \link{PredictionSurv}. +\item \link{PipeOpPredClassifSurvDiscTime} transforms the resulting \link[mlr3:PredictionClassif]{PredictionClassif} to \link{PredictionSurv}. \item Optionally: \link[mlr3pipelines:mlr_pipeops_modelmatrix]{PipeOpModelMatrix} is used to transform the formula of the task before fitting the learner. } } +\section{Dictionary}{ + +This \link[mlr3pipelines:Graph]{Graph} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} +\link[mlr3pipelines:mlr_graphs]{mlr_graphs} or with the associated sugar function \link[mlr3pipelines:ppl]{ppl()}: + +\if{html}{\out{
                                            }}\preformatted{mlr_graphs$get("survtoclassif_disctime") +ppl("survtoclassif_disctime") +}\if{html}{\out{
                                            }} +} + \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines", "mlr3learners"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE) && - requireNamespace("mlr3learners", quietly = TRUE)) { - library(mlr3) library(mlr3learners) library(mlr3pipelines) @@ -77,7 +83,7 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE) && grlrn$train(task, row_ids = part$train) grlrn$predict(task, row_ids = part$test) } -} +\dontshow{\}) # examplesIf} } \seealso{ Other pipelines: @@ -87,7 +93,6 @@ Other pipelines: \code{\link{mlr_graphs_responsecompositor}}, \code{\link{mlr_graphs_survaverager}}, \code{\link{mlr_graphs_survbagging}}, -\code{\link{mlr_graphs_survtoclassif_IPCW}}, -\code{\link{mlr_graphs_survtoregr}} +\code{\link{mlr_graphs_survtoclassif_IPCW}} } \concept{pipelines} diff --git a/man/mlr_graphs_survtoregr.Rd b/man/mlr_graphs_survtoregr.Rd index 4ae09dd0a..7381e7dd3 100644 --- a/man/mlr_graphs_survtoregr.Rd +++ b/man/mlr_graphs_survtoregr.Rd @@ -61,9 +61,6 @@ insampling.} If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a \link[mlr3pipelines:mlr_learners_graph]{GraphLearner} otherwise (default) returns as a \code{Graph}.} } -\value{ -\link[mlr3pipelines:Graph]{mlr3pipelines::Graph} or \link[mlr3pipelines:mlr_learners_graph]{mlr3pipelines::GraphLearner} -} \description{ Wrapper around multiple \link[mlr3pipelines:PipeOp]{PipeOp}s to help in creation of complex survival reduction methods. Three reductions are currently implemented, @@ -120,8 +117,8 @@ predictors of the new data, which can ultimately be composed to a distribution. } } \examples{ +\dontshow{if (mlr3misc::require_namespaces(c("mlr3pipelines"), quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ -if (requireNamespace("mlr3pipelines", quietly = TRUE)) { library("mlr3") library("mlr3pipelines") @@ -159,17 +156,5 @@ if (requireNamespace("mlr3pipelines", quietly = TRUE)) { pipe$train(task) pipe$predict(task) } +\dontshow{\}) # examplesIf} } -} -\seealso{ -Other pipelines: -\code{\link{mlr_graphs_crankcompositor}}, -\code{\link{mlr_graphs_distrcompositor}}, -\code{\link{mlr_graphs_probregr}}, -\code{\link{mlr_graphs_responsecompositor}}, -\code{\link{mlr_graphs_survaverager}}, -\code{\link{mlr_graphs_survbagging}}, -\code{\link{mlr_graphs_survtoclassif_IPCW}}, -\code{\link{mlr_graphs_survtoclassif_disctime}} -} -\concept{pipelines} From 83349c9c508c54c27273f75b19d0e2faec1f8d28 Mon Sep 17 00:00:00 2001 From: john Date: Tue, 8 Oct 2024 16:43:40 +0200 Subject: [PATCH 80/82] assert classif learner in IPCW and disctime --- R/pipelines.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/pipelines.R b/R/pipelines.R index a9109e53d..0f1d35e87 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -606,6 +606,7 @@ pipeline_survtoregr = function(method = 1, regr_learner = lrn("regr.featureless" #' @export pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, rhs = NULL, graph_learner = FALSE) { + assert_learner(learner, task_type = "classif") assert_true("prob" %in% learner$predict_types) gr = mlr3pipelines::Graph$new() @@ -688,6 +689,7 @@ pipeline_survtoclassif_disctime = function(learner, cut = NULL, max_time = NULL, #' } #' @export pipeline_survtoclassif_IPCW = function(learner, tau = NULL, eps = 1e-3, graph_learner = FALSE) { + assert_learner(learner, task_type = "classif") 
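  # As for the discrete-time pipeline above: fail fast when `learner` is not a
  # classification learner, e.g. passing lrn("surv.coxph") here is expected to
  # error at construction time (assuming assert_learner() checks the learner's
  # task type) instead of failing later inside the graph.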
assert_true("prob" %in% learner$predict_types) gr = mlr3pipelines::Graph$new() From cf41f1c8bd3be30da626ecfb9f16e78c1d0514c2 Mon Sep 17 00:00:00 2001 From: john Date: Tue, 8 Oct 2024 16:59:55 +0200 Subject: [PATCH 81/82] add experimental badge for 3 pipelines (survtoregr, distrcompositor, probregr) --- DESCRIPTION | 1 + R/pipelines.R | 8 +++++++- man/figures/lifecycle-experimental.svg | 1 + man/mlr_graphs_distrcompositor.Rd | 2 ++ man/mlr_graphs_probregr.Rd | 2 ++ man/mlr_graphs_survtoregr.Rd | 1 + 6 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 man/figures/lifecycle-experimental.svg diff --git a/DESCRIPTION b/DESCRIPTION index 687e0277c..1ad0b1e60 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -70,6 +70,7 @@ Suggests: GGally, knitr, lgr, + lifecycle, mlr3pipelines (>= 0.3.4), param6 (>= 0.2.4), pracma, diff --git a/R/pipelines.R b/R/pipelines.R index 0f1d35e87..e37190e0b 100644 --- a/R/pipelines.R +++ b/R/pipelines.R @@ -36,7 +36,6 @@ pipeline_survaverager = function(learners, param_vals = list(), graph_learner = gr } - #' @template pipeline #' @templateVar title Survival Prediction Averaging #' @templateVar pipeop [PipeOpSubsample][mlr3pipelines::PipeOpSubsample] and [PipeOpSurvAvg] @@ -225,6 +224,8 @@ pipeline_responsecompositor = function(learner, method = "rmst", tau = NULL, #' @templateVar pipeop [PipeOpDistrCompositor] or [PipeOpBreslow] #' @templateVar id distrcompositor #' @template param_pipeline_learner +#' @description +#' `r lifecycle::badge("experimental")` #' #' @param estimator (`character(1)`)\cr #' One of `kaplan` (default), `nelson` or `breslow`, corresponding to the Kaplan-Meier, @@ -292,6 +293,9 @@ pipeline_distrcompositor = function(learner, estimator = "kaplan", form = "aft", #' @templateVar pipeop [PipeOpProbregr] #' @templateVar id probregr #' @template param_pipeline_learner_regr +#' @description +#' `r lifecycle::badge("experimental")` +#' #' @param learner_se `[mlr3::Learner]|[mlr3pipelines::PipeOp]` \cr #' Optional [LearnerRegr][mlr3::LearnerRegr] with predict_type `se` to estimate the standard #' error. If left `NULL` then `learner` must have `se` in predict_types. @@ -354,6 +358,8 @@ pipeline_probregr = function(learner, learner_se = NULL, dist = "Uniform", #' @description Wrapper around multiple [PipeOp][mlr3pipelines::PipeOp]s to help in creation #' of complex survival reduction methods. Three reductions are currently implemented, #' see details. +#' `r lifecycle::badge("experimental")` +#' #' @details #' Three reduction strategies are implemented, these are: #' diff --git a/man/figures/lifecycle-experimental.svg b/man/figures/lifecycle-experimental.svg new file mode 100644 index 000000000..d1d060e92 --- /dev/null +++ b/man/figures/lifecycle-experimental.svg @@ -0,0 +1 @@ +lifecyclelifecycleexperimentalexperimental \ No newline at end of file diff --git a/man/mlr_graphs_distrcompositor.Rd b/man/mlr_graphs_distrcompositor.Rd index 7cb42d2c9..1722cf7a4 100644 --- a/man/mlr_graphs_distrcompositor.Rd +++ b/man/mlr_graphs_distrcompositor.Rd @@ -45,6 +45,8 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a } \description{ Wrapper around \link{PipeOpDistrCompositor} or \link{PipeOpBreslow} to simplify \link[mlr3pipelines:Graph]{Graph} creation. 
+
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
 }
 \section{Dictionary}{
 
diff --git a/man/mlr_graphs_probregr.Rd b/man/mlr_graphs_probregr.Rd
index 08b3cd1d8..66668c89b 100644
--- a/man/mlr_graphs_probregr.Rd
+++ b/man/mlr_graphs_probregr.Rd
@@ -35,6 +35,8 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a
 }
 \description{
 Wrapper around \link{PipeOpProbregr} to simplify \link[mlr3pipelines:Graph]{Graph} creation.
+
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
 }
 \section{Dictionary}{
 
diff --git a/man/mlr_graphs_survtoregr.Rd b/man/mlr_graphs_survtoregr.Rd
index 7381e7dd3..5202d21e2 100644
--- a/man/mlr_graphs_survtoregr.Rd
+++ b/man/mlr_graphs_survtoregr.Rd
@@ -65,6 +65,7 @@ If \code{TRUE} returns wraps the \link[mlr3pipelines:Graph]{Graph} as a
 Wrapper around multiple \link[mlr3pipelines:PipeOp]{PipeOp}s to help in creation
 of complex survival reduction methods. Three reductions are currently implemented,
 see details.
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
 }
 \details{
 Three reduction strategies are implemented, these are:

From 69e13df213705e0e3d28d6f7188971ecbaf3ca9a Mon Sep 17 00:00:00 2001
From: john
Date: Tue, 8 Oct 2024 17:01:52 +0200
Subject: [PATCH 82/82] update NEWS

---
 NEWS.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 858f26565..c20e7f20c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,9 @@
 * New pipeline (**reduction method**): `pipeline_survtoclassif_IPCW`
 * Improved the way Integrated Brier score handles the `times` argument and the `t_max`, especially when the survival matrix has one time point (column)
 * Improved documentation of integrated survival scores
+* Improved documentation of all pipelines
 * Temp fix of math-rendering issue in package website
+* Add experimental `lifecycle` badge for 3 pipelines (`survtoregr`, `distrcompositor` and `probregr`) - these are currently either not well supported by the literature or not sufficiently tested.
 # mlr3proba 0.6.8
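With the alias registered earlier in this series, the construction routes documented above are expected to yield equivalent graphs; a small sketch (the learner and `tau` value are illustrative):

```r
library(mlr3proba)
library(mlr3pipelines)

g1 = ppl("survtoclassif_IPCW", learner = lrn("classif.rpart"), tau = 500)
g2 = ppl("survtoclassif_vock", learner = lrn("classif.rpart"), tau = 500)  # alias
g3 = mlr_graphs$get("survtoclassif_IPCW", learner = lrn("classif.rpart"), tau = 500)
```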