From 36587963bd9c196c216e43e57b0338109d76463a Mon Sep 17 00:00:00 2001 From: pepijnvink Date: Wed, 20 Sep 2023 15:17:21 +0200 Subject: [PATCH 01/10] add function plot_miss visualization based on plot_pattern code --- NAMESPACE | 1 + R/plot_miss.R | 100 +++++++++++++++++++++++++++++++++++++++++++++++ man/plot_miss.Rd | 26 ++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 R/plot_miss.R create mode 100644 man/plot_miss.Rd diff --git a/NAMESPACE b/NAMESPACE index 5a976a40..8cf346e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(densityplot) export(ggmice) export(plot_corr) export(plot_flux) +export(plot_miss) export(plot_pattern) export(plot_pred) export(plot_trace) diff --git a/R/plot_miss.R b/R/plot_miss.R new file mode 100644 index 00000000..bbc0f72c --- /dev/null +++ b/R/plot_miss.R @@ -0,0 +1,100 @@ +#' Plot missingness in a dataset +#' +#' @param data An incomplete dataset of class `data.frame` or `matrix`. +#' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". +#' @param border Logical indicating whether borders should be present between tiles. +#' @param row.breaks Optional numeric input specifying the number of breaks to be visualized on the y axis. +#' +#' @return An object of class [ggplot2::ggplot]. +#' +#' @examples +#' plot_miss(mice::nhanes) +#' @export + +plot_miss <- + function(data, + vrb = "all", + border = FALSE, + row.breaks = nrow(data)) { + # input processing + if (is.matrix(data) && ncol(data) > 1) { + data <- as.data.frame(data) + } + verify_data(data, df = TRUE) + vrb <- substitute(vrb) + if (vrb[1] == "all") { + vrb <- names(data) + } else { + vrb <- names(dplyr::select(as.data.frame(data), {{vrb}})) + } + if (".x" %in% vrb || ".y" %in% vrb) { + cli::cli_abort( + c( + "The variable names '.x' and '.y' are used internally to produce the missing data pattern plot.", + "i" = "Please exclude or rename your variable(s)." + ) + ) + } + # Create missingness indicator matrix + na.mat <- purrr::map_df(data[,vrb], function(y) as.numeric(is.na(y))) + + # extract pattern info + vrb <- colnames(na.mat) + rws <- nrow(na.mat) + cls <- ncol(na.mat) + rownr <- rownames(na.mat) + na_row <- na.mat[, cls] + na_col <- na.mat[rws, ] + + # transform to long format + long <- + as.data.frame(cbind(.y = 1:rws, na.mat)) %>% + tidyr::pivot_longer(cols = tidyselect::all_of(vrb), + names_to = "x", + values_to = ".where" + ) %>% + dplyr::mutate( + .x = as.numeric(factor( + .data$x, + levels = vrb, ordered = TRUE + )), + .where = factor( + .data$.where, + levels = c(0, 1), + labels = c("missing", "observed") + ) + ) + gg <- + ggplot2::ggplot( + long, + ggplot2::aes( + .data$.x, + .data$.y, + fill = .data$.where + ) + ) + + ggplot2::scale_fill_manual(values = c( + "observed" = "#006CC2B3", + "missing" = "#B61A51B3" + )) + + ggplot2::scale_alpha_continuous(limits = c(0, 1), guide = "none") + + ggplot2::scale_x_continuous( + breaks = 1:cls, + labels = vrb) + + ggplot2::scale_y_reverse( + n.breaks = row.breaks + ) + + ggplot2::labs( + x = "Variables", + y = "Rows in dataset", + fill = "", + alpha = "" + ) + + theme_minimice() + if(border){ + gg <- gg + ggplot2::geom_tile(color = "black") + } else{ + gg <- gg + ggplot2::geom_tile() + } + return(gg) + } diff --git a/man/plot_miss.Rd b/man/plot_miss.Rd new file mode 100644 index 00000000..b4a2e122 --- /dev/null +++ b/man/plot_miss.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_miss.R +\name{plot_miss} +\alias{plot_miss} +\title{Plot missingness in a dataset} +\usage{ +plot_miss(data, vrb = "all", border = FALSE, row.breaks = nrow(data)) +} +\arguments{ +\item{data}{An incomplete dataset of class \code{data.frame} or \code{matrix}.} + +\item{vrb}{String, vector, or unquoted expression with variable name(s), default is "all".} + +\item{border}{Logical indicating whether borders should be present between tiles.} + +\item{row.breaks}{Optional numeric input specifying the number of breaks to be visualized on the y axis.} +} +\value{ +An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. +} +\description{ +Plot missingness in a dataset +} +\examples{ +plot_miss(mice::nhanes) +} From d372a687103053f0c6bd03be8f422f0c860b5e76 Mon Sep 17 00:00:00 2001 From: pepijnvink Date: Wed, 20 Sep 2023 15:23:01 +0200 Subject: [PATCH 02/10] remove unnecessary objects from plot_miss --- R/plot_miss.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/plot_miss.R b/R/plot_miss.R index bbc0f72c..0ac1f898 100644 --- a/R/plot_miss.R +++ b/R/plot_miss.R @@ -42,9 +42,6 @@ plot_miss <- vrb <- colnames(na.mat) rws <- nrow(na.mat) cls <- ncol(na.mat) - rownr <- rownames(na.mat) - na_row <- na.mat[, cls] - na_col <- na.mat[rws, ] # transform to long format long <- From ee6bd125666078b1b9a07ce2dfe384a68eb81175 Mon Sep 17 00:00:00 2001 From: pepijnvink Date: Tue, 26 Sep 2023 16:38:24 +0200 Subject: [PATCH 03/10] Continue plot_miss and add tests --- NAMESPACE | 1 + R/plot_miss.R | 29 ++++++++++++++++++++++++++--- man/plot_miss.Rd | 10 +++++++++- tests/testthat/test-plot_miss.R.R | 26 ++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-plot_miss.R.R diff --git a/NAMESPACE b/NAMESPACE index 8cf346e8..12244cea 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,7 @@ export(plot_miss) export(plot_pattern) export(plot_pred) export(plot_trace) +export(plot_variance) export(stripplot) export(xyplot) importFrom(magrittr,"%>%") diff --git a/R/plot_miss.R b/R/plot_miss.R index 0ac1f898..53b935e0 100644 --- a/R/plot_miss.R +++ b/R/plot_miss.R @@ -4,6 +4,7 @@ #' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". #' @param border Logical indicating whether borders should be present between tiles. #' @param row.breaks Optional numeric input specifying the number of breaks to be visualized on the y axis. +#' @param ordered Logical indicating whether rows should be ordered according to their pattern. #' #' @return An object of class [ggplot2::ggplot]. #' @@ -15,7 +16,8 @@ plot_miss <- function(data, vrb = "all", border = FALSE, - row.breaks = nrow(data)) { + row.breaks = nrow(data), + ordered = FALSE) { # input processing if (is.matrix(data) && ncol(data) > 1) { data <- as.data.frame(data) @@ -35,9 +37,23 @@ plot_miss <- ) ) } - # Create missingness indicator matrix - na.mat <- purrr::map_df(data[,vrb], function(y) as.numeric(is.na(y))) + if(ordered){ + # extract md.pattern matrix + mdpat <- mice::md.pattern(data, plot = FALSE) %>% + head(., -1) + # save frequency of patterns + freq.pat <- rownames(mdpat) %>% + as.numeric() + na.mat <- mdpat %>% + as.data.frame() %>% + dplyr::select(-ncol(.)) %>% + dplyr::mutate(nmis = freq.pat) %>% + tidyr::uncount(nmis) + } else{ + # Create missingness indicator matrix + na.mat <- purrr::map_df(data[,vrb], function(y) as.numeric(!is.na(y))) + } # extract pattern info vrb <- colnames(na.mat) rws <- nrow(na.mat) @@ -87,11 +103,18 @@ plot_miss <- fill = "", alpha = "" ) + + ggplot2::coord_cartesian(expand = FALSE) + theme_minimice() if(border){ gg <- gg + ggplot2::geom_tile(color = "black") } else{ gg <- gg + ggplot2::geom_tile() } + if(ordered){ + gg <- gg + + ggplot2::theme(axis.text.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank() + ) + } return(gg) } diff --git a/man/plot_miss.Rd b/man/plot_miss.Rd index b4a2e122..246d1d4b 100644 --- a/man/plot_miss.Rd +++ b/man/plot_miss.Rd @@ -4,7 +4,13 @@ \alias{plot_miss} \title{Plot missingness in a dataset} \usage{ -plot_miss(data, vrb = "all", border = FALSE, row.breaks = nrow(data)) +plot_miss( + data, + vrb = "all", + border = FALSE, + row.breaks = nrow(data), + ordered = FALSE +) } \arguments{ \item{data}{An incomplete dataset of class \code{data.frame} or \code{matrix}.} @@ -14,6 +20,8 @@ plot_miss(data, vrb = "all", border = FALSE, row.breaks = nrow(data)) \item{border}{Logical indicating whether borders should be present between tiles.} \item{row.breaks}{Optional numeric input specifying the number of breaks to be visualized on the y axis.} + +\item{ordered}{Logical indicating whether rows should be ordered according to their pattern.} } \value{ An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. diff --git a/tests/testthat/test-plot_miss.R.R b/tests/testthat/test-plot_miss.R.R new file mode 100644 index 00000000..2fd60495 --- /dev/null +++ b/tests/testthat/test-plot_miss.R.R @@ -0,0 +1,26 @@ +# create test objects +dat <- mice::nhanes + +# tests +test_that("plot_miss produces plot", { + expect_s3_class(plot_miss(dat), "ggplot") + expect_s3_class(plot_miss(dat), "ggplot") + expect_s3_class(plot_miss(cbind(dat, "testvar" = NA)), "ggplot") +}) + +test_that("plot_miss works with different inputs", { + expect_s3_class(plot_miss(dat, c("age", "bmi")), "ggplot") + expect_s3_class(plot_miss(dat, c(age, bmi)), "ggplot") + expect_s3_class(plot_miss(data.frame(age = dat$age, testvar = NA)), "ggplot") + expect_s3_class(plot_miss(cbind(dat, "with space" = NA)), "ggplot") +}) + + +test_that("plot_miss with incorrect argument(s)", { + expect_output(plot_miss(na.omit(dat))) + expect_error(plot_miss("test")) + expect_error(plot_miss(dat, vrb = "test")) + expect_error(plot_miss(dat, cluster = "test")) + expect_error(plot_miss(cbind(dat, .x = NA))) + expect_error(plot_miss(dat, npat = "test")) +}) From e8b0d0b27a5640e26a6fa279d38a0c5333dab960 Mon Sep 17 00:00:00 2001 From: pepijnvink Date: Thu, 16 Nov 2023 13:19:17 +0100 Subject: [PATCH 04/10] add option to make square --- R/plot_miss.R | 11 ++++- R/plot_variance.R | 71 +++++++++++++++++++++++++++++++ man/plot_miss.Rd | 3 ++ man/plot_variance.Rd | 29 +++++++++++++ tests/testthat/test-plot_miss.R.R | 6 +-- 5 files changed, 114 insertions(+), 6 deletions(-) create mode 100644 R/plot_variance.R create mode 100644 man/plot_variance.Rd diff --git a/R/plot_miss.R b/R/plot_miss.R index 53b935e0..2bd70218 100644 --- a/R/plot_miss.R +++ b/R/plot_miss.R @@ -5,6 +5,7 @@ #' @param border Logical indicating whether borders should be present between tiles. #' @param row.breaks Optional numeric input specifying the number of breaks to be visualized on the y axis. #' @param ordered Logical indicating whether rows should be ordered according to their pattern. +#' @param square Logical indicating whether the plot tiles should be squares, defaults to squares. #' #' @return An object of class [ggplot2::ggplot]. #' @@ -17,6 +18,7 @@ plot_miss <- vrb = "all", border = FALSE, row.breaks = nrow(data), + square = TRUE, ordered = FALSE) { # input processing if (is.matrix(data) && ncol(data) > 1) { @@ -40,7 +42,7 @@ plot_miss <- if(ordered){ # extract md.pattern matrix mdpat <- mice::md.pattern(data, plot = FALSE) %>% - head(., -1) + utils::head(., -1) # save frequency of patterns freq.pat <- rownames(mdpat) %>% as.numeric() @@ -103,13 +105,18 @@ plot_miss <- fill = "", alpha = "" ) + - ggplot2::coord_cartesian(expand = FALSE) + theme_minimice() + # additional arguments if(border){ gg <- gg + ggplot2::geom_tile(color = "black") } else{ gg <- gg + ggplot2::geom_tile() } + if (square) { + gg <- gg + ggplot2::coord_fixed(expand = FALSE) + } else { + gg <- gg + ggplot2::coord_cartesian(expand = FALSE) + } if(ordered){ gg <- gg + ggplot2::theme(axis.text.y = ggplot2::element_blank(), diff --git a/R/plot_variance.R b/R/plot_variance.R new file mode 100644 index 00000000..ff63b6ab --- /dev/null +++ b/R/plot_variance.R @@ -0,0 +1,71 @@ +#' Plot the scaled between imputation variance for every cell as a heatmap +#' +#' This function plots the cell-level between imputation variance. The function +#' scales the variances column-wise, without centering cf. `base::scale(center = FALSE)` +#' and plots the data image as a heatmap. Darker red cells indicate more variance, +#' lighter cells indicate less variance. White cells represent observed cells or unobserved cells with zero between +#' imputation variance. +#' +#' @param data A package `mice` generated multiply imputed data set of class +#' `mids`. Non-`mids` objects that have not been generated with `mice::mice()` +#' can be converted through a pipeline with `mice::as.mids()`. +#' @param grid Logical indicating whether grid lines should be displayed. +#' +#' @return An object of class `ggplot`. +#' @examples +#' imp <- mice::mice(mice::nhanes, printFlag = FALSE) +#' plot_variance(imp) +#' @export +plot_variance <- function(data, grid = TRUE) { + verify_data(data, imp = TRUE) + if (data$m < 2) { + cli::cli_abort( + c( + "The between imputation variance cannot be computed if there are fewer than two imputations (m < 2).", + "i" = "Please provide an object with 2 or more imputations" + ) + ) + } + if (grid) { + gridcol <- "black" + } else { + gridcol <- NA + } + + gg <- mice::complete(data, "long") %>% + dplyr::mutate(dplyr::across(where(is.factor), as.numeric)) %>% + dplyr::select(-.imp) %>% + dplyr::group_by(.id) %>% + dplyr::summarise(dplyr::across(dplyr::everything(), stats::var)) %>% + dplyr::ungroup() %>% + dplyr::mutate(dplyr::across(.cols = -.id, ~ scale_above_zero(.))) %>% + tidyr::pivot_longer(cols = -.id) %>% + ggplot2::ggplot(ggplot2::aes(name, .id, fill = value)) + + ggplot2::geom_tile(color = gridcol) + + ggplot2::scale_fill_gradient(low = "white", high = mice::mdc(2)) + + ggplot2::labs( + x = "Column name", + y = "Row number", + fill = "Imputation variability* + ", + caption = "*scaled cell-level between imputation variance" + ) + # "Cell-level between imputation\nvariance (scaled)\n\n" + ggplot2::scale_x_discrete(position = "top", expand = c(0, 0)) + + ggplot2::scale_y_continuous(trans = "reverse", expand = c(0, 0)) + + theme_minimice() + + if (!grid) { + gg <- + gg + ggplot2::theme(panel.border = ggplot2::element_rect(fill = NA)) + } + + # return the ggplot object + return(gg) +} + +# function to scale only non-zero values without centering +scale_above_zero <- function(x) { + x <- as.matrix(x) + x[x != 0] <- scale(x[x != 0], center = FALSE) + return(x) +} diff --git a/man/plot_miss.Rd b/man/plot_miss.Rd index 246d1d4b..c1103c89 100644 --- a/man/plot_miss.Rd +++ b/man/plot_miss.Rd @@ -9,6 +9,7 @@ plot_miss( vrb = "all", border = FALSE, row.breaks = nrow(data), + square = TRUE, ordered = FALSE ) } @@ -21,6 +22,8 @@ plot_miss( \item{row.breaks}{Optional numeric input specifying the number of breaks to be visualized on the y axis.} +\item{square}{Logical indicating whether the plot tiles should be squares, defaults to squares.} + \item{ordered}{Logical indicating whether rows should be ordered according to their pattern.} } \value{ diff --git a/man/plot_variance.Rd b/man/plot_variance.Rd new file mode 100644 index 00000000..ade26899 --- /dev/null +++ b/man/plot_variance.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_variance.R +\name{plot_variance} +\alias{plot_variance} +\title{Plot the scaled between imputation variance for every cell as a heatmap} +\usage{ +plot_variance(data, grid = TRUE) +} +\arguments{ +\item{data}{A package \code{mice} generated multiply imputed data set of class +\code{mids}. Non-\code{mids} objects that have not been generated with \code{mice::mice()} +can be converted through a pipeline with \code{mice::as.mids()}.} + +\item{grid}{Logical indicating whether grid lines should be displayed.} +} +\value{ +An object of class \code{ggplot}. +} +\description{ +This function plots the cell-level between imputation variance. The function +scales the variances column-wise, without centering cf. \code{base::scale(center = FALSE)} +and plots the data image as a heatmap. Darker red cells indicate more variance, +lighter cells indicate less variance. White cells represent observed cells or unobserved cells with zero between +imputation variance. +} +\examples{ +imp <- mice::mice(mice::nhanes, printFlag = FALSE) +plot_variance(imp) +} diff --git a/tests/testthat/test-plot_miss.R.R b/tests/testthat/test-plot_miss.R.R index 2fd60495..d08d30d8 100644 --- a/tests/testthat/test-plot_miss.R.R +++ b/tests/testthat/test-plot_miss.R.R @@ -4,7 +4,7 @@ dat <- mice::nhanes # tests test_that("plot_miss produces plot", { expect_s3_class(plot_miss(dat), "ggplot") - expect_s3_class(plot_miss(dat), "ggplot") + expect_s3_class(plot_miss(dat, border = TRUE, ordered = T, row.breaks = 25, square = TRUE), "ggplot") expect_s3_class(plot_miss(cbind(dat, "testvar" = NA)), "ggplot") }) @@ -17,10 +17,8 @@ test_that("plot_miss works with different inputs", { test_that("plot_miss with incorrect argument(s)", { - expect_output(plot_miss(na.omit(dat))) + expect_s3_class(plot_miss(na.omit(dat)), "ggplot") expect_error(plot_miss("test")) expect_error(plot_miss(dat, vrb = "test")) - expect_error(plot_miss(dat, cluster = "test")) expect_error(plot_miss(cbind(dat, .x = NA))) - expect_error(plot_miss(dat, npat = "test")) }) From c1fa298b41aaec0952023a47dfb6ac822bc1f8b2 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Thu, 16 Nov 2023 14:35:55 +0100 Subject: [PATCH 05/10] Remove breaks argument `plot_miss` --- R/plot_miss.R | 77 ++++++++++++++----------------- man/plot_miss.Rd | 11 +---- tests/testthat/test-plot_miss.R.R | 2 +- 3 files changed, 37 insertions(+), 53 deletions(-) diff --git a/R/plot_miss.R b/R/plot_miss.R index 2bd70218..aebc44b5 100644 --- a/R/plot_miss.R +++ b/R/plot_miss.R @@ -3,7 +3,6 @@ #' @param data An incomplete dataset of class `data.frame` or `matrix`. #' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". #' @param border Logical indicating whether borders should be present between tiles. -#' @param row.breaks Optional numeric input specifying the number of breaks to be visualized on the y axis. #' @param ordered Logical indicating whether rows should be ordered according to their pattern. #' @param square Logical indicating whether the plot tiles should be squares, defaults to squares. #' @@ -17,8 +16,7 @@ plot_miss <- function(data, vrb = "all", border = FALSE, - row.breaks = nrow(data), - square = TRUE, + square = FALSE, ordered = FALSE) { # input processing if (is.matrix(data) && ncol(data) > 1) { @@ -39,7 +37,7 @@ plot_miss <- ) ) } - if(ordered){ + if (ordered) { # extract md.pattern matrix mdpat <- mice::md.pattern(data, plot = FALSE) %>% utils::head(., -1) @@ -52,9 +50,11 @@ plot_miss <- dplyr::select(-ncol(.)) %>% dplyr::mutate(nmis = freq.pat) %>% tidyr::uncount(nmis) - } else{ - # Create missingness indicator matrix - na.mat <- purrr::map_df(data[,vrb], function(y) as.numeric(!is.na(y))) + } else { + # Create missingness indicator matrix + na.mat <- + purrr::map_df(data[, vrb], function(y) + as.numeric(!is.na(y))) } # extract pattern info vrb <- colnames(na.mat) @@ -64,50 +64,42 @@ plot_miss <- # transform to long format long <- as.data.frame(cbind(.y = 1:rws, na.mat)) %>% - tidyr::pivot_longer(cols = tidyselect::all_of(vrb), - names_to = "x", - values_to = ".where" + tidyr::pivot_longer( + cols = tidyselect::all_of(vrb), + names_to = "x", + values_to = ".where" ) %>% - dplyr::mutate( - .x = as.numeric(factor( - .data$x, - levels = vrb, ordered = TRUE - )), - .where = factor( - .data$.where, - levels = c(0, 1), - labels = c("missing", "observed") - ) - ) + dplyr::mutate(.x = as.numeric(factor( + .data$x, + levels = vrb, ordered = TRUE + )), + .where = factor( + .data$.where, + levels = c(0, 1), + labels = c("missing", "observed") + )) gg <- - ggplot2::ggplot( - long, - ggplot2::aes( - .data$.x, - .data$.y, - fill = .data$.where - ) - ) + + ggplot2::ggplot(long, + ggplot2::aes(.data$.x, + as.numeric(.data$.y), + fill = .data$.where)) + ggplot2::scale_fill_manual(values = c( "observed" = "#006CC2B3", "missing" = "#B61A51B3" )) + ggplot2::scale_alpha_continuous(limits = c(0, 1), guide = "none") + - ggplot2::scale_x_continuous( - breaks = 1:cls, - labels = vrb) + - ggplot2::scale_y_reverse( - n.breaks = row.breaks - ) + + ggplot2::scale_x_continuous(breaks = 1:cls, + labels = vrb) + + ggplot2::scale_y_reverse() + ggplot2::labs( - x = "Variables", - y = "Rows in dataset", + x = "Column name", + y = "Row number", fill = "", alpha = "" ) + theme_minimice() # additional arguments - if(border){ + if (border) { gg <- gg + ggplot2::geom_tile(color = "black") } else{ gg <- gg + ggplot2::geom_tile() @@ -117,11 +109,12 @@ plot_miss <- } else { gg <- gg + ggplot2::coord_cartesian(expand = FALSE) } - if(ordered){ + if (ordered) { gg <- gg + - ggplot2::theme(axis.text.y = ggplot2::element_blank(), - axis.ticks.y = ggplot2::element_blank() - ) + ggplot2::theme( + axis.text.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank() + ) } return(gg) } diff --git a/man/plot_miss.Rd b/man/plot_miss.Rd index c1103c89..7f405f27 100644 --- a/man/plot_miss.Rd +++ b/man/plot_miss.Rd @@ -4,14 +4,7 @@ \alias{plot_miss} \title{Plot missingness in a dataset} \usage{ -plot_miss( - data, - vrb = "all", - border = FALSE, - row.breaks = nrow(data), - square = TRUE, - ordered = FALSE -) +plot_miss(data, vrb = "all", border = FALSE, square = FALSE, ordered = FALSE) } \arguments{ \item{data}{An incomplete dataset of class \code{data.frame} or \code{matrix}.} @@ -20,8 +13,6 @@ plot_miss( \item{border}{Logical indicating whether borders should be present between tiles.} -\item{row.breaks}{Optional numeric input specifying the number of breaks to be visualized on the y axis.} - \item{square}{Logical indicating whether the plot tiles should be squares, defaults to squares.} \item{ordered}{Logical indicating whether rows should be ordered according to their pattern.} diff --git a/tests/testthat/test-plot_miss.R.R b/tests/testthat/test-plot_miss.R.R index d08d30d8..7c934f2f 100644 --- a/tests/testthat/test-plot_miss.R.R +++ b/tests/testthat/test-plot_miss.R.R @@ -4,7 +4,7 @@ dat <- mice::nhanes # tests test_that("plot_miss produces plot", { expect_s3_class(plot_miss(dat), "ggplot") - expect_s3_class(plot_miss(dat, border = TRUE, ordered = T, row.breaks = 25, square = TRUE), "ggplot") + expect_s3_class(plot_miss(dat, border = TRUE, ordered = TRUE, square = TRUE), "ggplot") expect_s3_class(plot_miss(cbind(dat, "testvar" = NA)), "ggplot") }) From 357f9be08a12e9ea876edb692276b69b9cc8007f Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Thu, 16 Nov 2023 14:59:08 +0100 Subject: [PATCH 06/10] Add axis tick on row 0 `plot_miss` --- R/plot_miss.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/plot_miss.R b/R/plot_miss.R index aebc44b5..8b7c6f97 100644 --- a/R/plot_miss.R +++ b/R/plot_miss.R @@ -90,7 +90,7 @@ plot_miss <- ggplot2::scale_alpha_continuous(limits = c(0, 1), guide = "none") + ggplot2::scale_x_continuous(breaks = 1:cls, labels = vrb) + - ggplot2::scale_y_reverse() + + ggplot2::scale_y_reverse(expand = ggplot2::expansion(add = c(1, 1))) + ggplot2::labs( x = "Column name", y = "Row number", @@ -105,9 +105,9 @@ plot_miss <- gg <- gg + ggplot2::geom_tile() } if (square) { - gg <- gg + ggplot2::coord_fixed(expand = FALSE) + gg <- gg + ggplot2::coord_fixed() } else { - gg <- gg + ggplot2::coord_cartesian(expand = FALSE) + gg <- gg + ggplot2::coord_cartesian() } if (ordered) { gg <- gg + From 398f9ebd9a781def1a46bea8d3eaf9f6388be225 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Thu, 16 Nov 2023 20:57:03 +0100 Subject: [PATCH 07/10] Add row labels min and max `plot_miss` --- DESCRIPTION | 1 + R/plot_miss.R | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1ce588d1..bc53ef37 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Imports: mice, purrr, rlang, + scales, stats, stringr, tidyr, diff --git a/R/plot_miss.R b/R/plot_miss.R index 8b7c6f97..013ccb0b 100644 --- a/R/plot_miss.R +++ b/R/plot_miss.R @@ -90,7 +90,12 @@ plot_miss <- ggplot2::scale_alpha_continuous(limits = c(0, 1), guide = "none") + ggplot2::scale_x_continuous(breaks = 1:cls, labels = vrb) + - ggplot2::scale_y_reverse(expand = ggplot2::expansion(add = c(1, 1))) + + ggplot2::scale_y_reverse(breaks = \(y) { + eb = scales::extended_breaks()(y) + eb[1] = min(long$.y) + eb[length(eb)] = max(long$.y) + eb + }) + ggplot2::labs( x = "Column name", y = "Row number", @@ -105,9 +110,9 @@ plot_miss <- gg <- gg + ggplot2::geom_tile() } if (square) { - gg <- gg + ggplot2::coord_fixed() + gg <- gg + ggplot2::coord_fixed(expand = FALSE) } else { - gg <- gg + ggplot2::coord_cartesian() + gg <- gg + ggplot2::coord_cartesian(expand = FALSE) } if (ordered) { gg <- gg + @@ -118,3 +123,4 @@ plot_miss <- } return(gg) } + From 24ec965fea0e23d3e12ee1613b9935eee008adf9 Mon Sep 17 00:00:00 2001 From: pepijnvink Date: Thu, 7 Dec 2023 10:44:48 +0100 Subject: [PATCH 08/10] Add global variable "nmis" and change "." to ".data" --- R/plot_miss.R | 5 ++--- R/utils.R | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/plot_miss.R b/R/plot_miss.R index aebc44b5..ece9dab9 100644 --- a/R/plot_miss.R +++ b/R/plot_miss.R @@ -39,15 +39,14 @@ plot_miss <- } if (ordered) { # extract md.pattern matrix - mdpat <- mice::md.pattern(data, plot = FALSE) %>% - utils::head(., -1) + mdpat <- utils::head(mice::md.pattern(data, plot = FALSE), -1) # save frequency of patterns freq.pat <- rownames(mdpat) %>% as.numeric() na.mat <- mdpat %>% as.data.frame() %>% - dplyr::select(-ncol(.)) %>% + dplyr::select(-ncol(.data)) %>% dplyr::mutate(nmis = freq.pat) %>% tidyr::uncount(nmis) } else { diff --git a/R/utils.R b/R/utils.R index 67fcf484..974e1430 100644 --- a/R/utils.R +++ b/R/utils.R @@ -20,7 +20,7 @@ NULL # suppress undefined global functions or variables note -utils::globalVariables(c(".id", ".imp", ".where", ".id", "where", "name", "value")) +utils::globalVariables(c(".id", ".imp", ".where", ".id", "where", "name", "value", "nmis")) # Alias a function with `foo <- function(...) pkgB::blah(...)` From 62e29256072b2a7ea5fce308fec0275a8d0c23a9 Mon Sep 17 00:00:00 2001 From: pepijnvink Date: Thu, 14 Dec 2023 12:26:10 +0100 Subject: [PATCH 09/10] Change 'border' parameter to 'grid' --- R/plot_miss.R | 6 +++--- man/plot_miss.Rd | 4 ++-- tests/testthat/{test-plot_miss.R.R => test-plot_miss.R} | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) rename tests/testthat/{test-plot_miss.R.R => test-plot_miss.R} (89%) diff --git a/R/plot_miss.R b/R/plot_miss.R index 86f1e802..46d5f7f1 100644 --- a/R/plot_miss.R +++ b/R/plot_miss.R @@ -2,7 +2,7 @@ #' #' @param data An incomplete dataset of class `data.frame` or `matrix`. #' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". -#' @param border Logical indicating whether borders should be present between tiles. +#' @param grid Logical indicating whether borders should be present between tiles. #' @param ordered Logical indicating whether rows should be ordered according to their pattern. #' @param square Logical indicating whether the plot tiles should be squares, defaults to squares. #' @@ -15,7 +15,7 @@ plot_miss <- function(data, vrb = "all", - border = FALSE, + grid = FALSE, square = FALSE, ordered = FALSE) { # input processing @@ -103,7 +103,7 @@ plot_miss <- ) + theme_minimice() # additional arguments - if (border) { + if (grid) { gg <- gg + ggplot2::geom_tile(color = "black") } else{ gg <- gg + ggplot2::geom_tile() diff --git a/man/plot_miss.Rd b/man/plot_miss.Rd index 7f405f27..98264a8c 100644 --- a/man/plot_miss.Rd +++ b/man/plot_miss.Rd @@ -4,14 +4,14 @@ \alias{plot_miss} \title{Plot missingness in a dataset} \usage{ -plot_miss(data, vrb = "all", border = FALSE, square = FALSE, ordered = FALSE) +plot_miss(data, vrb = "all", grid = FALSE, square = FALSE, ordered = FALSE) } \arguments{ \item{data}{An incomplete dataset of class \code{data.frame} or \code{matrix}.} \item{vrb}{String, vector, or unquoted expression with variable name(s), default is "all".} -\item{border}{Logical indicating whether borders should be present between tiles.} +\item{grid}{Logical indicating whether borders should be present between tiles.} \item{square}{Logical indicating whether the plot tiles should be squares, defaults to squares.} diff --git a/tests/testthat/test-plot_miss.R.R b/tests/testthat/test-plot_miss.R similarity index 89% rename from tests/testthat/test-plot_miss.R.R rename to tests/testthat/test-plot_miss.R index 7c934f2f..8f10f00e 100644 --- a/tests/testthat/test-plot_miss.R.R +++ b/tests/testthat/test-plot_miss.R @@ -4,7 +4,7 @@ dat <- mice::nhanes # tests test_that("plot_miss produces plot", { expect_s3_class(plot_miss(dat), "ggplot") - expect_s3_class(plot_miss(dat, border = TRUE, ordered = TRUE, square = TRUE), "ggplot") + expect_s3_class(plot_miss(dat, grid = TRUE, ordered = TRUE, square = TRUE), "ggplot") expect_s3_class(plot_miss(cbind(dat, "testvar" = NA)), "ggplot") }) From 7b107d9441a46e3357478f574fab5c850545bed6 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Thu, 21 Dec 2023 15:40:00 +0100 Subject: [PATCH 10/10] Remove `plot_variance()` in line with `main` --- NAMESPACE | 1 - R/plot_variance.R | 71 -------------------------------------------- man/plot_variance.Rd | 29 ------------------ 3 files changed, 101 deletions(-) delete mode 100644 R/plot_variance.R delete mode 100644 man/plot_variance.Rd diff --git a/NAMESPACE b/NAMESPACE index 12244cea..8cf346e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,7 +10,6 @@ export(plot_miss) export(plot_pattern) export(plot_pred) export(plot_trace) -export(plot_variance) export(stripplot) export(xyplot) importFrom(magrittr,"%>%") diff --git a/R/plot_variance.R b/R/plot_variance.R deleted file mode 100644 index ff63b6ab..00000000 --- a/R/plot_variance.R +++ /dev/null @@ -1,71 +0,0 @@ -#' Plot the scaled between imputation variance for every cell as a heatmap -#' -#' This function plots the cell-level between imputation variance. The function -#' scales the variances column-wise, without centering cf. `base::scale(center = FALSE)` -#' and plots the data image as a heatmap. Darker red cells indicate more variance, -#' lighter cells indicate less variance. White cells represent observed cells or unobserved cells with zero between -#' imputation variance. -#' -#' @param data A package `mice` generated multiply imputed data set of class -#' `mids`. Non-`mids` objects that have not been generated with `mice::mice()` -#' can be converted through a pipeline with `mice::as.mids()`. -#' @param grid Logical indicating whether grid lines should be displayed. -#' -#' @return An object of class `ggplot`. -#' @examples -#' imp <- mice::mice(mice::nhanes, printFlag = FALSE) -#' plot_variance(imp) -#' @export -plot_variance <- function(data, grid = TRUE) { - verify_data(data, imp = TRUE) - if (data$m < 2) { - cli::cli_abort( - c( - "The between imputation variance cannot be computed if there are fewer than two imputations (m < 2).", - "i" = "Please provide an object with 2 or more imputations" - ) - ) - } - if (grid) { - gridcol <- "black" - } else { - gridcol <- NA - } - - gg <- mice::complete(data, "long") %>% - dplyr::mutate(dplyr::across(where(is.factor), as.numeric)) %>% - dplyr::select(-.imp) %>% - dplyr::group_by(.id) %>% - dplyr::summarise(dplyr::across(dplyr::everything(), stats::var)) %>% - dplyr::ungroup() %>% - dplyr::mutate(dplyr::across(.cols = -.id, ~ scale_above_zero(.))) %>% - tidyr::pivot_longer(cols = -.id) %>% - ggplot2::ggplot(ggplot2::aes(name, .id, fill = value)) + - ggplot2::geom_tile(color = gridcol) + - ggplot2::scale_fill_gradient(low = "white", high = mice::mdc(2)) + - ggplot2::labs( - x = "Column name", - y = "Row number", - fill = "Imputation variability* - ", - caption = "*scaled cell-level between imputation variance" - ) + # "Cell-level between imputation\nvariance (scaled)\n\n" - ggplot2::scale_x_discrete(position = "top", expand = c(0, 0)) + - ggplot2::scale_y_continuous(trans = "reverse", expand = c(0, 0)) + - theme_minimice() - - if (!grid) { - gg <- - gg + ggplot2::theme(panel.border = ggplot2::element_rect(fill = NA)) - } - - # return the ggplot object - return(gg) -} - -# function to scale only non-zero values without centering -scale_above_zero <- function(x) { - x <- as.matrix(x) - x[x != 0] <- scale(x[x != 0], center = FALSE) - return(x) -} diff --git a/man/plot_variance.Rd b/man/plot_variance.Rd deleted file mode 100644 index ade26899..00000000 --- a/man/plot_variance.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plot_variance.R -\name{plot_variance} -\alias{plot_variance} -\title{Plot the scaled between imputation variance for every cell as a heatmap} -\usage{ -plot_variance(data, grid = TRUE) -} -\arguments{ -\item{data}{A package \code{mice} generated multiply imputed data set of class -\code{mids}. Non-\code{mids} objects that have not been generated with \code{mice::mice()} -can be converted through a pipeline with \code{mice::as.mids()}.} - -\item{grid}{Logical indicating whether grid lines should be displayed.} -} -\value{ -An object of class \code{ggplot}. -} -\description{ -This function plots the cell-level between imputation variance. The function -scales the variances column-wise, without centering cf. \code{base::scale(center = FALSE)} -and plots the data image as a heatmap. Darker red cells indicate more variance, -lighter cells indicate less variance. White cells represent observed cells or unobserved cells with zero between -imputation variance. -} -\examples{ -imp <- mice::mice(mice::nhanes, printFlag = FALSE) -plot_variance(imp) -}