diff --git a/DESCRIPTION b/DESCRIPTION index 39077d11..c51d25e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Imports: mice, purrr, rlang, + scales, stats, stringr, tidyr, diff --git a/NAMESPACE b/NAMESPACE index 5a976a40..8cf346e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(densityplot) export(ggmice) export(plot_corr) export(plot_flux) +export(plot_miss) export(plot_pattern) export(plot_pred) export(plot_trace) diff --git a/R/plot_miss.R b/R/plot_miss.R new file mode 100644 index 00000000..46d5f7f1 --- /dev/null +++ b/R/plot_miss.R @@ -0,0 +1,125 @@ +#' Plot missingness in a dataset +#' +#' @param data An incomplete dataset of class `data.frame` or `matrix`. +#' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". +#' @param grid Logical indicating whether borders should be present between tiles. +#' @param ordered Logical indicating whether rows should be ordered according to their pattern. +#' @param square Logical indicating whether the plot tiles should be squares, defaults to squares. +#' +#' @return An object of class [ggplot2::ggplot]. +#' +#' @examples +#' plot_miss(mice::nhanes) +#' @export + +plot_miss <- + function(data, + vrb = "all", + grid = FALSE, + square = FALSE, + ordered = FALSE) { + # input processing + if (is.matrix(data) && ncol(data) > 1) { + data <- as.data.frame(data) + } + verify_data(data, df = TRUE) + vrb <- substitute(vrb) + if (vrb[1] == "all") { + vrb <- names(data) + } else { + vrb <- names(dplyr::select(as.data.frame(data), {{vrb}})) + } + if (".x" %in% vrb || ".y" %in% vrb) { + cli::cli_abort( + c( + "The variable names '.x' and '.y' are used internally to produce the missing data pattern plot.", + "i" = "Please exclude or rename your variable(s)." + ) + ) + } + if (ordered) { + # extract md.pattern matrix + mdpat <- utils::head(mice::md.pattern(data, plot = FALSE), -1) + # save frequency of patterns + freq.pat <- rownames(mdpat) %>% + as.numeric() + + na.mat <- mdpat %>% + as.data.frame() %>% + dplyr::select(-ncol(.data)) %>% + dplyr::mutate(nmis = freq.pat) %>% + tidyr::uncount(nmis) + } else { + # Create missingness indicator matrix + na.mat <- + purrr::map_df(data[, vrb], function(y) + as.numeric(!is.na(y))) + } + # extract pattern info + vrb <- colnames(na.mat) + rws <- nrow(na.mat) + cls <- ncol(na.mat) + + # transform to long format + long <- + as.data.frame(cbind(.y = 1:rws, na.mat)) %>% + tidyr::pivot_longer( + cols = tidyselect::all_of(vrb), + names_to = "x", + values_to = ".where" + ) %>% + dplyr::mutate(.x = as.numeric(factor( + .data$x, + levels = vrb, ordered = TRUE + )), + .where = factor( + .data$.where, + levels = c(0, 1), + labels = c("missing", "observed") + )) + gg <- + ggplot2::ggplot(long, + ggplot2::aes(.data$.x, + as.numeric(.data$.y), + fill = .data$.where)) + + ggplot2::scale_fill_manual(values = c( + "observed" = "#006CC2B3", + "missing" = "#B61A51B3" + )) + + ggplot2::scale_alpha_continuous(limits = c(0, 1), guide = "none") + + ggplot2::scale_x_continuous(breaks = 1:cls, + labels = vrb) + + ggplot2::scale_y_reverse(breaks = \(y) { + eb = scales::extended_breaks()(y) + eb[1] = min(long$.y) + eb[length(eb)] = max(long$.y) + eb + }) + + ggplot2::labs( + x = "Column name", + y = "Row number", + fill = "", + alpha = "" + ) + + theme_minimice() + # additional arguments + if (grid) { + gg <- gg + ggplot2::geom_tile(color = "black") + } else{ + gg <- gg + ggplot2::geom_tile() + } + if (square) { + gg <- gg + ggplot2::coord_fixed(expand = FALSE) + } else { + gg <- gg + ggplot2::coord_cartesian(expand = FALSE) + } + if (ordered) { + gg <- gg + + ggplot2::theme( + axis.text.y = ggplot2::element_blank(), + axis.ticks.y = ggplot2::element_blank() + ) + } + return(gg) + } + diff --git a/R/utils.R b/R/utils.R index 2cb42595..2b35dba4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -19,6 +19,11 @@ #' @return The result of calling `rhs(lhs)`. NULL +# suppress undefined global functions or variables note +utils::globalVariables(c(".id", ".imp", ".where", ".id", "where", "name", "value", "nmis")) + +# Alias a function with `foo <- function(...) pkgB::blah(...)` + #' Utils function to validate data argument inputs #' #' @param data The input supplied to the 'data' argument. diff --git a/man/plot_miss.Rd b/man/plot_miss.Rd new file mode 100644 index 00000000..98264a8c --- /dev/null +++ b/man/plot_miss.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_miss.R +\name{plot_miss} +\alias{plot_miss} +\title{Plot missingness in a dataset} +\usage{ +plot_miss(data, vrb = "all", grid = FALSE, square = FALSE, ordered = FALSE) +} +\arguments{ +\item{data}{An incomplete dataset of class \code{data.frame} or \code{matrix}.} + +\item{vrb}{String, vector, or unquoted expression with variable name(s), default is "all".} + +\item{grid}{Logical indicating whether borders should be present between tiles.} + +\item{square}{Logical indicating whether the plot tiles should be squares, defaults to squares.} + +\item{ordered}{Logical indicating whether rows should be ordered according to their pattern.} +} +\value{ +An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. +} +\description{ +Plot missingness in a dataset +} +\examples{ +plot_miss(mice::nhanes) +} diff --git a/tests/testthat/test-plot_miss.R b/tests/testthat/test-plot_miss.R new file mode 100644 index 00000000..8f10f00e --- /dev/null +++ b/tests/testthat/test-plot_miss.R @@ -0,0 +1,24 @@ +# create test objects +dat <- mice::nhanes + +# tests +test_that("plot_miss produces plot", { + expect_s3_class(plot_miss(dat), "ggplot") + expect_s3_class(plot_miss(dat, grid = TRUE, ordered = TRUE, square = TRUE), "ggplot") + expect_s3_class(plot_miss(cbind(dat, "testvar" = NA)), "ggplot") +}) + +test_that("plot_miss works with different inputs", { + expect_s3_class(plot_miss(dat, c("age", "bmi")), "ggplot") + expect_s3_class(plot_miss(dat, c(age, bmi)), "ggplot") + expect_s3_class(plot_miss(data.frame(age = dat$age, testvar = NA)), "ggplot") + expect_s3_class(plot_miss(cbind(dat, "with space" = NA)), "ggplot") +}) + + +test_that("plot_miss with incorrect argument(s)", { + expect_s3_class(plot_miss(na.omit(dat)), "ggplot") + expect_error(plot_miss("test")) + expect_error(plot_miss(dat, vrb = "test")) + expect_error(plot_miss(cbind(dat, .x = NA))) +})