diff --git a/DESCRIPTION b/DESCRIPTION index 81b956155..6fd8355cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: gtsummary Title: Presentation-Ready Data Summary and Analytic Result Tables -Version: 2.0.1.9008 +Version: 2.0.1.9009 Authors@R: c( person("Daniel D.", "Sjoberg", , "danield.sjoberg@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0862-2018")), diff --git a/NAMESPACE b/NAMESPACE index 423766794..b88ea53c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ S3method(add_difference,tbl_summary) S3method(add_difference,tbl_svysummary) S3method(add_global_p,tbl_regression) S3method(add_global_p,tbl_uvregression) +S3method(add_n,tbl_likert) S3method(add_n,tbl_regression) S3method(add_n,tbl_summary) S3method(add_n,tbl_survfit) @@ -195,6 +196,7 @@ export(tbl_butcher) export(tbl_continuous) export(tbl_cross) export(tbl_custom_summary) +export(tbl_likert) export(tbl_merge) export(tbl_regression) export(tbl_split) diff --git a/NEWS.md b/NEWS.md index 72f4dae5a..669a4c7cb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -21,6 +21,8 @@ Updates to address regressions in the v2.0.0 release: * The `add_glance*(glance_fun)` argument's default value has been updated to an S3 generic, allowing bespoke handling for some regression classes. (#1822) +* Added function `tbl_likert()` for summarizing ordered categorical (or Likert scales) data as well as the associated `add_n.tbl_likert()` S3 method. (#1660) + # gtsummary 2.0.1 Updates to address regressions in the v2.0.0 release: diff --git a/R/add_n.R b/R/add_n.R index 0e2078971..e41cbdcbd 100644 --- a/R/add_n.R +++ b/R/add_n.R @@ -222,3 +222,7 @@ add_n.tbl_summary <- function(x, statistic = "{N_nonmiss}", col_label = "**N**", #' @name add_n_summary #' @export add_n.tbl_svysummary <- add_n.tbl_summary + +#' @name add_n_summary +#' @export +add_n.tbl_likert <- add_n.tbl_summary diff --git a/R/tbl_likert.R b/R/tbl_likert.R new file mode 100644 index 000000000..2017ae36f --- /dev/null +++ b/R/tbl_likert.R @@ -0,0 +1,254 @@ +#' Likert Summary +#' +#' `r lifecycle::badge("experimental")`\cr +#' Create a table of ordered categorical variables in a wide format. +#' +#' @inheritParams tbl_summary +#' @param statistic ([`formula-list-selector`][syntax])\cr +#' Used to specify the summary statistics for each variable. +#' The default is `everything() ~ "{n} ({p}%)"`. +#' @param digits ([`formula-list-selector`][syntax])\cr +#' Specifies how summary statistics are rounded. Values may be either integer(s) +#' or function(s). If not specified, default formatting is assigned +#' via `assign_summary_digits()`. +#' @param sort (`string`)\cr +#' indicates whether levels of variables should be placed in +#' ascending order (the default) or descending. +#' +#' @return a 'tbl_likert' gtsummary table +#' @export +#' +#' @examples +#' levels <- c("Strongly Disagree", "Disagree", "Agree", "Strongly Agree") +#' df_likert <- data.frame( +#' recommend_friend = sample(levels, size = 20, replace = TRUE) |> factor(levels = levels), +#' regret_purchase = sample(levels, size = 20, replace = TRUE) |> factor(levels = levels) +#' ) +#' +#' # Example 1 ---------------------------------- +#' tbl_likert_ex1 <- +#' df_likert |> +#' tbl_likert(include = c(recommend_friend, regret_purchase)) |> +#' add_n() +#' tbl_likert_ex1 +#' +#' # Example 2 ---------------------------------- +#' # Add continuous summary of the likert scores +#' list( +#' tbl_likert_ex1, +#' tbl_wide_summary( +#' df_likert |> dplyr::mutate(dplyr::across(everything(), as.numeric)), +#' statistic = c("{mean}", "{sd}"), +#' type = ~"continuous", +#' include = c(recommend_friend, regret_purchase) +#' ) +#' ) |> +#' tbl_merge(tab_spanner = FALSE) +tbl_likert <- function(data, + statistic = ~"{n} ({p}%)", + label = NULL, + digits = NULL, + include = everything(), + sort = c("ascending", "descending")) { + set_cli_abort_call() + + # process inputs ------------------------------------------------------------- + check_not_missing(data) + check_data_frame(data) + sort <- arg_match(sort) + + cards::process_selectors(data, include = {{ include }}) + + cards::process_formula_selectors(data[include], label = label, statistic = statistic, digits = digits) + cards::check_list_elements( + x = label, + predicate = \(x) is_string(x), + error_msg = "Values pass in {.arg label} argument must be strings." + ) + cards::check_list_elements( + x = statistic, + predicate = \(x) is_string(x), + error_msg = "Values pass in {.arg statistic} argument must be strings." + ) + + # fill in unspecified variables + cards::fill_formula_selectors( + data[include], + statistic = eval(formals(gtsummary::tbl_likert)[["statistic"]]) + ) + + # fill each element of digits argument + if (!missing(digits)) { + digits <- + assign_summary_digits(data[include], statistic, type = rep_named(include, list("categorical")), digits = digits) + } + + .check_haven_labelled(data[include]) + .check_tbl_summary_args( + data = data, label = label, statistic = statistic, + digits = digits, type = rep_named(include, list("categorical")), value = NULL + ) + + # save processed function inputs --------------------------------------------- + tbl_likert_inputs <- as.list(environment()) + call <- match.call() + + # check all variables are factors -------------------------------------------- + if (some(include, ~!inherits(data[[.x]], "factor"))) { + not_fct <- map_chr(include, ~ifelse(!inherits(data[[.x]], "factor"), .x, NA_character_)) |> discard(is.na) + cli::cli_abort( + c("All variables in the {.arg include} argument must be {.cls factor}.", + i = "Variables {.val {not_fct}} are not class {.cls factor}.") + ) + } + + # check all factors have the same levels ------------------------------------- + walk( + include[-1], + \(x) { + if (!identical(levels(data[[include[1]]]), levels(data[[x]]))) { + cli::cli_abort( + c("All variables in the {.arg include} argument must have the same factor levels.", + i = "Variable {.val {include[1]}} has levels {.val {levels(data[[include[1]]])}}.", + i = "Variable {.val {x}} has levels {.val {levels(data[[x]])}}.", + i = "Use {.fun forcats::fct_unify} to unify levels."), + call = get_cli_abort_call() + ) + } + } + ) + + # reverse the order of the levels if indicated ------------------------------- + if (sort == "descending") { + data <- data |> dplyr::mutate(across(all_of(include), fct_rev)) + } + + # tabulate results ----------------------------------------------------------- + cards <- + dplyr::bind_rows( + cards::ard_attributes( + data = data, + variables = all_of(include), + label = label + ), + cards::ard_missing(data, variables = all_of(include)), + cards::ard_categorical( + data = data, + variables = all_of(include), + fmt_fn = digits, + denominator = "column", + stat_label = ~ default_stat_labels() + ) + ) |> + cards::replace_null_statistic() + + # print all warnings and errors that occurred while calculating requested stats + cards::print_ard_conditions(cards) + + # translate statistic labels ------------------------------------------------- + cards$stat_label <- translate_vector(cards$stat_label) + + # add the gtsummary column names to ARD data frame --------------------------- + cards <- + cards |> + dplyr::left_join( + dplyr::tibble( + variable_level = factor(levels(data[[include[1]]]), levels = levels(data[[include[1]]])) |> as.list(), + gts_column = paste0("stat_", seq_along(levels(data[[include[1]]]))) + ), + by = "variable_level" + ) + + x <- + brdg_likert(cards = cards, variables = include, statistic = statistic) |> + append( + list( + cards = list(tbl_likert = cards), + inputs = tbl_likert_inputs + ) + ) |> + structure(class = c("tbl_likert", "gtsummary")) + + # adding styling ------------------------------------------------------------- + x <- x |> + # updating the headers for the stats columns + modify_header(all_stat_cols() ~ "**{level}**") + + # return tbl_summary table --------------------------------------------------- + x$call_list <- list(tbl_likert = call) + + x + +} + +brdg_likert <- function(cards, + variables, + statistic) { + set_cli_abort_call() + + # check the ARD has all the requested statistics ----------------------------- + walk( + variables, + \(variable) { + specified_stats <- .extract_glue_elements(statistic[[variable]]) + available_stats <- dplyr::filter(cards, .data$variable %in% .env$variable, !is.na(.data$gts_column))$stat_name |> unique() + + if (is_empty(specified_stats)) { + cli::cli_abort("The {.arg statistic} argument string does not contain any + glue element for variable {.val {variable}}, e.g. {.val {{n}} ({{p}}%)}.", + call = get_cli_abort_call()) + } + if (any(!specified_stats %in% available_stats)) { + not_valid_stat <- specified_stats[!specified_stats %in% available_stats] + cli::cli_abort(c("Statistic(s) {.val {not_valid_stat}} are not valid.", + i = "Select one or more of {.val {available_stats}}"), + call = get_cli_abort_call()) + } + } + ) + + # create table_body ---------------------------------------------------------- + table_body <- + pier_likert(cards = cards, variables = variables, statistic = statistic) + + # construct default table_styling -------------------------------------------- + x <- .create_gtsummary_object(table_body) + + # add info to x$table_styling$header for dynamic headers --------------------- + x$table_styling$header <- + x$table_styling$header |> + dplyr::left_join( + cards |> + dplyr::filter(!is.na(.data$gts_column)) |> + dplyr::select(column = "gts_column", modify_stat_level = "variable_level") |> + unique() |> + dplyr::mutate(modify_stat_level = unlist(.data$modify_stat_level) |> as.character()), + by = "column" + ) + + + # adding styling ------------------------------------------------------------- + x <- x |> + # add header to label column and add default indentation + modify_table_styling( + columns = "label", + label = glue("**{translate_string('Characteristic')}**") + ) + + x |> + structure(class = "gtsummary") |> + modify_column_unhide(columns = all_stat_cols()) +} + +pier_likert <- function(cards, variables, statistic) { + set_cli_abort_call() + + pier_summary_continuous( + cards = + cards |> + dplyr::filter(!is.na(.data$gts_column) | .data$context %in% "attributes") |> + dplyr::mutate(group1 = .data$variable, group1_level = .data$variable_level), + variables = variables, + statistic = statistic + ) +} diff --git a/man/add_n_summary.Rd b/man/add_n_summary.Rd index d131e79c9..2bcbad140 100644 --- a/man/add_n_summary.Rd +++ b/man/add_n_summary.Rd @@ -4,6 +4,7 @@ \alias{add_n_summary} \alias{add_n.tbl_summary} \alias{add_n.tbl_svysummary} +\alias{add_n.tbl_likert} \title{Add column with N} \usage{ \method{add_n}{tbl_summary}( @@ -23,6 +24,15 @@ last = FALSE, ... ) + +\method{add_n}{tbl_likert}( + x, + statistic = "{N_nonmiss}", + col_label = "**N**", + footnote = FALSE, + last = FALSE, + ... +) } \arguments{ \item{x}{(\code{tbl_summary})\cr diff --git a/man/tbl_likert.Rd b/man/tbl_likert.Rd new file mode 100644 index 000000000..4de14971e --- /dev/null +++ b/man/tbl_likert.Rd @@ -0,0 +1,73 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tbl_likert.R +\name{tbl_likert} +\alias{tbl_likert} +\title{Likert Summary} +\usage{ +tbl_likert( + data, + statistic = ~"{n} ({p}\%)", + label = NULL, + digits = NULL, + include = everything(), + sort = c("ascending", "descending") +) +} +\arguments{ +\item{data}{(\code{data.frame})\cr A data frame.} + +\item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr +Used to specify the summary statistics for each variable. +The default is \code{everything() ~ "{n} ({p}\%)"}.} + +\item{label}{(\code{\link[=syntax]{formula-list-selector}})\cr +Used to override default labels in summary table, e.g. \code{list(age = "Age, years")}. +The default for each variable is the column label attribute, \code{attr(., 'label')}. +If no label has been set, the column name is used.} + +\item{digits}{(\code{\link[=syntax]{formula-list-selector}})\cr +Specifies how summary statistics are rounded. Values may be either integer(s) +or function(s). If not specified, default formatting is assigned +via \code{assign_summary_digits()}.} + +\item{include}{(\code{\link[dplyr:dplyr_tidy_select]{tidy-select}})\cr +Variables to include in the summary table. Default is \code{everything()}.} + +\item{sort}{(\code{string})\cr +indicates whether levels of variables should be placed in +ascending order (the default) or descending.} +} +\value{ +a 'tbl_likert' gtsummary table +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr +Create a table of ordered categorical variables in a wide format. +} +\examples{ +levels <- c("Strongly Disagree", "Disagree", "Agree", "Strongly Agree") +df_likert <- data.frame( + recommend_friend = sample(levels, size = 20, replace = TRUE) |> factor(levels = levels), + regret_purchase = sample(levels, size = 20, replace = TRUE) |> factor(levels = levels) +) + +# Example 1 ---------------------------------- +tbl_likert_ex1 <- + df_likert |> + tbl_likert(include = c(recommend_friend, regret_purchase)) |> + add_n() +tbl_likert_ex1 + +# Example 2 ---------------------------------- +# Add continuous summary of the likert scores +list( + tbl_likert_ex1, + tbl_wide_summary( + df_likert |> dplyr::mutate(dplyr::across(everything(), as.numeric)), + statistic = c("{mean}", "{sd}"), + type = ~"continuous", + include = c(recommend_friend, regret_purchase) + ) +) |> + tbl_merge(tab_spanner = FALSE) +} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index e900c6477..8b5f94aff 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -70,9 +70,6 @@ reference: - add_stat_label - add_stat - separate_p_footnotes - - subtitle: Wide Summary Tables - - contents: - - tbl_wide_summary - subtitle: Cross Tables - contents: - tbl_cross @@ -83,6 +80,13 @@ reference: - add_overall.tbl_continuous - add_p.tbl_continuous - add_stat + - subtitle: Wide Summary Tables + - contents: + - tbl_wide_summary + - subtitle: Likert Summary Tables + - contents: + - tbl_likert + - add_n.tbl_likert - subtitle: Time-to-event Summary Tables - contents: - tbl_survfit diff --git a/tests/testthat/_snaps/tbl_likert.md b/tests/testthat/_snaps/tbl_likert.md new file mode 100644 index 000000000..719d51cb6 --- /dev/null +++ b/tests/testthat/_snaps/tbl_likert.md @@ -0,0 +1,49 @@ +# tbl_likert(data) + + Code + as.data.frame(add_n(tbl_likert(df_likert))) + Output + **Characteristic** **N** **Strongly Disagree** **Disagree** **Agree** + 1 recommend_friend 20 7 (35%) 3 (15%) 7 (35%) + 2 regret_purchase 20 7 (35%) 1 (5.0%) 8 (40%) + **Strongly Agree** + 1 3 (15%) + 2 4 (20%) + +# tbl_likert(statistic) + + Code + as.data.frame(tbl_likert(df_likert, statistic = ~"{n} / {N} ({p}%)")) + Output + **Characteristic** **Strongly Disagree** **Disagree** **Agree** + 1 recommend_friend 7 / 20 (35%) 3 / 20 (15%) 7 / 20 (35%) + 2 regret_purchase 7 / 20 (35%) 1 / 20 (5.0%) 8 / 20 (40%) + **Strongly Agree** + 1 3 / 20 (15%) + 2 4 / 20 (20%) + +# tbl_likert(digits) + + Code + as.data.frame(tbl_likert(df_likert, digits = ~ list(p = label_style_sigfig( + digits = 3, scale = 100)))) + Output + **Characteristic** **Strongly Disagree** **Disagree** **Agree** + 1 recommend_friend 7 (35.0%) 3 (15.0%) 7 (35.0%) + 2 regret_purchase 7 (35.0%) 1 (5.00%) 8 (40.0%) + **Strongly Agree** + 1 3 (15.0%) + 2 4 (20.0%) + +# tbl_likert(sort) + + Code + as.data.frame(tbl_likert(df_likert, sort = "descending")) + Output + **Characteristic** **Strongly Agree** **Agree** **Disagree** + 1 recommend_friend 3 (15%) 7 (35%) 3 (15%) + 2 regret_purchase 4 (20%) 8 (40%) 1 (5.0%) + **Strongly Disagree** + 1 7 (35%) + 2 7 (35%) + diff --git a/tests/testthat/test-tbl_likert.R b/tests/testthat/test-tbl_likert.R new file mode 100644 index 000000000..d9d1d4273 --- /dev/null +++ b/tests/testthat/test-tbl_likert.R @@ -0,0 +1,116 @@ +skip_if_not(is_pkg_installed("withr", reference_pkg = "gtsummary")) + +levels <- c("Strongly Disagree", "Disagree", "Agree", "Strongly Agree") +df_likert <- + withr::with_seed( + seed = 11235, + data.frame( + recommend_friend = sample(levels, size = 20, replace = TRUE) |> factor(levels = levels), + regret_purchase = sample(levels, size = 20, replace = TRUE) |> factor(levels = levels) + ) + ) + +test_that("tbl_likert(data)", { + # standard use works well + expect_snapshot( + df_likert |> + tbl_likert() |> + add_n() |> + as.data.frame() + ) + + # errors with bad input + expect_error( + tbl_likert(letters), + "The `data` argument must be class" + ) +}) + +test_that("tbl_likert(statistic)", { + # standard use works well + expect_snapshot( + df_likert |> + tbl_likert(statistic = ~"{n} / {N} ({p}%)") |> + as.data.frame() + ) + + # errors with bad inputs + expect_error( + df_likert |> + tbl_likert(statistic = ~letters), + "Values pass in `statistic` argument must be strings" + ) + + # statistic doesn't have any glue syntax + expect_error( + df_likert |> + tbl_likert(statistic = ~"n / N"), + "The `statistic` argument string does not contain any glue element" + ) + + # statistic has stats that are not available + expect_error( + df_likert |> + tbl_likert(statistic = ~"{n} ({sd})"), + "are not valid" + ) +}) + +test_that("tbl_likert(label)", { + expect_equal( + df_likert |> + tbl_likert(label = list(recommend_friend = "I Would Recommend to a Friend")) |> + getElement("table_body") |> + getElement("label") |> + head(1L), + "I Would Recommend to a Friend" + ) + + expect_error( + df_likert |> + tbl_likert(label = list(recommend_friend = letters)), + "Values pass in `label` argument must be strings." + ) +}) + + +test_that("tbl_likert(digits)", { + # standard use works well + expect_snapshot( + df_likert |> + tbl_likert(digits = ~list(p = label_style_sigfig(digits = 3, scale = 100))) |> + as.data.frame() + ) + + # errors with bad inputs + expect_error( + df_likert |> + tbl_likert(digits = ~letters), + "Error in `digits` argument for variable" + ) +}) + +test_that("tbl_likert(include)", { + expect_error( + mtcars |> + tbl_likert(include = mpg), + "All variables in the `include` argument must be" + ) + + expect_error( + df_likert |> + dplyr::mutate( + bad_fct = recommend_friend |> fct_expand("anoter_level") + ) |> + tbl_likert(include = everything()), + "All variables in the `include` argument must have the same factor levels" + ) +}) + +test_that("tbl_likert(sort)", { + expect_snapshot( + df_likert |> + tbl_likert(sort = "descending") |> + as.data.frame() + ) +})